1/*
2 * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29/*
30 * Copyright (c) 1989, 1993
31 *	The Regents of the University of California.  All rights reserved.
32 *
33 * This code is derived from software contributed to Berkeley by
34 * Rick Macklem at The University of Guelph.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 *    notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 *    notice, this list of conditions and the following disclaimer in the
43 *    documentation and/or other materials provided with the distribution.
44 * 3. All advertising materials mentioning features or use of this software
45 *    must display the following acknowledgement:
46 *	This product includes software developed by the University of
47 *	California, Berkeley and its contributors.
48 * 4. Neither the name of the University nor the names of its contributors
49 *    may be used to endorse or promote products derived from this software
50 *    without specific prior written permission.
51 *
52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62 * SUCH DAMAGE.
63 *
64 *	@(#)nfs_subs.c	8.8 (Berkeley) 5/22/95
65 * FreeBSD-Id: nfs_subs.c,v 1.47 1997/11/07 08:53:24 phk Exp $
66 */
67
68/*
 * These functions support the nfsm_* macros and help fiddle mbuf chains for
 * the NFS op functions. They do things like create the RPC header and
71 * copy data between mbuf chains and uio lists.
72 */
73#include <sys/param.h>
74#include <sys/proc.h>
75#include <sys/kauth.h>
76#include <sys/systm.h>
77#include <sys/kernel.h>
78#include <sys/mount_internal.h>
79#include <sys/vnode_internal.h>
80#include <sys/kpi_mbuf.h>
81#include <sys/socket.h>
82#include <sys/stat.h>
83#include <sys/malloc.h>
84#include <sys/syscall.h>
85#include <sys/ubc_internal.h>
86#include <sys/fcntl.h>
87#include <sys/uio.h>
88#include <sys/domain.h>
89#include <libkern/OSAtomic.h>
90#include <kern/thread_call.h>
91
92#include <sys/vm.h>
93#include <sys/vmparam.h>
94
95#include <sys/time.h>
96#include <kern/clock.h>
97
98#include <nfs/rpcv2.h>
99#include <nfs/nfsproto.h>
100#include <nfs/nfs.h>
101#include <nfs/nfsnode.h>
102#if NFSCLIENT
103#define _NFS_XDR_SUBS_FUNCS_ /* define this to get xdrbuf function definitions */
104#endif
105#include <nfs/xdr_subs.h>
106#include <nfs/nfsm_subs.h>
107#include <nfs/nfs_gss.h>
108#include <nfs/nfsmount.h>
109#include <nfs/nfs_lock.h>
110
111#include <miscfs/specfs/specdev.h>
112
113#include <netinet/in.h>
114#include <net/kpi_interface.h>
115
116#include <sys/utfconv.h>
117
118/*
119 * NFS globals
120 */
121struct nfsstats	__attribute__((aligned(8))) nfsstats;
122size_t nfs_mbuf_mhlen = 0, nfs_mbuf_minclsize = 0;
123
124/*
125 * functions to convert between NFS and VFS types
126 */
127nfstype
128vtonfs_type(enum vtype vtype, int nfsvers)
129{
130	switch (vtype) {
131	case VNON:
132		return NFNON;
133	case VREG:
134		return NFREG;
135	case VDIR:
136		return NFDIR;
137	case VBLK:
138		return NFBLK;
139	case VCHR:
140		return NFCHR;
141	case VLNK:
142		return NFLNK;
143	case VSOCK:
144		if (nfsvers > NFS_VER2)
145			return NFSOCK;
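		/* NFSv2 has no socket type: fall through */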
146	case VFIFO:
147		if (nfsvers > NFS_VER2)
148			return NFFIFO;
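		/* NFSv2 has no FIFO type: fall through to NFNON */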
149	case VBAD:
150	case VSTR:
151	case VCPLX:
152	default:
153		return NFNON;
154	}
155}
156
157enum vtype
158nfstov_type(nfstype nvtype, int nfsvers)
159{
160	switch (nvtype) {
161	case NFNON:
162		return VNON;
163	case NFREG:
164		return VREG;
165	case NFDIR:
166		return VDIR;
167	case NFBLK:
168		return VBLK;
169	case NFCHR:
170		return VCHR;
171	case NFLNK:
172		return VLNK;
173	case NFSOCK:
174		if (nfsvers > NFS_VER2)
175			return VSOCK;
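		/* NFSv2 has no socket type: fall through */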
176	case NFFIFO:
177		if (nfsvers > NFS_VER2)
178			return VFIFO;
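		/* NFSv2 has no FIFO type: fall through */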
179	case NFATTRDIR:
180		if (nfsvers > NFS_VER3)
181			return VDIR;
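		/* named attribute dirs are NFSv4-only: fall through */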
182	case NFNAMEDATTR:
183		if (nfsvers > NFS_VER3)
184			return VREG;
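		/* named attributes are NFSv4-only: fall through to VNON */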
185	default:
186		return VNON;
187	}
188}
189
190int
191vtonfsv2_mode(enum vtype vtype, mode_t m)
192{
193	switch (vtype) {
194	case VNON:
195	case VREG:
196	case VDIR:
197	case VBLK:
198	case VCHR:
199	case VLNK:
200	case VSOCK:
201		return vnode_makeimode(vtype, m);
202	case VFIFO:
203		return vnode_makeimode(VCHR, m);
204	case VBAD:
205	case VSTR:
206	case VCPLX:
207	default:
208		return vnode_makeimode(VNON, m);
209	}
210}
211
212#if NFSSERVER
213
214/*
215 * Mapping of old NFS Version 2 RPC numbers to generic numbers.
216 */
217int nfsv3_procid[NFS_NPROCS] = {
218	NFSPROC_NULL,
219	NFSPROC_GETATTR,
220	NFSPROC_SETATTR,
221	NFSPROC_NOOP,
222	NFSPROC_LOOKUP,
223	NFSPROC_READLINK,
224	NFSPROC_READ,
225	NFSPROC_NOOP,
226	NFSPROC_WRITE,
227	NFSPROC_CREATE,
228	NFSPROC_REMOVE,
229	NFSPROC_RENAME,
230	NFSPROC_LINK,
231	NFSPROC_SYMLINK,
232	NFSPROC_MKDIR,
233	NFSPROC_RMDIR,
234	NFSPROC_READDIR,
235	NFSPROC_FSSTAT,
236	NFSPROC_NOOP,
237	NFSPROC_NOOP,
238	NFSPROC_NOOP,
239	NFSPROC_NOOP,
240	NFSPROC_NOOP
241};
242
243#endif /* NFSSERVER */
244
245/*
 * The reverse mapping, from generic procedure numbers back to
 * NFS Version 2 procedure numbers.
247 */
248int nfsv2_procid[NFS_NPROCS] = {
249	NFSV2PROC_NULL,
250	NFSV2PROC_GETATTR,
251	NFSV2PROC_SETATTR,
252	NFSV2PROC_LOOKUP,
253	NFSV2PROC_NOOP,
254	NFSV2PROC_READLINK,
255	NFSV2PROC_READ,
256	NFSV2PROC_WRITE,
257	NFSV2PROC_CREATE,
258	NFSV2PROC_MKDIR,
259	NFSV2PROC_SYMLINK,
260	NFSV2PROC_CREATE,
261	NFSV2PROC_REMOVE,
262	NFSV2PROC_RMDIR,
263	NFSV2PROC_RENAME,
264	NFSV2PROC_LINK,
265	NFSV2PROC_READDIR,
266	NFSV2PROC_NOOP,
267	NFSV2PROC_STATFS,
268	NFSV2PROC_NOOP,
269	NFSV2PROC_NOOP,
270	NFSV2PROC_NOOP,
271	NFSV2PROC_NOOP
272};
273
274
275/*
276 * initialize NFS's cache of mbuf constants
277 */
278void
279nfs_mbuf_init(void)
280{
281	struct mbuf_stat ms;
282
283	mbuf_stats(&ms);
284	nfs_mbuf_mhlen = ms.mhlen;
285	nfs_mbuf_minclsize = ms.minclsize;
286}
287
288#if NFSSERVER
289
290/*
291 * allocate a list of mbufs to hold the given amount of data
292 */
293int
294nfsm_mbuf_get_list(size_t size, mbuf_t *mp, int *mbcnt)
295{
296	int error, cnt;
297	mbuf_t mhead, mlast, m;
298	size_t len, mlen;
299
300	error = cnt = 0;
301	mhead = mlast = NULL;
302	len = 0;
303
304	while (len < size) {
305		nfsm_mbuf_get(error, &m, (size - len));
306		if (error)
307			break;
308		if (!mhead)
309			mhead = m;
310		if (mlast && ((error = mbuf_setnext(mlast, m)))) {
311			mbuf_free(m);
312			break;
313		}
314		mlen = mbuf_maxlen(m);
315		if ((len + mlen) > size)
316			mlen = size - len;
317		mbuf_setlen(m, mlen);
318		len += mlen;
319		cnt++;
320		mlast = m;
321	}
322
323	if (!error) {
324		*mp = mhead;
325		*mbcnt = cnt;
326	}
327	return (error);
328}
329
330#endif /* NFSSERVER */
331
332/*
333 * nfsm_chain_new_mbuf()
334 *
335 * Add a new mbuf to the given chain.
336 */
337int
338nfsm_chain_new_mbuf(struct nfsm_chain *nmc, size_t sizehint)
339{
340	mbuf_t mb;
341	int error = 0;
342
343	if (nmc->nmc_flags & NFSM_CHAIN_FLAG_ADD_CLUSTERS)
344		sizehint = nfs_mbuf_minclsize;
345
346	/* allocate a new mbuf */
347	nfsm_mbuf_get(error, &mb, sizehint);
348	if (error)
349		return (error);
350	if (mb == NULL)
351		panic("got NULL mbuf?");
352
353	/* do we have a current mbuf? */
354	if (nmc->nmc_mcur) {
355		/* first cap off current mbuf */
356		mbuf_setlen(nmc->nmc_mcur, nmc->nmc_ptr - (caddr_t)mbuf_data(nmc->nmc_mcur));
357		/* then append the new mbuf */
358		error = mbuf_setnext(nmc->nmc_mcur, mb);
359		if (error) {
360			mbuf_free(mb);
361			return (error);
362		}
363	}
364
365	/* set up for using the new mbuf */
366	nmc->nmc_mcur = mb;
367	nmc->nmc_ptr = mbuf_data(mb);
368	nmc->nmc_left = mbuf_trailingspace(mb);
369
370	return (0);
371}
372
373/*
374 * nfsm_chain_add_opaque_f()
375 *
376 * Add "len" bytes of opaque data pointed to by "buf" to the given chain.
377 */
378int
379nfsm_chain_add_opaque_f(struct nfsm_chain *nmc, const u_char *buf, uint32_t len)
380{
381	uint32_t paddedlen, tlen;
382	int error;
383
384	paddedlen = nfsm_rndup(len);
385
386	while (paddedlen) {
387		if (!nmc->nmc_left) {
388			error = nfsm_chain_new_mbuf(nmc, paddedlen);
389			if (error)
390				return (error);
391		}
392		tlen = MIN(nmc->nmc_left, paddedlen);
393		if (tlen) {
394			if (len) {
395				if (tlen > len)
396					tlen = len;
397				bcopy(buf, nmc->nmc_ptr, tlen);
398			} else {
399				bzero(nmc->nmc_ptr, tlen);
400			}
401			nmc->nmc_ptr += tlen;
402			nmc->nmc_left -= tlen;
403			paddedlen -= tlen;
404			if (len) {
405				buf += tlen;
406				len -= tlen;
407			}
408		}
409	}
410	return (0);
411}
412
413/*
414 * nfsm_chain_add_opaque_nopad_f()
415 *
416 * Add "len" bytes of opaque data pointed to by "buf" to the given chain.
417 * Do not XDR pad.
418 */
419int
420nfsm_chain_add_opaque_nopad_f(struct nfsm_chain *nmc, const u_char *buf, uint32_t len)
421{
422	uint32_t tlen;
423	int error;
424
425	while (len > 0) {
426		if (nmc->nmc_left <= 0) {
427			error = nfsm_chain_new_mbuf(nmc, len);
428			if (error)
429				return (error);
430		}
431		tlen = MIN(nmc->nmc_left, len);
432		bcopy(buf, nmc->nmc_ptr, tlen);
433		nmc->nmc_ptr += tlen;
434		nmc->nmc_left -= tlen;
435		len -= tlen;
436		buf += tlen;
437	}
438	return (0);
439}
440
441/*
442 * nfsm_chain_add_uio()
443 *
444 * Add "len" bytes of data from "uio" to the given chain.
445 */
446int
447nfsm_chain_add_uio(struct nfsm_chain *nmc, uio_t uio, uint32_t len)
448{
449	uint32_t paddedlen, tlen;
450	int error;
451
452	paddedlen = nfsm_rndup(len);
453
454	while (paddedlen) {
455		if (!nmc->nmc_left) {
456			error = nfsm_chain_new_mbuf(nmc, paddedlen);
457			if (error)
458				return (error);
459		}
460		tlen = MIN(nmc->nmc_left, paddedlen);
461		if (tlen) {
462			if (len) {
463				if (tlen > len)
464					tlen = len;
465				uiomove(nmc->nmc_ptr, tlen, uio);
466			} else {
467				bzero(nmc->nmc_ptr, tlen);
468			}
469			nmc->nmc_ptr += tlen;
470			nmc->nmc_left -= tlen;
471			paddedlen -= tlen;
472			if (len)
473				len -= tlen;
474		}
475	}
476	return (0);
477}
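
/*
 * Example: the add routines above are normally driven through the
 * nfsm_chain_* macros.  The following is a minimal, illustrative sketch
 * (kept under "#if 0", not compiled) of hand-encoding one XDR opaque,
 * i.e. a length word followed by padded data, using only routines that
 * appear in this file; the "cookie" payload is an assumed example value.
 */
#if 0
	struct nfsm_chain nmreq;
	mbuf_t mreq;
	int error;
	const u_char cookie[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };

	error = mbuf_gethdr(MBUF_WAITOK, MBUF_TYPE_DATA, &mreq);
	if (!error) {
		nfsm_chain_init(&nmreq, mreq);
		/* XDR opaque: 4-byte length word... */
		nfsm_chain_add_32(error, &nmreq, sizeof(cookie));
		/* ...then the bytes themselves, padded to a 4-byte boundary */
		if (!error)
			error = nfsm_chain_add_opaque_f(&nmreq, cookie, sizeof(cookie));
		nfsm_chain_build_done(error, &nmreq);
	}
#endif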
478
479/*
480 * Find the length of the NFS mbuf chain
481 * up to the current encoding/decoding offset.
482 */
483int
484nfsm_chain_offset(struct nfsm_chain *nmc)
485{
486	mbuf_t mb;
487	int len = 0;
488
489	for (mb = nmc->nmc_mhead; mb; mb = mbuf_next(mb)) {
490		if (mb == nmc->nmc_mcur)
491			return (len + (nmc->nmc_ptr - (caddr_t) mbuf_data(mb)));
492		len += mbuf_len(mb);
493	}
494
495	return (len);
496}
497
498/*
499 * nfsm_chain_advance()
500 *
501 * Advance an nfsm_chain by "len" bytes.
502 */
503int
504nfsm_chain_advance(struct nfsm_chain *nmc, uint32_t len)
505{
506	mbuf_t mb;
507
508	while (len) {
509		if (nmc->nmc_left >= len) {
510			nmc->nmc_left -= len;
511			nmc->nmc_ptr += len;
512			return (0);
513		}
514		len -= nmc->nmc_left;
515		nmc->nmc_mcur = mb = mbuf_next(nmc->nmc_mcur);
516		if (!mb)
517			return (EBADRPC);
518		nmc->nmc_ptr = mbuf_data(mb);
519		nmc->nmc_left = mbuf_len(mb);
520	}
521
522	return (0);
523}
524
525/*
526 * nfsm_chain_reverse()
527 *
 * Move the decode offset of an nfsm_chain back by "len" bytes.
529 */
530int
531nfsm_chain_reverse(struct nfsm_chain *nmc, uint32_t len)
532{
533	uint32_t mlen, new_offset;
534	int error = 0;
535
536	mlen = nmc->nmc_ptr - (caddr_t) mbuf_data(nmc->nmc_mcur);
537	if (len <= mlen) {
538		nmc->nmc_ptr -= len;
539		nmc->nmc_left += len;
540		return (0);
541	}
542
543	new_offset = nfsm_chain_offset(nmc) - len;
544	nfsm_chain_dissect_init(error, nmc, nmc->nmc_mhead);
545	if (error)
546		return (error);
547
548	return (nfsm_chain_advance(nmc, new_offset));
549}
550
551/*
552 * nfsm_chain_get_opaque_pointer_f()
553 *
554 * Return a pointer to the next "len" bytes of contiguous data in
555 * the mbuf chain.  If the next "len" bytes are not contiguous, we
556 * try to manipulate the mbuf chain so that it is.
557 *
558 * The nfsm_chain is advanced by nfsm_rndup("len") bytes.
559 */
560int
561nfsm_chain_get_opaque_pointer_f(struct nfsm_chain *nmc, uint32_t len, u_char **pptr)
562{
563	mbuf_t mbcur, mb;
564	uint32_t left, need, mblen, cplen, padlen;
565	u_char *ptr;
566	int error = 0;
567
568	/* move to next mbuf with data */
569	while (nmc->nmc_mcur && (nmc->nmc_left == 0)) {
570		mb = mbuf_next(nmc->nmc_mcur);
571		nmc->nmc_mcur = mb;
572		if (!mb)
573			break;
574		nmc->nmc_ptr = mbuf_data(mb);
575		nmc->nmc_left = mbuf_len(mb);
576	}
577	/* check if we've run out of data */
578	if (!nmc->nmc_mcur)
579		return (EBADRPC);
580
581	/* do we already have a contiguous buffer? */
582	if (nmc->nmc_left >= len) {
583		/* the returned pointer will be the current pointer */
584		*pptr = (u_char*)nmc->nmc_ptr;
585		error = nfsm_chain_advance(nmc, nfsm_rndup(len));
586		return (error);
587	}
588
589	padlen = nfsm_rndup(len) - len;
590
591	/* we need (len - left) more bytes */
592	mbcur = nmc->nmc_mcur;
593	left = nmc->nmc_left;
594	need = len - left;
595
596	if (need > mbuf_trailingspace(mbcur)) {
597		/*
598		 * The needed bytes won't fit in the current mbuf so we'll
599		 * allocate a new mbuf to hold the contiguous range of data.
600		 */
601		nfsm_mbuf_get(error, &mb, len);
602		if (error)
603			return (error);
604		/* double check that this mbuf can hold all the data */
605		if (mbuf_maxlen(mb) < len) {
606			mbuf_free(mb);
607			return (EOVERFLOW);
608		}
609
610		/* the returned pointer will be the new mbuf's data pointer */
611		*pptr = ptr = mbuf_data(mb);
612
613		/* copy "left" bytes to the new mbuf */
614		bcopy(nmc->nmc_ptr, ptr, left);
615		ptr += left;
616		mbuf_setlen(mb, left);
617
618		/* insert the new mbuf between the current and next mbufs */
619		error = mbuf_setnext(mb, mbuf_next(mbcur));
620		if (!error)
621			error = mbuf_setnext(mbcur, mb);
622		if (error) {
623			mbuf_free(mb);
624			return (error);
625		}
626
627		/* reduce current mbuf's length by "left" */
628		mbuf_setlen(mbcur, mbuf_len(mbcur) - left);
629
630		/*
631		 * update nmc's state to point at the end of the mbuf
632		 * where the needed data will be copied to.
633		 */
634		nmc->nmc_mcur = mbcur = mb;
635		nmc->nmc_left = 0;
636		nmc->nmc_ptr = (caddr_t)ptr;
637	} else {
638		/* The rest of the data will fit in this mbuf. */
639
640		/* the returned pointer will be the current pointer */
641		*pptr = (u_char*)nmc->nmc_ptr;
642
643		/*
644		 * update nmc's state to point at the end of the mbuf
645		 * where the needed data will be copied to.
646		 */
647		nmc->nmc_ptr += left;
648		nmc->nmc_left = 0;
649	}
650
651	/*
652	 * move the next "need" bytes into the current
653	 * mbuf from the mbufs that follow
654	 */
655
656	/* extend current mbuf length */
657	mbuf_setlen(mbcur, mbuf_len(mbcur) + need);
658
659	/* mb follows mbufs we're copying/compacting data from */
660	mb = mbuf_next(mbcur);
661
662	while (need && mb) {
663		/* copy as much as we need/can */
664		ptr = mbuf_data(mb);
665		mblen = mbuf_len(mb);
666		cplen = MIN(mblen, need);
667		if (cplen) {
668			bcopy(ptr, nmc->nmc_ptr, cplen);
669			/*
670			 * update the mbuf's pointer and length to reflect that
671			 * the data was shifted to an earlier mbuf in the chain
672			 */
673			error = mbuf_setdata(mb, ptr + cplen, mblen - cplen);
674			if (error) {
675				mbuf_setlen(mbcur, mbuf_len(mbcur) - need);
676				return (error);
677			}
678			/* update pointer/need */
679			nmc->nmc_ptr += cplen;
680			need -= cplen;
681		}
682		/* if more needed, go to next mbuf */
683		if (need)
684			mb = mbuf_next(mb);
685	}
686
687	/* did we run out of data in the mbuf chain? */
688	if (need) {
689		mbuf_setlen(mbcur, mbuf_len(mbcur) - need);
690		return (EBADRPC);
691	}
692
693	/*
694	 * update nmc's state to point after this contiguous data
695	 *
696	 * "mb" points to the last mbuf we copied data from so we
697	 * just set nmc to point at whatever remains in that mbuf.
698	 */
699	nmc->nmc_mcur = mb;
700	nmc->nmc_ptr = mbuf_data(mb);
701	nmc->nmc_left = mbuf_len(mb);
702
703	/* move past any padding */
704	if (padlen)
705		error = nfsm_chain_advance(nmc, padlen);
706
707	return (error);
708}
709
710/*
711 * nfsm_chain_get_opaque_f()
712 *
713 * Read the next "len" bytes in the chain into "buf".
714 * The nfsm_chain is advanced by nfsm_rndup("len") bytes.
715 */
716int
717nfsm_chain_get_opaque_f(struct nfsm_chain *nmc, uint32_t len, u_char *buf)
718{
719	uint32_t cplen, padlen;
720	int error = 0;
721
722	padlen = nfsm_rndup(len) - len;
723
724	/* loop through mbufs copying all the data we need */
725	while (len && nmc->nmc_mcur) {
726		/* copy as much as we need/can */
727		cplen = MIN(nmc->nmc_left, len);
728		if (cplen) {
729			bcopy(nmc->nmc_ptr, buf, cplen);
730			nmc->nmc_ptr += cplen;
731			nmc->nmc_left -= cplen;
732			buf += cplen;
733			len -= cplen;
734		}
735		/* if more needed, go to next mbuf */
736		if (len) {
737			mbuf_t mb = mbuf_next(nmc->nmc_mcur);
738			nmc->nmc_mcur = mb;
739			nmc->nmc_ptr = mb ? mbuf_data(mb) : NULL;
740			nmc->nmc_left = mb ? mbuf_len(mb) : 0;
741		}
742	}
743
744	/* did we run out of data in the mbuf chain? */
745	if (len)
746		return (EBADRPC);
747
748	if (padlen)
749		nfsm_chain_adv(error, nmc, padlen);
750
751	return (error);
752}
753
754/*
755 * nfsm_chain_get_uio()
756 *
757 * Read the next "len" bytes in the chain into the given uio.
758 * The nfsm_chain is advanced by nfsm_rndup("len") bytes.
759 */
760int
761nfsm_chain_get_uio(struct nfsm_chain *nmc, uint32_t len, uio_t uio)
762{
763	uint32_t cplen, padlen;
764	int error = 0;
765
766	padlen = nfsm_rndup(len) - len;
767
768	/* loop through mbufs copying all the data we need */
769	while (len && nmc->nmc_mcur) {
770		/* copy as much as we need/can */
771		cplen = MIN(nmc->nmc_left, len);
772		if (cplen) {
773			error = uiomove(nmc->nmc_ptr, cplen, uio);
774			if (error)
775				return (error);
776			nmc->nmc_ptr += cplen;
777			nmc->nmc_left -= cplen;
778			len -= cplen;
779		}
780		/* if more needed, go to next mbuf */
781		if (len) {
782			mbuf_t mb = mbuf_next(nmc->nmc_mcur);
783			nmc->nmc_mcur = mb;
784			nmc->nmc_ptr = mb ? mbuf_data(mb) : NULL;
785			nmc->nmc_left = mb ? mbuf_len(mb) : 0;
786		}
787	}
788
789	/* did we run out of data in the mbuf chain? */
790	if (len)
791		return (EBADRPC);
792
793	if (padlen)
794		nfsm_chain_adv(error, nmc, padlen);
795
796	return (error);
797}
798
799#if NFSCLIENT
800
801int
802nfsm_chain_add_string_nfc(struct nfsm_chain *nmc, const uint8_t *s, uint32_t slen)
803{
804	uint8_t smallbuf[64];
805	uint8_t *nfcname = smallbuf;
806	size_t buflen = sizeof(smallbuf), nfclen;
807	int error;
808
809	error = utf8_normalizestr(s, slen, nfcname, &nfclen, buflen, UTF_PRECOMPOSED|UTF_NO_NULL_TERM);
810	if (error == ENAMETOOLONG) {
811		buflen = MAXPATHLEN;
812		MALLOC_ZONE(nfcname, uint8_t *, MAXPATHLEN, M_NAMEI, M_WAITOK);
813		if (nfcname)
814			error = utf8_normalizestr(s, slen, nfcname, &nfclen, buflen, UTF_PRECOMPOSED|UTF_NO_NULL_TERM);
815	}
816
817	/* if we got an error, just use the original string */
818	if (error)
819		nfsm_chain_add_string(error, nmc, s, slen);
820	else
821		nfsm_chain_add_string(error, nmc, nfcname, nfclen);
822
823	if (nfcname && (nfcname != smallbuf))
824		FREE_ZONE(nfcname, MAXPATHLEN, M_NAMEI);
825	return (error);
826}
827
828/*
829 * Add an NFSv2 "sattr" structure to an mbuf chain
830 */
831int
832nfsm_chain_add_v2sattr_f(struct nfsm_chain *nmc, struct vnode_attr *vap, uint32_t szrdev)
833{
834	int error = 0;
835
836	nfsm_chain_add_32(error, nmc, vtonfsv2_mode(vap->va_type,
837		(VATTR_IS_ACTIVE(vap, va_mode) ? vap->va_mode : 0600)));
838	nfsm_chain_add_32(error, nmc,
839		VATTR_IS_ACTIVE(vap, va_uid) ? vap->va_uid : (uint32_t)-1);
840	nfsm_chain_add_32(error, nmc,
841		VATTR_IS_ACTIVE(vap, va_gid) ? vap->va_gid : (uint32_t)-1);
842	nfsm_chain_add_32(error, nmc, szrdev);
843	nfsm_chain_add_v2time(error, nmc,
844		VATTR_IS_ACTIVE(vap, va_access_time) ?
845		&vap->va_access_time : NULL);
846	nfsm_chain_add_v2time(error, nmc,
847		VATTR_IS_ACTIVE(vap, va_modify_time) ?
848		&vap->va_modify_time : NULL);
849
850	return (error);
851}
852
853/*
854 * Add an NFSv3 "sattr" structure to an mbuf chain
855 */
856int
857nfsm_chain_add_v3sattr_f(struct nfsm_chain *nmc, struct vnode_attr *vap)
858{
859	int error = 0;
860
861	if (VATTR_IS_ACTIVE(vap, va_mode)) {
862		nfsm_chain_add_32(error, nmc, TRUE);
863		nfsm_chain_add_32(error, nmc, vap->va_mode);
864	} else {
865		nfsm_chain_add_32(error, nmc, FALSE);
866	}
867	if (VATTR_IS_ACTIVE(vap, va_uid)) {
868		nfsm_chain_add_32(error, nmc, TRUE);
869		nfsm_chain_add_32(error, nmc, vap->va_uid);
870	} else {
871		nfsm_chain_add_32(error, nmc, FALSE);
872	}
873	if (VATTR_IS_ACTIVE(vap, va_gid)) {
874		nfsm_chain_add_32(error, nmc, TRUE);
875		nfsm_chain_add_32(error, nmc, vap->va_gid);
876	} else {
877		nfsm_chain_add_32(error, nmc, FALSE);
878	}
879	if (VATTR_IS_ACTIVE(vap, va_data_size)) {
880		nfsm_chain_add_32(error, nmc, TRUE);
881		nfsm_chain_add_64(error, nmc, vap->va_data_size);
882	} else {
883		nfsm_chain_add_32(error, nmc, FALSE);
884	}
885	if (vap->va_vaflags & VA_UTIMES_NULL) {
886		nfsm_chain_add_32(error, nmc, NFS_TIME_SET_TO_SERVER);
887		nfsm_chain_add_32(error, nmc, NFS_TIME_SET_TO_SERVER);
888	} else {
889		if (VATTR_IS_ACTIVE(vap, va_access_time)) {
890			nfsm_chain_add_32(error, nmc, NFS_TIME_SET_TO_CLIENT);
891			nfsm_chain_add_32(error, nmc, vap->va_access_time.tv_sec);
892			nfsm_chain_add_32(error, nmc, vap->va_access_time.tv_nsec);
893		} else {
894			nfsm_chain_add_32(error, nmc, NFS_TIME_DONT_CHANGE);
895		}
896		if (VATTR_IS_ACTIVE(vap, va_modify_time)) {
897			nfsm_chain_add_32(error, nmc, NFS_TIME_SET_TO_CLIENT);
898			nfsm_chain_add_32(error, nmc, vap->va_modify_time.tv_sec);
899			nfsm_chain_add_32(error, nmc, vap->va_modify_time.tv_nsec);
900		} else {
901			nfsm_chain_add_32(error, nmc, NFS_TIME_DONT_CHANGE);
902		}
903	}
904
905	return (error);
906}
907
908
909/*
910 * nfsm_chain_get_fh_attr()
911 *
912 * Get the file handle and attributes from an mbuf chain. (NFSv2/v3)
913 */
914int
915nfsm_chain_get_fh_attr(
916	struct nfsm_chain *nmc,
917	nfsnode_t dnp,
918	vfs_context_t ctx,
919	int nfsvers,
920	uint64_t *xidp,
921	fhandle_t *fhp,
922	struct nfs_vattr *nvap)
923{
924	int error = 0, gotfh, gotattr;
925
926	gotfh = gotattr = 1;
927
928	if (nfsvers == NFS_VER3) /* check for file handle */
929		nfsm_chain_get_32(error, nmc, gotfh);
930	if (!error && gotfh) /* get file handle */
931		nfsm_chain_get_fh(error, nmc, nfsvers, fhp);
932	else
933		fhp->fh_len = 0;
934	if (nfsvers == NFS_VER3) /* check for file attributes */
935		nfsm_chain_get_32(error, nmc, gotattr);
936	nfsmout_if(error);
937	if (gotattr) {
938		if (!gotfh) /* skip attributes */
939			nfsm_chain_adv(error, nmc, NFSX_V3FATTR);
940		else /* get attributes */
941			error = nfs_parsefattr(nmc, nfsvers, nvap);
942	} else if (gotfh) {
943		/* we need valid attributes in order to call nfs_nget() */
944		if (nfs3_getattr_rpc(NULL, NFSTOMP(dnp), fhp->fh_data, fhp->fh_len, 0, ctx, nvap, xidp)) {
945			gotattr = 0;
946			fhp->fh_len = 0;
947		}
948	}
949nfsmout:
950	return (error);
951}
952
953/*
954 * Get and process NFSv3 WCC data from an mbuf chain
955 */
956int
957nfsm_chain_get_wcc_data_f(
958	struct nfsm_chain *nmc,
959	nfsnode_t np,
960	struct timespec *premtime,
961	int *newpostattr,
962	u_int64_t *xidp)
963{
964	int error = 0;
965	uint32_t flag = 0;
966
967	nfsm_chain_get_32(error, nmc, flag);
968	if (!error && flag) {
969		nfsm_chain_adv(error, nmc, 2 * NFSX_UNSIGNED);
970		nfsm_chain_get_32(error, nmc, premtime->tv_sec);
971		nfsm_chain_get_32(error, nmc, premtime->tv_nsec);
972		nfsm_chain_adv(error, nmc, 2 * NFSX_UNSIGNED);
973	} else {
974		premtime->tv_sec = 0;
975		premtime->tv_nsec = 0;
976	}
977	nfsm_chain_postop_attr_update_flag(error, nmc, np, *newpostattr, xidp);
978
979	return (error);
980}
981
982/*
983 * Get the next RPC transaction ID (XID)
984 */
985void
986nfs_get_xid(uint64_t *xidp)
987{
988	struct timeval tv;
989
990	lck_mtx_lock(nfs_request_mutex);
991	if (!nfs_xid) {
992		/*
993		 * Derive initial xid from system time.
994		 *
995		 * Note: it's OK if this code inits nfs_xid to 0 (for example,
996		 * due to a broken clock) because we immediately increment it
997		 * and we guarantee to never use xid 0.  So, nfs_xid should only
998		 * ever be 0 the first time this function is called.
999		 */
1000		microtime(&tv);
1001		nfs_xid = tv.tv_sec << 12;
1002	}
1003	if (++nfs_xid == 0) {
1004		/* Skip zero xid if it should ever happen. */
1005		nfs_xidwrap++;
1006		nfs_xid++;
1007	}
1008	*xidp = nfs_xid + ((uint64_t)nfs_xidwrap << 32);
1009	lck_mtx_unlock(nfs_request_mutex);
1010}
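
/*
 * Usage sketch (illustrative): callers hand nfs_get_xid() a 64-bit counter
 * even though only the low 32 bits go out on the wire (see nfsm_rpchead2
 * below); the high 32 bits carry the wrap count so locally tracked xids
 * stay unique across 32-bit wraparound.
 *
 *	u_int64_t xid = 0;
 *	nfs_get_xid(&xid);
 *	... (uint32_t)(xid & 0xffffffff) is what goes into the RPC header ...
 */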
1011
1012/*
1013 * Build the RPC header and fill in the authorization info.
1014 * Returns the head of the mbuf list and the xid.
1015 */
1016
1017int
1018nfsm_rpchead(
1019	struct nfsreq *req,
1020	mbuf_t mrest,
1021	u_int64_t *xidp,
1022	mbuf_t *mreqp)
1023{
1024	struct nfsmount *nmp = req->r_nmp;
1025	int nfsvers = nmp->nm_vers;
1026	int proc = ((nfsvers == NFS_VER2) ? nfsv2_procid[req->r_procnum] : (int)req->r_procnum);
1027
1028	return nfsm_rpchead2(nmp, nmp->nm_sotype, NFS_PROG, nfsvers, proc,
1029			req->r_auth, req->r_cred, req, mrest, xidp, mreqp);
1030}
1031
1032/*
 * get_auxiliary_groups:	Gets the supplementary groups from a credential.
 *
 * IN:		cred:	credential to get the associated groups from.
 * OUT:		groups:	An array of gids of NGROUPS size.
 * IN:		count:	The number of groups to get, i.e. the number of groups the server supports.
 *
 * returns:	The number of groups found.
 *
 * Just a wrapper around kauth_cred_getgroups to handle the case of a server
 * supporting fewer groups than NGROUPS.
1043 */
1044static int
1045get_auxiliary_groups(kauth_cred_t cred, gid_t groups[NGROUPS], int count)
1046{
1047	gid_t pgid;
1048	int maxcount = count < NGROUPS ? count + 1 : NGROUPS;
1049	int i;
1050
1051	for (i = 0; i < NGROUPS; i++)
1052		groups[i] = -2; /* Initialize to the nobody group */
1053
1054	(void)kauth_cred_getgroups(cred, groups, &maxcount);
1055	if (maxcount < 1)
1056		return (maxcount);
1057
1058	/*
	 * kauth_cred_getgroups returns the primary group followed by the
	 * user's auxiliary groups. If the number of groups the server supports
1061	 * is less than NGROUPS, then we will drop the first group so that
1062	 * we can send one more group over the wire.
1063	 */
1064
1065
1066	if (count < NGROUPS) {
1067		pgid = kauth_cred_getgid(cred);
1068		if (pgid == groups[0]) {
1069			maxcount -= 1;
1070			for (i = 0;  i < maxcount; i++) {
1071				groups[i] = groups[i+1];
1072			}
1073		}
1074	}
1075
1076	return (maxcount);
1077}
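
/*
 * Worked example (illustrative, with assumed values): for a credential whose
 * kauth groups are { 20 (primary), 12, 61, 79 } and a server that supports
 * count = 3 groups, we ask kauth_cred_getgroups() for count + 1 = 4 entries,
 * notice that groups[0] matches the primary gid and drop it, and return 3
 * with groups[] = { 12, 61, 79 }.
 */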
1078
1079int
1080nfsm_rpchead2(struct nfsmount *nmp, int sotype, int prog, int vers, int proc, int auth_type,
1081	kauth_cred_t cred, struct nfsreq *req, mbuf_t mrest, u_int64_t *xidp, mbuf_t *mreqp)
1082{
1083	mbuf_t mreq, mb;
1084	int error, i, auth_len = 0, authsiz, reqlen;
1085	size_t headlen;
1086	struct nfsm_chain nmreq;
1087	gid_t grouplist[NGROUPS];
1088	int groupcount;
1089
1090	/* calculate expected auth length */
1091	switch (auth_type) {
1092		case RPCAUTH_NONE:
1093			auth_len = 0;
1094			break;
1095		case RPCAUTH_SYS:
1096		    {
1097			int count = nmp->nm_numgrps < NGROUPS ? nmp->nm_numgrps : NGROUPS;
1098
1099			if (!cred)
1100				return (EINVAL);
1101  			groupcount = get_auxiliary_groups(cred, grouplist, count);
1102			if (groupcount < 0)
1103				return (EINVAL);
1104 			auth_len = ((uint32_t)groupcount + 5) * NFSX_UNSIGNED;
1105			break;
1106		    }
1107		case RPCAUTH_KRB5:
1108		case RPCAUTH_KRB5I:
1109		case RPCAUTH_KRB5P:
1110			if (!req || !cred)
1111				return (EINVAL);
1112			auth_len = 5 * NFSX_UNSIGNED + 0; // zero context handle for now
1113			break;
1114		default:
1115			return (EINVAL);
1116		}
1117	authsiz = nfsm_rndup(auth_len);
1118
1119	/* allocate the packet */
1120	headlen = authsiz + 10 * NFSX_UNSIGNED;
1121	if (sotype == SOCK_STREAM) /* also include room for any RPC Record Mark */
1122		headlen += NFSX_UNSIGNED;
1123	if (headlen >= nfs_mbuf_minclsize) {
1124		error = mbuf_getpacket(MBUF_WAITOK, &mreq);
1125	} else {
1126		error = mbuf_gethdr(MBUF_WAITOK, MBUF_TYPE_DATA, &mreq);
1127		if (!error) {
1128			if (headlen < nfs_mbuf_mhlen)
1129				mbuf_align_32(mreq, headlen);
1130			else
1131				mbuf_align_32(mreq, 8 * NFSX_UNSIGNED);
1132		}
1133	}
1134	if (error) {
1135		/* unable to allocate packet */
1136		/* XXX should we keep statistics for these errors? */
1137		return (error);
1138	}
1139
1140	/*
1141	 * If the caller gave us a non-zero XID then use it because
1142	 * it may be a higher-level resend with a GSSAPI credential.
1143	 * Otherwise, allocate a new one.
1144	 */
1145	if (*xidp == 0)
1146		nfs_get_xid(xidp);
1147
1148	/* build the header(s) */
1149	nfsm_chain_init(&nmreq, mreq);
1150
1151	/* First, if it's a TCP stream insert space for an RPC record mark */
1152	if (sotype == SOCK_STREAM)
1153		nfsm_chain_add_32(error, &nmreq, 0);
1154
1155	/* Then the RPC header. */
1156	nfsm_chain_add_32(error, &nmreq, (*xidp & 0xffffffff));
1157	nfsm_chain_add_32(error, &nmreq, RPC_CALL);
1158	nfsm_chain_add_32(error, &nmreq, RPC_VER2);
1159	nfsm_chain_add_32(error, &nmreq, prog);
1160	nfsm_chain_add_32(error, &nmreq, vers);
1161	nfsm_chain_add_32(error, &nmreq, proc);
1162
1163add_cred:
1164	switch (auth_type) {
1165	case RPCAUTH_NONE:
1166		nfsm_chain_add_32(error, &nmreq, RPCAUTH_NONE); /* auth */
1167		nfsm_chain_add_32(error, &nmreq, 0);		/* length */
1168		nfsm_chain_add_32(error, &nmreq, RPCAUTH_NONE);	/* verf */
1169		nfsm_chain_add_32(error, &nmreq, 0);		/* length */
1170		nfsm_chain_build_done(error, &nmreq);
1171		/* Append the args mbufs */
1172		if (!error)
1173			error = mbuf_setnext(nmreq.nmc_mcur, mrest);
1174		break;
1175	case RPCAUTH_SYS: {
1176		nfsm_chain_add_32(error, &nmreq, RPCAUTH_SYS);
1177		nfsm_chain_add_32(error, &nmreq, authsiz);
1178		nfsm_chain_add_32(error, &nmreq, 0);	/* stamp */
1179		nfsm_chain_add_32(error, &nmreq, 0);	/* zero-length hostname */
1180		nfsm_chain_add_32(error, &nmreq, kauth_cred_getuid(cred));	/* UID */
1181		nfsm_chain_add_32(error, &nmreq, kauth_cred_getgid(cred));	/* GID */
1182		nfsm_chain_add_32(error, &nmreq, groupcount);/* additional GIDs */
1183		for (i = 0; i < groupcount; i++)
1184			nfsm_chain_add_32(error, &nmreq, grouplist[i]);
1185
1186		/* And the verifier... */
1187		nfsm_chain_add_32(error, &nmreq, RPCAUTH_NONE);	/* flavor */
1188		nfsm_chain_add_32(error, &nmreq, 0);		/* length */
1189		nfsm_chain_build_done(error, &nmreq);
1190
1191		/* Append the args mbufs */
1192		if (!error)
1193			error = mbuf_setnext(nmreq.nmc_mcur, mrest);
1194		break;
1195	}
1196	case RPCAUTH_KRB5:
1197	case RPCAUTH_KRB5I:
1198	case RPCAUTH_KRB5P:
1199		error = nfs_gss_clnt_cred_put(req, &nmreq, mrest);
1200		if (error == ENEEDAUTH) {
1201			int count = nmp->nm_numgrps < NGROUPS ? nmp->nm_numgrps : NGROUPS;
1202
1203			/*
1204			 * Use sec=sys for this user
1205			 */
1206			error = 0;
1207			req->r_auth = auth_type = RPCAUTH_SYS;
1208 			groupcount = get_auxiliary_groups(cred, grouplist, count);
1209			if (groupcount < 0)
1210				return (EINVAL);
1211 			auth_len = ((uint32_t)groupcount + 5) * NFSX_UNSIGNED;
1212			authsiz = nfsm_rndup(auth_len);
1213			goto add_cred;
1214		}
1215		break;
1216	};
1217
1218	/* finish setting up the packet */
1219	if (!error)
1220		error = mbuf_pkthdr_setrcvif(mreq, 0);
1221
1222	if (error) {
1223		mbuf_freem(mreq);
1224		return (error);
1225	}
1226
1227	/* Calculate the size of the request */
1228	reqlen = 0;
1229	for (mb = nmreq.nmc_mhead; mb; mb = mbuf_next(mb))
1230		reqlen += mbuf_len(mb);
1231
1232	mbuf_pkthdr_setlen(mreq, reqlen);
1233
1234	/*
1235	 * If the request goes on a TCP stream,
1236	 * set its size in the RPC record mark.
1237	 * The record mark count doesn't include itself
1238	 * and the last fragment bit is set.
1239	 */
1240	if (sotype == SOCK_STREAM)
1241		nfsm_chain_set_recmark(error, &nmreq,
1242			(reqlen - NFSX_UNSIGNED) | 0x80000000);
1243
1244	*mreqp = mreq;
1245	return (0);
1246}
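
/*
 * For reference, an illustrative sketch of the call message nfsm_rpchead2()
 * assembles (see RFC 5531 for the authoritative layout).  Each field below
 * is one XDR word unless noted, and the record mark is only present for
 * SOCK_STREAM (TCP) transports:
 *
 *	record mark	length of the rest of the message | 0x80000000 (last-fragment bit)
 *	xid		low 32 bits of *xidp
 *	msg type	RPC_CALL
 *	rpc version	RPC_VER2
 *	program		e.g. NFS_PROG
 *	version		e.g. 2, 3, or 4
 *	procedure	per-version procedure number
 *	credential	flavor + length + body (for RPCAUTH_SYS: stamp, hostname,
 *			uid, gid, gid count, gid list)
 *	verifier	flavor + length (RPCAUTH_NONE with zero length here)
 *	arguments	the caller's "mrest" mbufs appended after the header
 */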
1247
1248/*
1249 * Parse an NFS file attribute structure out of an mbuf chain.
1250 */
1251int
1252nfs_parsefattr(struct nfsm_chain *nmc, int nfsvers, struct nfs_vattr *nvap)
1253{
1254	int error = 0;
1255	enum vtype vtype;
1256	nfstype nvtype;
1257	u_short vmode;
1258	uint32_t val, val2;
1259	dev_t rdev;
1260
1261	val = val2 = 0;
1262	NVATTR_INIT(nvap);
1263
1264	NFS_BITMAP_SET(nvap->nva_bitmap, NFS_FATTR_TYPE);
1265	NFS_BITMAP_SET(nvap->nva_bitmap, NFS_FATTR_MODE);
1266	NFS_BITMAP_SET(nvap->nva_bitmap, NFS_FATTR_NUMLINKS);
1267	NFS_BITMAP_SET(nvap->nva_bitmap, NFS_FATTR_OWNER);
1268	NFS_BITMAP_SET(nvap->nva_bitmap, NFS_FATTR_OWNER_GROUP);
1269	NFS_BITMAP_SET(nvap->nva_bitmap, NFS_FATTR_SIZE);
1270	NFS_BITMAP_SET(nvap->nva_bitmap, NFS_FATTR_SPACE_USED);
1271	NFS_BITMAP_SET(nvap->nva_bitmap, NFS_FATTR_RAWDEV);
1272	NFS_BITMAP_SET(nvap->nva_bitmap, NFS_FATTR_FSID);
1273	NFS_BITMAP_SET(nvap->nva_bitmap, NFS_FATTR_FILEID);
1274	NFS_BITMAP_SET(nvap->nva_bitmap, NFS_FATTR_TIME_ACCESS);
1275	NFS_BITMAP_SET(nvap->nva_bitmap, NFS_FATTR_TIME_MODIFY);
1276	NFS_BITMAP_SET(nvap->nva_bitmap, NFS_FATTR_TIME_METADATA);
1277
1278	nfsm_chain_get_32(error, nmc, nvtype);
1279	nfsm_chain_get_32(error, nmc, vmode);
1280	nfsmout_if(error);
1281
1282	if (nfsvers == NFS_VER3) {
1283		nvap->nva_type = vtype = nfstov_type(nvtype, nfsvers);
1284	} else {
1285		/*
1286		 * The duplicate information returned in fa_type and fa_mode
1287		 * is an ambiguity in the NFS version 2 protocol.
1288		 *
1289		 * VREG should be taken literally as a regular file.  If a
1290		 * server intends to return some type information differently
1291		 * in the upper bits of the mode field (e.g. for sockets, or
1292		 * FIFOs), NFSv2 mandates fa_type to be VNON.  Anyway, we
1293		 * leave the examination of the mode bits even in the VREG
1294		 * case to avoid breakage for bogus servers, but we make sure
1295		 * that there are actually type bits set in the upper part of
1296		 * fa_mode (and failing that, trust the va_type field).
1297		 *
1298		 * NFSv3 cleared the issue, and requires fa_mode to not
1299		 * contain any type information (while also introducing
1300		 * sockets and FIFOs for fa_type).
1301		 */
1302		vtype = nfstov_type(nvtype, nfsvers);
1303		if ((vtype == VNON) || ((vtype == VREG) && ((vmode & S_IFMT) != 0)))
1304			vtype = IFTOVT(vmode);
1305		nvap->nva_type = vtype;
1306	}
1307
1308	nvap->nva_mode = (vmode & 07777);
1309
1310	nfsm_chain_get_32(error, nmc, nvap->nva_nlink);
1311	nfsm_chain_get_32(error, nmc, nvap->nva_uid);
1312	nfsm_chain_get_32(error, nmc, nvap->nva_gid);
1313
1314	if (nfsvers == NFS_VER3) {
1315		nfsm_chain_get_64(error, nmc, nvap->nva_size);
1316		nfsm_chain_get_64(error, nmc, nvap->nva_bytes);
1317		nfsm_chain_get_32(error, nmc, nvap->nva_rawdev.specdata1);
1318		nfsm_chain_get_32(error, nmc, nvap->nva_rawdev.specdata2);
1319		nfsmout_if(error);
1320		nfsm_chain_get_64(error, nmc, nvap->nva_fsid.major);
1321		nvap->nva_fsid.minor = 0;
1322		nfsm_chain_get_64(error, nmc, nvap->nva_fileid);
1323	} else {
1324		nfsm_chain_get_32(error, nmc, nvap->nva_size);
1325		nfsm_chain_adv(error, nmc, NFSX_UNSIGNED);
1326		nfsm_chain_get_32(error, nmc, rdev);
1327		nfsmout_if(error);
1328		nvap->nva_rawdev.specdata1 = major(rdev);
1329		nvap->nva_rawdev.specdata2 = minor(rdev);
1330		nfsm_chain_get_32(error, nmc, val); /* blocks */
1331		nfsmout_if(error);
1332		nvap->nva_bytes = val * NFS_FABLKSIZE;
1333		nfsm_chain_get_32(error, nmc, val);
1334		nfsmout_if(error);
1335		nvap->nva_fsid.major = (uint64_t)val;
1336		nvap->nva_fsid.minor = 0;
1337		nfsm_chain_get_32(error, nmc, val);
1338		nfsmout_if(error);
1339		nvap->nva_fileid = (uint64_t)val;
1340		/* Really ugly NFSv2 kludge. */
1341		if ((vtype == VCHR) && (rdev == (dev_t)0xffffffff))
1342			nvap->nva_type = VFIFO;
1343	}
1344	nfsm_chain_get_time(error, nmc, nfsvers,
1345		nvap->nva_timesec[NFSTIME_ACCESS],
1346		nvap->nva_timensec[NFSTIME_ACCESS]);
1347	nfsm_chain_get_time(error, nmc, nfsvers,
1348		nvap->nva_timesec[NFSTIME_MODIFY],
1349		nvap->nva_timensec[NFSTIME_MODIFY]);
1350	nfsm_chain_get_time(error, nmc, nfsvers,
1351		nvap->nva_timesec[NFSTIME_CHANGE],
1352		nvap->nva_timensec[NFSTIME_CHANGE]);
1353nfsmout:
1354	return (error);
1355}
1356
1357/*
1358 * Load the attribute cache (that lives in the nfsnode entry) with
1359 * the value pointed to by nvap, unless the file type in the attribute
 * cache doesn't match the file type in nvap, in which case we log a
1361 * warning and return ESTALE.
1362 *
1363 * If the dontshrink flag is set, then it's not safe to call ubc_setsize()
1364 * to shrink the size of the file.
1365 */
1366int
1367nfs_loadattrcache(
1368	nfsnode_t np,
1369	struct nfs_vattr *nvap,
1370	u_int64_t *xidp,
1371	int dontshrink)
1372{
1373	mount_t mp;
1374	vnode_t vp;
1375	struct timeval now;
1376	struct nfs_vattr *npnvap;
1377	int xattr = np->n_vattr.nva_flags & NFS_FFLAG_IS_ATTR;
1378	int referral = np->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL;
1379	int aclbit, monitored, error = 0;
1380	kauth_acl_t acl;
1381	struct nfsmount *nmp;
1382	uint32_t events = np->n_events;
1383
1384	if (np->n_hflag & NHINIT) {
1385		vp = NULL;
1386		mp = np->n_mount;
1387	} else {
1388		vp = NFSTOV(np);
1389		mp = vnode_mount(vp);
1390	}
1391	monitored = vp ? vnode_ismonitored(vp) : 0;
1392
1393	FSDBG_TOP(527, np, vp, *xidp >> 32, *xidp);
1394
1395	if (!((nmp = VFSTONFS(mp)))) {
1396		FSDBG_BOT(527, ENXIO, 1, 0, *xidp);
1397		return (ENXIO);
1398	}
1399
1400	if (*xidp < np->n_xid) {
1401		/*
1402		 * We have already updated attributes with a response from
1403		 * a later request.  The attributes we have here are probably
1404		 * stale so we drop them (just return).  However, our
1405		 * out-of-order receipt could be correct - if the requests were
1406		 * processed out of order at the server.  Given the uncertainty
1407		 * we invalidate our cached attributes.  *xidp is zeroed here
1408		 * to indicate the attributes were dropped - only getattr
1409		 * cares - it needs to retry the rpc.
1410		 */
1411		NATTRINVALIDATE(np);
1412		FSDBG_BOT(527, 0, np, np->n_xid, *xidp);
1413		*xidp = 0;
1414		return (0);
1415	}
1416
1417	if (vp && (nvap->nva_type != vnode_vtype(vp))) {
1418		/*
1419		 * The filehandle has changed type on us.  This can be
1420		 * caused by either the server not having unique filehandles
1421		 * or because another client has removed the previous
1422		 * filehandle and a new object (of a different type)
1423		 * has been created with the same filehandle.
1424		 *
1425		 * We can't simply switch the type on the vnode because
1426		 * there may be type-specific fields that need to be
1427		 * cleaned up or set up.
1428		 *
1429		 * So, what should we do with this vnode?
1430		 *
1431		 * About the best we can do is log a warning and return
1432		 * an error.  ESTALE is about the closest error, but it
1433		 * is a little strange that we come up with this error
1434		 * internally instead of simply passing it through from
1435		 * the server.  Hopefully, the vnode will be reclaimed
1436		 * soon so the filehandle can be reincarnated as the new
1437		 * object type.
1438		 */
1439		printf("nfs loadattrcache vnode changed type, was %d now %d\n",
1440			vnode_vtype(vp), nvap->nva_type);
1441		error = ESTALE;
1442		if (monitored)
1443			events |= VNODE_EVENT_DELETE;
1444		goto out;
1445	}
1446
1447	npnvap = &np->n_vattr;
1448
1449	/*
1450	 * The ACL cache needs special handling because it is not
1451	 * always updated.  Save current ACL cache state so it can
1452	 * be restored after copying the new attributes into place.
1453	 */
1454	aclbit = NFS_BITMAP_ISSET(npnvap->nva_bitmap, NFS_FATTR_ACL);
1455	acl = npnvap->nva_acl;
1456
1457	if (monitored) {
1458		/*
1459		 * For monitored nodes, check for attribute changes that should generate events.
1460		 */
1461		if (NFS_BITMAP_ISSET(nvap->nva_bitmap, NFS_FATTR_NUMLINKS) &&
1462		    (nvap->nva_nlink != npnvap->nva_nlink))
1463			events |= VNODE_EVENT_ATTRIB | VNODE_EVENT_LINK;
1464		if (events & VNODE_EVENT_PERMS)
1465			/* no need to do all the checking if it's already set */;
1466		else if (NFS_BITMAP_ISSET(nvap->nva_bitmap, NFS_FATTR_MODE) &&
1467			 (nvap->nva_mode != npnvap->nva_mode))
1468			events |= VNODE_EVENT_ATTRIB | VNODE_EVENT_PERMS;
1469		else if (NFS_BITMAP_ISSET(nvap->nva_bitmap, NFS_FATTR_OWNER) &&
1470			 (nvap->nva_uid != npnvap->nva_uid))
1471			events |= VNODE_EVENT_ATTRIB | VNODE_EVENT_PERMS;
1472		else if (NFS_BITMAP_ISSET(nvap->nva_bitmap, NFS_FATTR_OWNER_GROUP) &&
1473			 (nvap->nva_gid != npnvap->nva_gid))
1474			events |= VNODE_EVENT_ATTRIB | VNODE_EVENT_PERMS;
1475		else if (nmp->nm_vers >= NFS_VER4) {
1476			if (NFS_BITMAP_ISSET(nvap->nva_bitmap, NFS_FATTR_OWNER) &&
1477			    !kauth_guid_equal(&nvap->nva_uuuid, &npnvap->nva_uuuid))
1478				events |= VNODE_EVENT_ATTRIB | VNODE_EVENT_PERMS;
1479			else if (NFS_BITMAP_ISSET(nvap->nva_bitmap, NFS_FATTR_OWNER_GROUP) &&
1480				 !kauth_guid_equal(&nvap->nva_guuid, &npnvap->nva_guuid))
1481				events |= VNODE_EVENT_ATTRIB | VNODE_EVENT_PERMS;
1482			else if ((NFS_BITMAP_ISSET(nvap->nva_bitmap, NFS_FATTR_ACL) &&
1483				 nvap->nva_acl && npnvap->nva_acl &&
1484			         ((nvap->nva_acl->acl_entrycount != npnvap->nva_acl->acl_entrycount) ||
1485			          bcmp(nvap->nva_acl, npnvap->nva_acl, KAUTH_ACL_COPYSIZE(nvap->nva_acl)))))
1486				events |= VNODE_EVENT_ATTRIB | VNODE_EVENT_PERMS;
1487		}
1488		if (((nmp->nm_vers >= NFS_VER4) && (nvap->nva_change != npnvap->nva_change)) ||
1489		   (NFS_BITMAP_ISSET(npnvap->nva_bitmap, NFS_FATTR_TIME_MODIFY) &&
1490		    ((nvap->nva_timesec[NFSTIME_MODIFY] != npnvap->nva_timesec[NFSTIME_MODIFY]) ||
1491		     (nvap->nva_timensec[NFSTIME_MODIFY] != npnvap->nva_timensec[NFSTIME_MODIFY]))))
1492			events |= VNODE_EVENT_ATTRIB | VNODE_EVENT_WRITE;
1493		if (!events && NFS_BITMAP_ISSET(npnvap->nva_bitmap, NFS_FATTR_RAWDEV) &&
1494		    ((nvap->nva_rawdev.specdata1 != npnvap->nva_rawdev.specdata1) ||
1495		     (nvap->nva_rawdev.specdata2 != npnvap->nva_rawdev.specdata2)))
1496			events |= VNODE_EVENT_ATTRIB;
1497		if (!events && NFS_BITMAP_ISSET(npnvap->nva_bitmap, NFS_FATTR_FILEID) &&
1498		    (nvap->nva_fileid != npnvap->nva_fileid))
1499			events |= VNODE_EVENT_ATTRIB;
1500		if (!events && NFS_BITMAP_ISSET(npnvap->nva_bitmap, NFS_FATTR_ARCHIVE) &&
1501		    ((nvap->nva_flags & NFS_FFLAG_ARCHIVED) != (npnvap->nva_flags & NFS_FFLAG_ARCHIVED)))
1502			events |= VNODE_EVENT_ATTRIB;
1503		if (!events && NFS_BITMAP_ISSET(npnvap->nva_bitmap, NFS_FATTR_HIDDEN) &&
1504		    ((nvap->nva_flags & NFS_FFLAG_HIDDEN) != (npnvap->nva_flags & NFS_FFLAG_HIDDEN)))
1505			events |= VNODE_EVENT_ATTRIB;
1506		if (!events && NFS_BITMAP_ISSET(npnvap->nva_bitmap, NFS_FATTR_TIME_CREATE) &&
1507		    ((nvap->nva_timesec[NFSTIME_CREATE] != npnvap->nva_timesec[NFSTIME_CREATE]) ||
1508		     (nvap->nva_timensec[NFSTIME_CREATE] != npnvap->nva_timensec[NFSTIME_CREATE])))
1509			events |= VNODE_EVENT_ATTRIB;
1510		if (!events && NFS_BITMAP_ISSET(npnvap->nva_bitmap, NFS_FATTR_TIME_BACKUP) &&
1511		    ((nvap->nva_timesec[NFSTIME_BACKUP] != npnvap->nva_timesec[NFSTIME_BACKUP]) ||
1512		     (nvap->nva_timensec[NFSTIME_BACKUP] != npnvap->nva_timensec[NFSTIME_BACKUP])))
1513			events |= VNODE_EVENT_ATTRIB;
1514	}
1515
1516	/* Copy the attributes to the attribute cache */
1517	bcopy((caddr_t)nvap, (caddr_t)npnvap, sizeof(*nvap));
1518
1519	microuptime(&now);
1520	np->n_attrstamp = now.tv_sec;
1521	np->n_xid = *xidp;
1522	/* NFS_FFLAG_IS_ATTR and NFS_FFLAG_TRIGGER_REFERRAL need to be sticky... */
1523	if (vp && xattr)
1524		nvap->nva_flags |= xattr;
1525	if (vp && referral)
1526		nvap->nva_flags |= referral;
1527
1528	if (NFS_BITMAP_ISSET(npnvap->nva_bitmap, NFS_FATTR_ACL)) {
1529		/* we're updating the ACL */
1530		if (nvap->nva_acl) {
1531			/* make a copy of the acl for the cache */
1532			npnvap->nva_acl = kauth_acl_alloc(nvap->nva_acl->acl_entrycount);
1533			if (npnvap->nva_acl) {
1534				bcopy(nvap->nva_acl, npnvap->nva_acl, KAUTH_ACL_COPYSIZE(nvap->nva_acl));
1535			} else {
1536				/* can't make a copy to cache, invalidate ACL cache */
1537				NFS_BITMAP_CLR(npnvap->nva_bitmap, NFS_FATTR_ACL);
1538				NACLINVALIDATE(np);
1539				aclbit = 0;
1540			}
1541		}
1542		if (acl) {
1543			kauth_acl_free(acl);
1544			acl = NULL;
1545		}
1546	}
1547	if (NFS_BITMAP_ISSET(npnvap->nva_bitmap, NFS_FATTR_ACL)) {
1548		/* update the ACL timestamp */
1549		np->n_aclstamp = now.tv_sec;
1550	} else {
1551		/* we aren't updating the ACL, so restore original values */
1552		if (aclbit)
1553			NFS_BITMAP_SET(npnvap->nva_bitmap, NFS_FATTR_ACL);
1554		npnvap->nva_acl = acl;
1555	}
1556
1557#if CONFIG_TRIGGERS
1558	/*
1559	 * For NFSv4, if the fsid doesn't match the fsid for the mount, then
1560	 * this node is for a different file system on the server.  So we mark
	 * this node as a trigger node that will set off the mirror mount.
1562	 */
1563	if ((nmp->nm_vers >= NFS_VER4) && (nvap->nva_type == VDIR) &&
1564	    ((np->n_vattr.nva_fsid.major != nmp->nm_fsid.major) ||
1565	     (np->n_vattr.nva_fsid.minor != nmp->nm_fsid.minor)))
1566		np->n_vattr.nva_flags |= NFS_FFLAG_TRIGGER;
1567#endif
1568
1569	if (!vp || (nvap->nva_type != VREG)) {
1570		np->n_size = nvap->nva_size;
1571	} else if (nvap->nva_size != np->n_size) {
1572		FSDBG(527, np, nvap->nva_size, np->n_size, (nvap->nva_type == VREG) | (np->n_flag & NMODIFIED ? 6 : 4));
1573		if (!UBCINFOEXISTS(vp) || (dontshrink && (nvap->nva_size < np->n_size))) {
1574			/* asked not to shrink, so stick with current size */
1575			FSDBG(527, np, np->n_size, np->n_vattr.nva_size, 0xf00d0001);
1576			nvap->nva_size = np->n_size;
1577			NATTRINVALIDATE(np);
1578		} else if ((np->n_flag & NMODIFIED) && (nvap->nva_size < np->n_size)) {
1579			/* if we've modified, stick with larger size */
1580			FSDBG(527, np, np->n_size, np->n_vattr.nva_size, 0xf00d0002);
1581			nvap->nva_size = np->n_size;
1582			npnvap->nva_size = np->n_size;
1583		} else {
1584			/*
1585			 * n_size is protected by the data lock, so we need to
1586			 * defer updating it until it's safe.  We save the new size
1587			 * and set a flag and it'll get updated the next time we get/drop
1588			 * the data lock or the next time we do a getattr.
1589			 */
1590			np->n_newsize = nvap->nva_size;
1591			SET(np->n_flag, NUPDATESIZE);
1592			if (monitored)
1593				events |= VNODE_EVENT_ATTRIB | VNODE_EVENT_EXTEND;
1594		}
1595	}
1596
1597	if (np->n_flag & NCHG) {
1598		if (np->n_flag & NACC) {
1599			nvap->nva_timesec[NFSTIME_ACCESS] = np->n_atim.tv_sec;
1600			nvap->nva_timensec[NFSTIME_ACCESS] = np->n_atim.tv_nsec;
1601		}
1602		if (np->n_flag & NUPD) {
1603			nvap->nva_timesec[NFSTIME_MODIFY] = np->n_mtim.tv_sec;
1604			nvap->nva_timensec[NFSTIME_MODIFY] = np->n_mtim.tv_nsec;
1605		}
1606	}
1607
1608out:
1609	if (monitored && events)
1610		nfs_vnode_notify(np, events);
1611	FSDBG_BOT(527, error, np, np->n_size, *xidp);
1612	return (error);
1613}
1614
1615/*
1616 * Calculate the attribute timeout based on
1617 * how recently the file has been modified.
1618 */
1619int
1620nfs_attrcachetimeout(nfsnode_t np)
1621{
1622	struct nfsmount *nmp;
1623	struct timeval now;
1624	int isdir;
1625	uint32_t timeo;
1626
1627	nmp = NFSTONMP(np);
1628	if (nfs_mount_gone(nmp))
1629		return (0);
1630
1631	isdir = vnode_isdir(NFSTOV(np));
1632
1633	if ((nmp->nm_vers >= NFS_VER4) && (np->n_openflags & N_DELEG_MASK)) {
1634		/* If we have a delegation, we always use the max timeout. */
1635		timeo = isdir ? nmp->nm_acdirmax : nmp->nm_acregmax;
1636	} else if ((np)->n_flag & NMODIFIED) {
1637		/* If we have modifications, we always use the min timeout. */
1638		timeo = isdir ? nmp->nm_acdirmin : nmp->nm_acregmin;
1639	} else {
1640		/* Otherwise, we base the timeout on how old the file seems. */
1641		/* Note that if the client and server clocks are way out of sync, */
1642		/* timeout will probably get clamped to a min or max value */
1643		microtime(&now);
1644		timeo = (now.tv_sec - (np)->n_vattr.nva_timesec[NFSTIME_MODIFY]) / 10;
1645		if (isdir) {
1646			if (timeo < nmp->nm_acdirmin)
1647				timeo = nmp->nm_acdirmin;
1648			else if (timeo > nmp->nm_acdirmax)
1649				timeo = nmp->nm_acdirmax;
1650		} else {
1651			if (timeo < nmp->nm_acregmin)
1652				timeo = nmp->nm_acregmin;
1653			else if (timeo > nmp->nm_acregmax)
1654				timeo = nmp->nm_acregmax;
1655		}
1656	}
1657
1658	return (timeo);
1659}
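
/*
 * Worked example (illustrative, with assumed mount options acregmin=5 and
 * acregmax=60) for a regular file with no delegation and no local changes:
 *
 *	modified   20 seconds ago:   20/10 =   2  -> clamped up to acregmin (5s)
 *	modified  300 seconds ago:  300/10 =  30  -> used as is (30s)
 *	modified 7200 seconds ago: 7200/10 = 720  -> clamped down to acregmax (60s)
 */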
1660
1661/*
1662 * Check the attribute cache time stamp.
 * If the cache is valid, copy its contents to *nvaper and return 0;
 * otherwise return an error.
1665 * Must be called with the node locked.
1666 */
1667int
1668nfs_getattrcache(nfsnode_t np, struct nfs_vattr *nvaper, int flags)
1669{
1670	struct nfs_vattr *nvap;
1671	struct timeval nowup;
1672	int32_t timeo;
1673	struct nfsmount *nmp;
1674
1675	/* Check if the attributes are valid. */
1676	if (!NATTRVALID(np) || ((flags & NGA_ACL) && !NACLVALID(np))) {
1677		FSDBG(528, np, 0, 0xffffff01, ENOENT);
1678		OSAddAtomic64(1, &nfsstats.attrcache_misses);
1679		return (ENOENT);
1680	}
1681
1682	nmp = NFSTONMP(np);
1683	if (nfs_mount_gone(nmp))
1684		return (ENXIO);
1685	/*
1686	 * Verify the cached attributes haven't timed out.
1687	 * If the server isn't responding, skip the check
1688	 * and return cached attributes.
1689	 */
1690	if (!nfs_use_cache(nmp)) {
1691		timeo = nfs_attrcachetimeout(np);
1692		microuptime(&nowup);
1693		if ((nowup.tv_sec - np->n_attrstamp) >= timeo) {
1694			FSDBG(528, np, 0, 0xffffff02, ENOENT);
1695			OSAddAtomic64(1, &nfsstats.attrcache_misses);
1696			return (ENOENT);
1697		}
1698		if ((flags & NGA_ACL) && ((nowup.tv_sec - np->n_aclstamp) >= timeo)) {
1699			FSDBG(528, np, 0, 0xffffff02, ENOENT);
1700			OSAddAtomic64(1, &nfsstats.attrcache_misses);
1701			return (ENOENT);
1702		}
1703	}
1704
1705	nvap = &np->n_vattr;
1706	FSDBG(528, np, nvap->nva_size, np->n_size, 0xcace);
1707	OSAddAtomic64(1, &nfsstats.attrcache_hits);
1708
1709	if (nvap->nva_type != VREG) {
1710		np->n_size = nvap->nva_size;
1711	} else if (nvap->nva_size != np->n_size) {
1712		FSDBG(528, np, nvap->nva_size, np->n_size, (nvap->nva_type == VREG) | (np->n_flag & NMODIFIED ? 6 : 4));
1713		if ((np->n_flag & NMODIFIED) && (nvap->nva_size < np->n_size)) {
1714			/* if we've modified, stick with larger size */
1715			nvap->nva_size = np->n_size;
1716		} else {
1717			/*
1718			 * n_size is protected by the data lock, so we need to
1719			 * defer updating it until it's safe.  We save the new size
1720			 * and set a flag and it'll get updated the next time we get/drop
1721			 * the data lock or the next time we do a getattr.
1722			 */
1723			np->n_newsize = nvap->nva_size;
1724			SET(np->n_flag, NUPDATESIZE);
1725		}
1726	}
1727
1728	bcopy((caddr_t)nvap, (caddr_t)nvaper, sizeof(struct nfs_vattr));
1729	if (np->n_flag & NCHG) {
1730		if (np->n_flag & NACC) {
1731			nvaper->nva_timesec[NFSTIME_ACCESS] = np->n_atim.tv_sec;
1732			nvaper->nva_timensec[NFSTIME_ACCESS] = np->n_atim.tv_nsec;
1733		}
1734		if (np->n_flag & NUPD) {
1735			nvaper->nva_timesec[NFSTIME_MODIFY] = np->n_mtim.tv_sec;
1736			nvaper->nva_timensec[NFSTIME_MODIFY] = np->n_mtim.tv_nsec;
1737		}
1738	}
1739	if (nvap->nva_acl) {
1740		if (flags & NGA_ACL) {
1741			nvaper->nva_acl = kauth_acl_alloc(nvap->nva_acl->acl_entrycount);
1742			if (!nvaper->nva_acl)
1743				return (ENOMEM);
1744			bcopy(nvap->nva_acl, nvaper->nva_acl, KAUTH_ACL_COPYSIZE(nvap->nva_acl));
1745		} else {
1746			nvaper->nva_acl = NULL;
1747		}
1748	}
1749	return (0);
1750}
1751
1752/*
1753 * When creating file system objects:
1754 * Don't bother setting UID if it's the same as the credential performing the create.
1755 * Don't bother setting GID if it's the same as the directory or credential.
1756 */
1757void
1758nfs_avoid_needless_id_setting_on_create(nfsnode_t dnp, struct vnode_attr *vap, vfs_context_t ctx)
1759{
1760	if (VATTR_IS_ACTIVE(vap, va_uid)) {
1761		if (kauth_cred_getuid(vfs_context_ucred(ctx)) == vap->va_uid) {
1762			VATTR_CLEAR_ACTIVE(vap, va_uid);
1763			VATTR_CLEAR_ACTIVE(vap, va_uuuid);
1764		}
1765	}
1766	if (VATTR_IS_ACTIVE(vap, va_gid)) {
1767		if ((vap->va_gid == dnp->n_vattr.nva_gid) ||
1768		    (kauth_cred_getgid(vfs_context_ucred(ctx)) == vap->va_gid)) {
1769			VATTR_CLEAR_ACTIVE(vap, va_gid);
1770			VATTR_CLEAR_ACTIVE(vap, va_guuid);
1771		}
1772	}
1773}
1774
1775/*
1776 * Convert a universal address string to a sockaddr structure.
1777 *
1778 * Universal addresses can be in the following formats:
1779 *
1780 * d = decimal (IPv4)
1781 * x = hexadecimal (IPv6)
1782 * p = port (decimal)
1783 *
1784 * d.d.d.d
1785 * d.d.d.d.p.p
1786 * x:x:x:x:x:x:x:x
1787 * x:x:x:x:x:x:x:x.p.p
1788 * x:x:x:x:x:x:d.d.d.d
1789 * x:x:x:x:x:x:d.d.d.d.p.p
1790 *
1791 * IPv6 strings can also have a series of zeroes elided
1792 * IPv6 strings can also have a %scope suffix at the end (after any port)
1793 *
1794 * rules & exceptions:
1795 * - value before : is hex
1796 * - value before . is dec
1797 * - once . hit, all values are dec
1798 * - hex+port case means value before first dot is actually hex
1799 * - . is always preceded by digits except if last hex was double-colon
1800 *
1801 * scan, converting #s to bytes
1802 * first time a . is encountered, scan the rest to count them.
1803 * 2 dots = just port
1804 * 3 dots = just IPv4 no port
1805 * 5 dots = IPv4 and port
1806 */
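
/*
 * Usage sketch (illustrative, not compiled): nfs_uaddr2sockaddr() below takes
 * a universal address string and fills in a sockaddr, returning non-zero on
 * success.  The trailing ".p.p" octets encode the port as (p1 * 256) + p2,
 * so ".8.1" is port 2049.  The addresses shown are assumed example values.
 */
#if 0
	struct sockaddr_storage ss;

	if (nfs_uaddr2sockaddr("10.1.2.3.8.1", (struct sockaddr *)&ss)) {
		/* AF_INET, address 10.1.2.3, port 2049 */
	}
	if (nfs_uaddr2sockaddr("fe80::1.8.1%en0", (struct sockaddr *)&ss)) {
		/* AF_INET6, fe80::1 with scope "en0", port 2049 */
	}
#endif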
1807
1808#define IS_DIGIT(C) \
1809	(((C) >= '0') && ((C) <= '9'))
1810
1811#define IS_XDIGIT(C) \
1812	(IS_DIGIT(C) || \
1813	 (((C) >= 'A') && ((C) <= 'F')) || \
1814	 (((C) >= 'a') && ((C) <= 'f')))
1815
1816int
1817nfs_uaddr2sockaddr(const char *uaddr, struct sockaddr *addr)
1818{
1819	const char *p, *pd;	/* pointers to current character in scan */
1820	const char *pnum;	/* pointer to current number to decode */
1821	const char *pscope;	/* pointer to IPv6 scope ID */
1822	uint8_t a[18];		/* octet array to store address bytes */
1823	int i;			/* index of next octet to decode */
1824	int dci;		/* index of octet to insert double-colon zeroes */
1825	int dcount, xdcount;	/* count of digits in current number */
1826	int needmore;		/* set when we know we need more input (e.g. after colon, period) */
1827	int dots;		/* # of dots */
1828	int hex;		/* contains hex values */
1829	unsigned long val;	/* decoded value */
1830	int s;			/* index used for sliding array to insert elided zeroes */
1831
1832#define HEXVALUE	0
1833#define DECIMALVALUE	1
1834#define GET(TYPE) \
1835	do { \
1836		if ((dcount <= 0) || (dcount > (((TYPE) == DECIMALVALUE) ? 3 : 4))) \
1837			return (0); \
1838		if (((TYPE) == DECIMALVALUE) && xdcount) \
1839			return (0); \
1840		val = strtoul(pnum, NULL, ((TYPE) == DECIMALVALUE) ? 10 : 16); \
1841		if (((TYPE) == DECIMALVALUE) && (val >= 256)) \
1842			return (0); \
1843		/* check if there is room left in the array */ \
1844		if (i > (int)(sizeof(a) - (((TYPE) == HEXVALUE) ? 2 : 1) - ((dci != -1) ? 2 : 0))) \
1845			return (0); \
1846		if ((TYPE) == HEXVALUE) \
1847			a[i++] = ((val >> 8) & 0xff); \
1848		a[i++] = (val & 0xff); \
1849	} while (0)
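
/*
 * GET(HEXVALUE) appends the number just scanned to a[] as two bytes
 * (big-endian, up to 4 hex digits); GET(DECIMALVALUE) appends a single byte
 * (0-255, up to 3 decimal digits).  Both bail out of the parse (return 0)
 * if the number is malformed or a[] would overflow.
 */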
1850
1851	hex = 0;
1852	dots = 0;
1853	dci = -1;
1854	i = dcount = xdcount = 0;
1855	pnum = p = uaddr;
1856	pscope = NULL;
1857	needmore = 1;
1858	if ((*p == ':') && (*++p != ':')) /* if it starts with colon, gotta be a double */
1859		return (0);
1860
1861	while (*p) {
1862		if (IS_XDIGIT(*p)) {
1863			dcount++;
1864			if (!IS_DIGIT(*p))
1865				xdcount++;
1866			needmore = 0;
1867			p++;
1868		} else if (*p == '.') {
1869			/* rest is decimal IPv4 dotted quad and/or port */
1870			if (!dots) {
1871				/* this is the first, so count them */
1872				for (pd = p; *pd; pd++) {
1873					if (*pd == '.') {
1874						if (++dots > 5)
1875							return (0);
1876					} else if (hex && (*pd == '%')) {
1877						break;
1878					} else if ((*pd < '0') || (*pd > '9')) {
1879						return (0);
1880					}
1881				}
1882				if ((dots != 2) && (dots != 3) && (dots != 5))
1883					return (0);
1884				if (hex && (dots == 2)) { /* hex+port */
1885					if (!dcount && needmore)
1886						return (0);
1887					if (dcount) /* last hex may be elided zero */
1888						GET(HEXVALUE);
1889				} else {
1890					GET(DECIMALVALUE);
1891				}
1892			} else {
1893				GET(DECIMALVALUE);
1894			}
1895			dcount = xdcount = 0;
1896			needmore = 1;
1897			pnum = ++p;
1898		} else if (*p == ':') {
1899			hex = 1;
1900			if (dots)
1901				return (0);
1902			if (!dcount) { /* missing number, probably double colon */
1903				if (dci >= 0) /* can only have one double colon */
1904					return (0);
1905				dci = i;
1906				needmore = 0;
1907			} else {
1908				GET(HEXVALUE);
1909				dcount = xdcount = 0;
1910				needmore = 1;
1911			}
1912			pnum = ++p;
1913		} else if (*p == '%') { /* scope ID delimiter */
1914			if (!hex)
1915				return (0);
1916			p++;
1917			pscope = p;
1918			break;
1919		} else { /* unexpected character */
1920			return (0);
1921		}
1922	}
1923	if (needmore && !dcount)
1924		return (0);
1925	if (dcount) /* decode trailing number */
1926		GET(dots ? DECIMALVALUE : HEXVALUE);
1927	if (dci >= 0) {  /* got a double-colon at i, need to insert a range of zeroes */
1928		/* if we got a port, slide to end of array */
1929		/* otherwise, slide to end of address (non-port) values */
1930		int end = ((dots == 2) || (dots == 5)) ? sizeof(a) : (sizeof(a) - 2);
1931		if (i % 2) /* length of zero range must be multiple of 2 */
1932			return (0);
1933		if (i >= end) /* no room? */
1934			return (0);
1935		/* slide (i-dci) numbers up from index dci */
1936		for (s=0; s < (i - dci); s++)
1937			a[end-1-s] = a[i-1-s];
1938		/* zero (end-i) numbers at index dci */
1939		for (s=0; s < (end - i); s++)
1940			a[dci+s] = 0;
1941		i = end;
1942	}
1943
1944	/* copy out resulting socket address */
1945	if (hex) {
1946		struct sockaddr_in6 *sin6 = (struct sockaddr_in6*)addr;
1947		if ((((dots == 0) || (dots == 3)) && (i != (sizeof(a)-2))))
1948			return (0);
1949		if ((((dots == 2) || (dots == 5)) && (i != sizeof(a))))
1950			return (0);
1951		bzero(sin6, sizeof(struct sockaddr_in6));
1952		sin6->sin6_len = sizeof(struct sockaddr_in6);
1953		sin6->sin6_family = AF_INET6;
1954		bcopy(a, &sin6->sin6_addr.s6_addr, sizeof(struct in6_addr));
1955		if ((dots == 5) || (dots == 2))
1956			sin6->sin6_port = htons((a[16] << 8) | a[17]);
1957		if (pscope) {
1958			for (p=pscope; IS_DIGIT(*p); p++)
1959				;
1960			if (*p && !IS_DIGIT(*p)) { /* name */
1961				ifnet_t interface = NULL;
1962				if (ifnet_find_by_name(pscope, &interface) == 0)
1963					sin6->sin6_scope_id = ifnet_index(interface);
1964				if (interface)
1965					ifnet_release(interface);
1966			} else { /* decimal number */
1967				sin6->sin6_scope_id = strtoul(pscope, NULL, 10);
1968			}
1969			/* XXX should we also embed scope id for linklocal? */
1970		}
1971	} else {
1972		struct sockaddr_in *sin = (struct sockaddr_in*)addr;
1973		if ((dots != 3) && (dots != 5))
1974			return (0);
1975		if ((dots == 3) && (i != 4))
1976			return (0);
1977		if ((dots == 5) && (i != 6))
1978			return (0);
1979		bzero(sin, sizeof(struct sockaddr_in));
1980		sin->sin_len = sizeof(struct sockaddr_in);
1981		sin->sin_family = AF_INET;
1982		bcopy(a, &sin->sin_addr.s_addr, sizeof(struct in_addr));
1983		if (dots == 5)
1984			sin->sin_port = htons((a[4] << 8) | a[5]);
1985	}
1986	return (1);
1987}
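
/*
 * Minimal usage sketch: the caller must pass storage big enough for either
 * address family (a struct sockaddr_storage works), and the return value is
 * 1 on success, 0 if the string could not be parsed.  For example:
 *
 *	struct sockaddr_storage ss;
 *	if (nfs_uaddr2sockaddr("10.0.0.1.8.1", (struct sockaddr *)&ss)) {
 *		... ss now holds AF_INET 10.0.0.1, port 2049 ...
 *	}
 */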
1988
1989
1990/* NFS Client debugging support */
1991uint32_t nfs_debug_ctl;
1992
1993#include <libkern/libkern.h>
1994#include <stdarg.h>
1995
1996void
1997nfs_printf(int facility, int level, const char *fmt, ...)
1998{
1999	va_list ap;
2000
2001	if ((uint32_t)level > NFS_DEBUG_LEVEL)
2002		return;
2003	if (NFS_DEBUG_FACILITY && !((uint32_t)facility & NFS_DEBUG_FACILITY))
2004		return;
2005
2006	va_start(ap, fmt);
2007	vprintf(fmt, ap);
2008	va_end(ap);
2009}
2010
2011/* Is a mount gone away? */
2012int
2013nfs_mount_gone(struct nfsmount *nmp)
2014{
2015	return (!nmp || vfs_isforce(nmp->nm_mountp) || (nmp->nm_state & (NFSSTA_FORCE | NFSSTA_DEAD)));
2016}
2017
2018/*
2019 * Return some of the more significant mount options
 * as a string, e.g. "ro,hard,intr,tcp,vers=3,sec=krb5,deadtimeout=0".
2021 */
2022int
2023nfs_mountopts(struct nfsmount *nmp, char *buf, int buflen)
2024{
2025	int c;
2026
2027	c = snprintf(buf, buflen, "%s,%s,%s,%s,vers=%d,sec=%s,%sdeadtimeout=%d",
2028		(vfs_flags(nmp->nm_mountp) & MNT_RDONLY) ? "ro" : "rw",
2029		NMFLAG(nmp, SOFT) ? "soft" : "hard",
2030		NMFLAG(nmp, INTR) ? "intr" : "nointr",
2031		nmp->nm_sotype == SOCK_STREAM ? "tcp" : "udp",
2032		nmp->nm_vers,
2033		nmp->nm_auth == RPCAUTH_KRB5  ? "krb5" :
2034		nmp->nm_auth == RPCAUTH_KRB5I ? "krb5i" :
2035		nmp->nm_auth == RPCAUTH_KRB5P ? "krb5p" :
2036		nmp->nm_auth == RPCAUTH_SYS   ? "sys" : "none",
2037		nmp->nm_lockmode == NFS_LOCK_MODE_ENABLED ?  "locks," :
2038		nmp->nm_lockmode == NFS_LOCK_MODE_DISABLED ? "nolocks," :
2039		nmp->nm_lockmode == NFS_LOCK_MODE_LOCAL ? "locallocks," : "",
2040		nmp->nm_deadtimeout);
2041
	/* snprintf() returns the length that would have been written; >= buflen means truncation */
	return (c >= buflen ? ENOMEM : 0);
2043}
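
/*
 * For example, a read-write TCP NFSv3 mount using AUTH_SYS with NLM locking
 * enabled and no dead timeout would typically come back as:
 *
 *	"rw,hard,nointr,tcp,vers=3,sec=sys,locks,deadtimeout=0"
 */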
2044
2045#endif /* NFSCLIENT */
2046
2047/*
2048 * Schedule a callout thread to run an NFS timer function
2049 * interval milliseconds in the future.
2050 */
2051void
2052nfs_interval_timer_start(thread_call_t call, int interval)
2053{
2054	uint64_t deadline;
2055
2056	clock_interval_to_deadline(interval, 1000 * 1000, &deadline);
2057	thread_call_enter_delayed(call, deadline);
2058}
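
/*
 * For example, nfs_interval_timer_start(call, 500) arms "call" to fire
 * roughly half a second from now: the 1000 * 1000 scale factor tells
 * clock_interval_to_deadline() that the interval is in milliseconds
 * (i.e. units of a million nanoseconds).
 */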
2059
2060
2061#if NFSSERVER
2062
2063int nfsrv_cmp_secflavs(struct nfs_sec *, struct nfs_sec *);
2064int nfsrv_hang_addrlist(struct nfs_export *, struct user_nfs_export_args *);
2065int nfsrv_free_netopt(struct radix_node *, void *);
2066int nfsrv_free_addrlist(struct nfs_export *, struct user_nfs_export_args *);
2067struct nfs_export_options *nfsrv_export_lookup(struct nfs_export *, mbuf_t);
2068struct nfs_export *nfsrv_fhtoexport(struct nfs_filehandle *);
2069struct nfs_user_stat_node *nfsrv_get_user_stat_node(struct nfs_active_user_list *, struct sockaddr *, uid_t);
2070void nfsrv_init_user_list(struct nfs_active_user_list *);
2071void nfsrv_free_user_list(struct nfs_active_user_list *);
2072
2073/*
2074 * add NFSv3 WCC data to an mbuf chain
2075 */
2076int
2077nfsm_chain_add_wcc_data_f(
2078	struct nfsrv_descript *nd,
2079	struct nfsm_chain *nmc,
2080	int preattrerr,
2081	struct vnode_attr *prevap,
2082	int postattrerr,
2083	struct vnode_attr *postvap)
2084{
2085	int error = 0;
2086
2087	if (preattrerr) {
2088		nfsm_chain_add_32(error, nmc, FALSE);
2089	} else {
2090		nfsm_chain_add_32(error, nmc, TRUE);
2091		nfsm_chain_add_64(error, nmc, prevap->va_data_size);
2092		nfsm_chain_add_time(error, nmc, NFS_VER3, &prevap->va_modify_time);
2093		nfsm_chain_add_time(error, nmc, NFS_VER3, &prevap->va_change_time);
2094	}
2095	nfsm_chain_add_postop_attr(error, nd, nmc, postattrerr, postvap);
2096
2097	return (error);
2098}
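
/*
 * The resulting wire format is the NFSv3 wcc_data: a pre_op_attr (a boolean
 * follows-flag, then size/mtime/ctime when the flag is TRUE) followed by a
 * post_op_attr (a boolean follows-flag, then a full fattr3 when TRUE, added
 * above by nfsm_chain_add_postop_attr()).
 */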
2099
2100/*
2101 * Extract a lookup path from the given mbufs and store it in
2102 * a newly allocated buffer saved in the given nameidata structure.
2103 */
2104int
2105nfsm_chain_get_path_namei(
2106	struct nfsm_chain *nmc,
2107	uint32_t len,
2108	struct nameidata *nip)
2109{
2110	struct componentname *cnp = &nip->ni_cnd;
2111	int error = 0;
2112	char *cp;
2113
2114	if (len > (MAXPATHLEN - 1))
2115		return (ENAMETOOLONG);
2116
2117	/*
2118	 * Get a buffer for the name to be translated, and copy the
2119	 * name into the buffer.
2120	 */
2121	MALLOC_ZONE(cnp->cn_pnbuf, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK);
2122	if (!cnp->cn_pnbuf)
2123		return (ENOMEM);
2124	cnp->cn_pnlen = MAXPATHLEN;
2125	cnp->cn_flags |= HASBUF;
2126
2127	/* Copy the name from the mbuf list to the string */
2128	cp = cnp->cn_pnbuf;
2129	nfsm_chain_get_opaque(error, nmc, len, cp);
2130	if (error)
2131		goto out;
2132	cnp->cn_pnbuf[len] = '\0';
2133
2134	/* sanity check the string */
2135	if ((strlen(cp) != len) || strchr(cp, '/'))
2136		error = EACCES;
2137out:
2138	if (error) {
2139		if (cnp->cn_pnbuf)
2140			FREE_ZONE(cnp->cn_pnbuf, MAXPATHLEN, M_NAMEI);
2141		cnp->cn_flags &= ~HASBUF;
2142	} else {
2143		nip->ni_pathlen = len;
2144	}
2145	return (error);
2146}
2147
2148/*
2149 * Set up nameidata for a lookup() call and do it.
2150 */
2151int
2152nfsrv_namei(
2153	struct nfsrv_descript *nd,
2154	vfs_context_t ctx,
2155	struct nameidata *nip,
2156	struct nfs_filehandle *nfhp,
2157	vnode_t *retdirp,
2158	struct nfs_export **nxp,
2159	struct nfs_export_options **nxop)
2160{
2161	vnode_t dp;
2162	int error;
2163	struct componentname *cnp = &nip->ni_cnd;
2164	uint32_t cnflags;
2165	char *tmppn;
2166
2167	*retdirp = NULL;
2168
2169	/*
2170	 * Extract and set starting directory.
2171	 */
2172	error = nfsrv_fhtovp(nfhp, nd, &dp, nxp, nxop);
2173	if (error)
2174		goto out;
2175	error = nfsrv_credcheck(nd, ctx, *nxp, *nxop);
2176	if (error || (vnode_vtype(dp) != VDIR)) {
2177		vnode_put(dp);
		error = error ? error : ENOTDIR;	/* don't mask an error from nfsrv_credcheck() */
2179		goto out;
2180	}
2181	*retdirp = dp;
2182
2183	nip->ni_cnd.cn_context = ctx;
2184
2185	if (*nxop && ((*nxop)->nxo_flags & NX_READONLY))
2186		cnp->cn_flags |= RDONLY;
2187
2188	cnp->cn_flags |= NOCROSSMOUNT;
2189	cnp->cn_nameptr = cnp->cn_pnbuf;
2190	nip->ni_usedvp = nip->ni_startdir = dp;
2191
2192	/*
2193	 * And call lookup() to do the real work
2194	 */
2195	cnflags = nip->ni_cnd.cn_flags; /* store in case we have to restore */
2196	while ((error = lookup(nip)) == ERECYCLE) {
2197		nip->ni_cnd.cn_flags = cnflags;
2198		cnp->cn_nameptr = cnp->cn_pnbuf;
2199		nip->ni_usedvp = nip->ni_dvp = nip->ni_startdir = dp;
2200	}
2201	if (error)
2202		goto out;
2203
2204	/* Check for encountering a symbolic link */
2205	if (cnp->cn_flags & ISSYMLINK) {
2206		if (cnp->cn_flags & (LOCKPARENT | WANTPARENT))
2207			vnode_put(nip->ni_dvp);
2208		if (nip->ni_vp) {
2209			vnode_put(nip->ni_vp);
2210			nip->ni_vp = NULL;
2211		}
2212		error = EINVAL;
2213	}
2214out:
2215	if (error) {
2216		tmppn = cnp->cn_pnbuf;
2217		cnp->cn_pnbuf = NULL;
2218		cnp->cn_flags &= ~HASBUF;
2219		FREE_ZONE(tmppn, cnp->cn_pnlen, M_NAMEI);
2220	}
2221	return (error);
2222}
2223
2224/*
2225 * A fiddled version of m_adj() that ensures null fill to a 4-byte
2226 * boundary and only trims off the back end
2227 */
2228void
2229nfsm_adj(mbuf_t mp, int len, int nul)
2230{
2231	mbuf_t m, mnext;
2232	int count, i, mlen;
2233	char *cp;
2234
2235	/*
2236	 * Trim from tail.  Scan the mbuf chain,
2237	 * calculating its length and finding the last mbuf.
2238	 * If the adjustment only affects this mbuf, then just
2239	 * adjust and return.  Otherwise, rescan and truncate
2240	 * after the remaining size.
2241	 */
2242	count = 0;
2243	m = mp;
2244	for (;;) {
2245		mlen = mbuf_len(m);
2246		count += mlen;
2247		mnext = mbuf_next(m);
2248		if (mnext == NULL)
2249			break;
2250		m = mnext;
2251	}
2252	if (mlen > len) {
2253		mlen -= len;
2254		mbuf_setlen(m, mlen);
2255		if (nul > 0) {
2256			cp = (caddr_t)mbuf_data(m) + mlen - nul;
2257			for (i = 0; i < nul; i++)
2258				*cp++ = '\0';
2259		}
2260		return;
2261	}
2262	count -= len;
2263	if (count < 0)
2264		count = 0;
2265	/*
2266	 * Correct length for chain is "count".
2267	 * Find the mbuf with last data, adjust its length,
2268	 * and toss data from remaining mbufs on chain.
2269	 */
2270	for (m = mp; m; m = mbuf_next(m)) {
2271		mlen = mbuf_len(m);
2272		if (mlen >= count) {
2273			mlen = count;
2274			mbuf_setlen(m, count);
2275			if (nul > 0) {
2276				cp = (caddr_t)mbuf_data(m) + mlen - nul;
2277				for (i = 0; i < nul; i++)
2278					*cp++ = '\0';
2279			}
2280			break;
2281		}
2282		count -= mlen;
2283	}
2284	for (m = mbuf_next(m); m; m = mbuf_next(m))
2285		mbuf_setlen(m, 0);
2286}
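
/*
 * For example, with a two-mbuf chain of lengths 100 and 60, nfsm_adj(mp, 10, 4)
 * shortens the last mbuf to 50 bytes and zeroes its final 4 bytes; with
 * len == 70 (more than the last mbuf holds), the chain is instead truncated
 * to 90 bytes total, the cut landing in the first mbuf and the trailing
 * mbuf's length being set to 0.
 */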
2287
2288/*
2289 * Trim the header out of the mbuf list and trim off any trailing
2290 * junk so that the mbuf list has only the write data.
2291 */
2292int
2293nfsm_chain_trim_data(struct nfsm_chain *nmc, int len, int *mlen)
2294{
2295	int cnt = 0, dlen, adjust;
2296	caddr_t data;
2297	mbuf_t m;
2298
2299	if (mlen)
2300		*mlen = 0;
2301
2302	/* trim header */
2303	for (m = nmc->nmc_mhead; m && (m != nmc->nmc_mcur); m = mbuf_next(m))
2304		mbuf_setlen(m, 0);
2305	if (!m)
2306		return (EIO);
2307
2308	/* trim current mbuf */
2309	data = mbuf_data(m);
2310	dlen = mbuf_len(m);
2311	adjust = nmc->nmc_ptr - data;
2312	dlen -= adjust;
2313	if ((dlen > 0) && (adjust > 0)) {
2314		if (mbuf_setdata(m, nmc->nmc_ptr, dlen))
2315			return(EIO);
2316	} else
2317		mbuf_setlen(m, dlen);
2318
2319	/* skip next len bytes  */
2320	for (; m && (cnt < len); m = mbuf_next(m)) {
2321		dlen = mbuf_len(m);
2322		cnt += dlen;
2323		if (cnt > len) {
2324			/* truncate to end of data */
2325			mbuf_setlen(m, dlen - (cnt - len));
2326			if (m == nmc->nmc_mcur)
2327				nmc->nmc_left -= (cnt - len);
2328			cnt = len;
2329		}
2330	}
2331	if (mlen)
2332		*mlen = cnt;
2333
2334	/* trim any trailing data */
2335	if (m == nmc->nmc_mcur)
2336		nmc->nmc_left = 0;
2337	for (; m; m = mbuf_next(m))
2338		mbuf_setlen(m, 0);
2339
2340	return (0);
2341}
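
/*
 * This is typically used on WRITE requests: once nmc_ptr points at the first
 * byte of write data, the mbufs holding the preceding header are zero-length'd,
 * the current mbuf is adjusted to begin at the data, and anything past "len"
 * bytes is clipped.  *mlen reports how many data bytes were actually present,
 * which may be less than "len" for a short request.
 */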
2342
2343int
2344nfsm_chain_add_fattr(
2345	struct nfsrv_descript *nd,
2346	struct nfsm_chain *nmc,
2347	struct vnode_attr *vap)
2348{
2349	int error = 0;
2350
2351	// XXX Should we assert here that all fields are supported?
2352
2353	nfsm_chain_add_32(error, nmc, vtonfs_type(vap->va_type, nd->nd_vers));
2354	if (nd->nd_vers == NFS_VER3) {
2355		nfsm_chain_add_32(error, nmc, vap->va_mode & 07777);
2356	} else {
2357		nfsm_chain_add_32(error, nmc, vtonfsv2_mode(vap->va_type, vap->va_mode));
2358	}
2359	nfsm_chain_add_32(error, nmc, vap->va_nlink);
2360	nfsm_chain_add_32(error, nmc, vap->va_uid);
2361	nfsm_chain_add_32(error, nmc, vap->va_gid);
2362	if (nd->nd_vers == NFS_VER3) {
2363		nfsm_chain_add_64(error, nmc, vap->va_data_size);
2364		nfsm_chain_add_64(error, nmc, vap->va_data_alloc);
2365		nfsm_chain_add_32(error, nmc, major(vap->va_rdev));
2366		nfsm_chain_add_32(error, nmc, minor(vap->va_rdev));
2367		nfsm_chain_add_64(error, nmc, vap->va_fsid);
2368		nfsm_chain_add_64(error, nmc, vap->va_fileid);
2369	} else {
2370		nfsm_chain_add_32(error, nmc, vap->va_data_size);
2371		nfsm_chain_add_32(error, nmc, NFS_FABLKSIZE);
2372		if (vap->va_type == VFIFO)
2373			nfsm_chain_add_32(error, nmc, 0xffffffff);
2374		else
2375			nfsm_chain_add_32(error, nmc, vap->va_rdev);
2376		nfsm_chain_add_32(error, nmc, vap->va_data_alloc / NFS_FABLKSIZE);
2377		nfsm_chain_add_32(error, nmc, vap->va_fsid);
2378		nfsm_chain_add_32(error, nmc, vap->va_fileid);
2379	}
2380	nfsm_chain_add_time(error, nmc, nd->nd_vers, &vap->va_access_time);
2381	nfsm_chain_add_time(error, nmc, nd->nd_vers, &vap->va_modify_time);
2382	nfsm_chain_add_time(error, nmc, nd->nd_vers, &vap->va_change_time);
2383
2384	return (error);
2385}
2386
2387int
2388nfsm_chain_get_sattr(
2389	struct nfsrv_descript *nd,
2390	struct nfsm_chain *nmc,
2391	struct vnode_attr *vap)
2392{
2393	int error = 0;
2394	uint32_t val = 0;
2395	uint64_t val64 = 0;
2396	struct timespec now;
2397
2398	if (nd->nd_vers == NFS_VER2) {
2399		/*
2400		 * There is/was a bug in the Sun client that puts 0xffff in the mode
2401		 * field of sattr when it should put in 0xffffffff.  The u_short
2402		 * doesn't sign extend.  So check the low order 2 bytes for 0xffff.
2403		 */
2404		nfsm_chain_get_32(error, nmc, val);
2405		if ((val & 0xffff) != 0xffff) {
2406			VATTR_SET(vap, va_mode, val & 07777);
2407			/* save the "type" bits for NFSv2 create */
2408			VATTR_SET(vap, va_type, IFTOVT(val));
2409			VATTR_CLEAR_ACTIVE(vap, va_type);
2410		}
2411		nfsm_chain_get_32(error, nmc, val);
2412		if (val != (uint32_t)-1)
2413			VATTR_SET(vap, va_uid, val);
2414		nfsm_chain_get_32(error, nmc, val);
2415		if (val != (uint32_t)-1)
2416			VATTR_SET(vap, va_gid, val);
2417		/* save the "size" bits for NFSv2 create (even if they appear unset) */
2418		nfsm_chain_get_32(error, nmc, val);
2419		VATTR_SET(vap, va_data_size, val);
2420		if (val == (uint32_t)-1)
2421			VATTR_CLEAR_ACTIVE(vap, va_data_size);
2422		nfsm_chain_get_time(error, nmc, NFS_VER2,
2423			vap->va_access_time.tv_sec,
2424			vap->va_access_time.tv_nsec);
2425		if (vap->va_access_time.tv_sec != -1)
2426			VATTR_SET_ACTIVE(vap, va_access_time);
2427		nfsm_chain_get_time(error, nmc, NFS_VER2,
2428			vap->va_modify_time.tv_sec,
2429			vap->va_modify_time.tv_nsec);
2430		if (vap->va_modify_time.tv_sec != -1)
2431			VATTR_SET_ACTIVE(vap, va_modify_time);
2432		return (error);
2433	}
2434
2435	/* NFSv3 */
2436	nfsm_chain_get_32(error, nmc, val);
2437	if (val) {
2438		nfsm_chain_get_32(error, nmc, val);
2439		VATTR_SET(vap, va_mode, val & 07777);
2440	}
2441	nfsm_chain_get_32(error, nmc, val);
2442	if (val) {
2443		nfsm_chain_get_32(error, nmc, val);
2444		VATTR_SET(vap, va_uid, val);
2445	}
2446	nfsm_chain_get_32(error, nmc, val);
2447	if (val) {
2448		nfsm_chain_get_32(error, nmc, val);
2449		VATTR_SET(vap, va_gid, val);
2450	}
2451	nfsm_chain_get_32(error, nmc, val);
2452	if (val) {
2453		nfsm_chain_get_64(error, nmc, val64);
2454		VATTR_SET(vap, va_data_size, val64);
2455	}
2456	nanotime(&now);
2457	nfsm_chain_get_32(error, nmc, val);
2458	switch (val) {
2459	case NFS_TIME_SET_TO_CLIENT:
2460		nfsm_chain_get_time(error, nmc, nd->nd_vers,
2461			vap->va_access_time.tv_sec,
2462			vap->va_access_time.tv_nsec);
2463		VATTR_SET_ACTIVE(vap, va_access_time);
2464		vap->va_vaflags &= ~VA_UTIMES_NULL;
2465		break;
2466	case NFS_TIME_SET_TO_SERVER:
2467		VATTR_SET(vap, va_access_time, now);
2468		vap->va_vaflags |= VA_UTIMES_NULL;
2469		break;
2470	}
2471	nfsm_chain_get_32(error, nmc, val);
2472	switch (val) {
2473	case NFS_TIME_SET_TO_CLIENT:
2474		nfsm_chain_get_time(error, nmc, nd->nd_vers,
2475			vap->va_modify_time.tv_sec,
2476			vap->va_modify_time.tv_nsec);
2477		VATTR_SET_ACTIVE(vap, va_modify_time);
2478		vap->va_vaflags &= ~VA_UTIMES_NULL;
2479		break;
2480	case NFS_TIME_SET_TO_SERVER:
2481		VATTR_SET(vap, va_modify_time, now);
2482		if (!VATTR_IS_ACTIVE(vap, va_access_time))
2483			vap->va_vaflags |= VA_UTIMES_NULL;
2484		break;
2485	}
2486
2487	return (error);
2488}
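
/*
 * The NFSv3 sattr3 decoded above is a series of discriminated unions: mode,
 * uid, gid and size are each preceded by a boolean "set it" flag, while atime
 * and mtime each carry a how-value (don't change, set to server time, or set
 * to the client-supplied time).  Only the attributes the client asked to set
 * end up marked active in the vnode_attr.
 */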
2489
2490/*
2491 * Compare two security flavor structs
2492 */
2493int
2494nfsrv_cmp_secflavs(struct nfs_sec *sf1, struct nfs_sec *sf2)
2495{
2496	int i;
2497
2498	if (sf1->count != sf2->count)
2499		return 1;
2500	for (i = 0; i < sf1->count; i++)
2501		if (sf1->flavors[i] != sf2->flavors[i])
2502			return 1;
2503	return 0;
2504}
2505
2506/*
2507 * Build hash lists of net addresses and hang them off the NFS export.
2508 * Called by nfsrv_export() to set up the lists of export addresses.
2509 */
2510int
2511nfsrv_hang_addrlist(struct nfs_export *nx, struct user_nfs_export_args *unxa)
2512{
2513	struct nfs_export_net_args nxna;
2514	struct nfs_netopt *no, *rn_no;
2515	struct radix_node_head *rnh;
2516	struct radix_node *rn;
2517	struct sockaddr *saddr, *smask;
2518	struct domain *dom;
2519	int i, error;
2520	unsigned int net;
2521	user_addr_t uaddr;
2522	kauth_cred_t cred;
2523
2524	uaddr = unxa->nxa_nets;
2525	for (net = 0; net < unxa->nxa_netcount; net++, uaddr += sizeof(nxna)) {
2526		error = copyin(uaddr, &nxna, sizeof(nxna));
2527		if (error)
2528			return (error);
2529
2530		if (nxna.nxna_flags & (NX_MAPROOT|NX_MAPALL)) {
2531			struct posix_cred temp_pcred;
2532		        bzero(&temp_pcred, sizeof(temp_pcred));
2533			temp_pcred.cr_uid = nxna.nxna_cred.cr_uid;
2534			temp_pcred.cr_ngroups = nxna.nxna_cred.cr_ngroups;
2535			for (i=0; i < nxna.nxna_cred.cr_ngroups && i < NGROUPS; i++)
2536				temp_pcred.cr_groups[i] = nxna.nxna_cred.cr_groups[i];
2537			cred = posix_cred_create(&temp_pcred);
2538			if (!IS_VALID_CRED(cred))
2539				return (ENOMEM);
2540		} else {
2541			cred = NOCRED;
2542		}
2543
2544		if (nxna.nxna_addr.ss_len == 0) {
2545			/* No address means this is a default/world export */
2546			if (nx->nx_flags & NX_DEFAULTEXPORT) {
2547			        if (IS_VALID_CRED(cred))
2548				        kauth_cred_unref(&cred);
2549				return (EEXIST);
2550			}
2551			nx->nx_flags |= NX_DEFAULTEXPORT;
2552			nx->nx_defopt.nxo_flags = nxna.nxna_flags;
2553			nx->nx_defopt.nxo_cred = cred;
2554			bcopy(&nxna.nxna_sec, &nx->nx_defopt.nxo_sec, sizeof(struct nfs_sec));
2555			nx->nx_expcnt++;
2556			continue;
2557		}
2558
2559		i = sizeof(struct nfs_netopt);
2560		i += nxna.nxna_addr.ss_len + nxna.nxna_mask.ss_len;
2561		MALLOC(no, struct nfs_netopt *, i, M_NETADDR, M_WAITOK);
2562		if (!no) {
2563			if (IS_VALID_CRED(cred))
2564				kauth_cred_unref(&cred);
2565			return (ENOMEM);
2566		}
2567		bzero(no, sizeof(struct nfs_netopt));
2568		no->no_opt.nxo_flags = nxna.nxna_flags;
2569		no->no_opt.nxo_cred = cred;
2570		bcopy(&nxna.nxna_sec, &no->no_opt.nxo_sec, sizeof(struct nfs_sec));
2571
2572		saddr = (struct sockaddr *)(no + 1);
2573		bcopy(&nxna.nxna_addr, saddr, nxna.nxna_addr.ss_len);
2574		if (nxna.nxna_mask.ss_len) {
2575			smask = (struct sockaddr *)((caddr_t)saddr + nxna.nxna_addr.ss_len);
2576			bcopy(&nxna.nxna_mask, smask, nxna.nxna_mask.ss_len);
2577		} else {
2578			smask = NULL;
2579		}
2580		i = saddr->sa_family;
2581		if ((rnh = nx->nx_rtable[i]) == 0) {
2582			/*
2583			 * Seems silly to initialize every AF when most are not
2584			 * used, do so on demand here
2585			 */
2586			TAILQ_FOREACH(dom, &domains, dom_entry) {
2587				if (dom->dom_family == i && dom->dom_rtattach) {
2588					dom->dom_rtattach((void **)&nx->nx_rtable[i],
2589						dom->dom_rtoffset);
2590					break;
2591				}
2592			}
2593			if ((rnh = nx->nx_rtable[i]) == 0) {
2594			        if (IS_VALID_CRED(cred))
2595				        kauth_cred_unref(&cred);
2596				_FREE(no, M_NETADDR);
2597				return (ENOBUFS);
2598			}
2599		}
2600		rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh, no->no_rnodes);
2601		if (rn == 0) {
2602			/*
2603			 * One of the reasons that rnh_addaddr may fail is that
2604			 * the entry already exists. To check for this case, we
2605			 * look up the entry to see if it is there. If so, we
2606			 * do not need to make a new entry but do continue.
2607			 *
2608			 * XXX should this be rnh_lookup() instead?
2609			 */
2610			int matched = 0;
2611			rn = (*rnh->rnh_matchaddr)((caddr_t)saddr, rnh);
2612			rn_no = (struct nfs_netopt *)rn;
2613			if (rn != 0 && (rn->rn_flags & RNF_ROOT) == 0 &&
2614			    (rn_no->no_opt.nxo_flags == nxna.nxna_flags) &&
2615			    (!nfsrv_cmp_secflavs(&rn_no->no_opt.nxo_sec, &nxna.nxna_sec))) {
2616				kauth_cred_t cred2 = rn_no->no_opt.nxo_cred;
2617				if (cred == cred2) {
2618					/* creds are same (or both NULL) */
2619					matched = 1;
2620				} else if (cred && cred2 && (kauth_cred_getuid(cred) == kauth_cred_getuid(cred2))) {
2621				    /*
2622				     * Now compare the effective and
2623				     * supplementary groups...
2624				     *
2625				     * Note: This comparison, as written,
2626				     * does not correctly indicate that
2627				     * the groups are equivalent, since
2628				     * other than the first supplementary
2629				     * group, which is also the effective
2630				     * group, order on the remaining groups
2631				     * doesn't matter, and this is an
2632				     * ordered compare.
2633				     */
2634				    gid_t groups[NGROUPS];
2635				    gid_t groups2[NGROUPS];
2636				    int groupcount = NGROUPS;
2637				    int group2count = NGROUPS;
2638
2639				    if (!kauth_cred_getgroups(cred, groups, &groupcount) &&
2640					!kauth_cred_getgroups(cred2, groups2, &group2count) &&
2641					groupcount == group2count) {
2642					    for (i=0; i < group2count; i++)
2643						    if (groups[i] != groups2[i])
2644							    break;
					    if (i >= group2count || i >= NGROUPS)
						    matched = 1;
2647				    }
2648				}
2649			}
2650			if (IS_VALID_CRED(cred))
2651			        kauth_cred_unref(&cred);
2652			_FREE(no, M_NETADDR);
2653			if (matched)
2654				continue;
2655			return (EPERM);
2656		}
2657		nx->nx_expcnt++;
2658	}
2659
2660	return (0);
2661}
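
/*
 * Note that each nfs_netopt above is a single allocation: the structure
 * itself, immediately followed by the client address and then (optionally)
 * its mask, which is why the size is computed as
 * sizeof(struct nfs_netopt) + nxna_addr.ss_len + nxna_mask.ss_len and why a
 * single _FREE() releases the whole entry.
 */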
2662
2663/*
 * nfsrv_free_netopt() needs to decrement the export's netopt count as it
 * removes each entry, so we bundle that count (and the radix node head)
 * into this argument structure and pass it through the walktree callback.
2667 */
2668struct nfsrv_free_netopt_arg {
2669	uint32_t *cnt;
2670	struct radix_node_head *rnh;
2671};
2672
2673int
2674nfsrv_free_netopt(struct radix_node *rn, void *w)
2675{
2676	struct nfsrv_free_netopt_arg *fna = (struct nfsrv_free_netopt_arg *)w;
2677	struct radix_node_head *rnh = fna->rnh;
2678	uint32_t *cnt = fna->cnt;
2679	struct nfs_netopt *nno = (struct nfs_netopt *)rn;
2680
2681	(*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh);
2682	if (IS_VALID_CRED(nno->no_opt.nxo_cred))
2683		kauth_cred_unref(&nno->no_opt.nxo_cred);
2684	_FREE((caddr_t)rn, M_NETADDR);
2685	*cnt -= 1;
2686	return (0);
2687}
2688
2689/*
2690 * Free the net address hash lists that are hanging off the mount points.
2691 */
2692int
2693nfsrv_free_addrlist(struct nfs_export *nx, struct user_nfs_export_args *unxa)
2694{
2695	struct nfs_export_net_args nxna;
2696	struct radix_node_head *rnh;
2697	struct radix_node *rn;
2698	struct nfsrv_free_netopt_arg fna;
2699	struct nfs_netopt *nno;
2700	user_addr_t uaddr;
2701	unsigned int net;
2702	int i, error;
2703
2704	if (!unxa || !unxa->nxa_netcount) {
2705		/* delete everything */
2706		for (i = 0; i <= AF_MAX; i++)
2707			if ( (rnh = nx->nx_rtable[i]) ) {
2708				fna.rnh = rnh;
2709				fna.cnt = &nx->nx_expcnt;
2710				(*rnh->rnh_walktree)(rnh, nfsrv_free_netopt, (caddr_t)&fna);
2711				_FREE((caddr_t)rnh, M_RTABLE);
2712				nx->nx_rtable[i] = 0;
2713			}
2714		return (0);
2715	}
2716
2717	/* delete only the exports specified */
2718	uaddr = unxa->nxa_nets;
2719	for (net = 0; net < unxa->nxa_netcount; net++, uaddr += sizeof(nxna)) {
2720		error = copyin(uaddr, &nxna, sizeof(nxna));
2721		if (error)
2722			return (error);
2723
2724		if (nxna.nxna_addr.ss_len == 0) {
2725			/* No address means this is a default/world export */
2726			if (nx->nx_flags & NX_DEFAULTEXPORT) {
2727				nx->nx_flags &= ~NX_DEFAULTEXPORT;
2728				if (IS_VALID_CRED(nx->nx_defopt.nxo_cred)) {
2729					kauth_cred_unref(&nx->nx_defopt.nxo_cred);
2730				}
2731				nx->nx_expcnt--;
2732			}
2733			continue;
2734		}
2735
2736		if ((rnh = nx->nx_rtable[nxna.nxna_addr.ss_family]) == 0) {
2737			/* AF not initialized? */
2738			if (!(unxa->nxa_flags & NXA_ADD))
2739				printf("nfsrv_free_addrlist: address not found (0)\n");
2740			continue;
2741		}
2742
2743		rn = (*rnh->rnh_lookup)(&nxna.nxna_addr,
2744			nxna.nxna_mask.ss_len ? &nxna.nxna_mask : NULL, rnh);
2745		if (!rn || (rn->rn_flags & RNF_ROOT)) {
2746			if (!(unxa->nxa_flags & NXA_ADD))
2747				printf("nfsrv_free_addrlist: address not found (1)\n");
2748			continue;
2749		}
2750
2751		(*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh);
2752		nno = (struct nfs_netopt *)rn;
2753		if (IS_VALID_CRED(nno->no_opt.nxo_cred))
2754			kauth_cred_unref(&nno->no_opt.nxo_cred);
2755		_FREE((caddr_t)rn, M_NETADDR);
2756
2757		nx->nx_expcnt--;
2758		if (nx->nx_expcnt == ((nx->nx_flags & NX_DEFAULTEXPORT) ? 1 : 0)) {
2759			/* no more entries in rnh, so free it up */
2760			_FREE((caddr_t)rnh, M_RTABLE);
2761			nx->nx_rtable[nxna.nxna_addr.ss_family] = 0;
2762		}
2763	}
2764
2765	return (0);
2766}
2767
2768void enablequotas(struct mount *mp, vfs_context_t ctx); // XXX
2769
2770int
2771nfsrv_export(struct user_nfs_export_args *unxa, vfs_context_t ctx)
2772{
2773	int error = 0;
2774	size_t pathlen;
2775	struct nfs_exportfs *nxfs, *nxfs2, *nxfs3;
2776	struct nfs_export *nx, *nx2, *nx3;
2777	struct nfs_filehandle nfh;
2778	struct nameidata mnd, xnd;
2779	vnode_t mvp = NULL, xvp = NULL;
2780	mount_t mp = NULL;
2781	char path[MAXPATHLEN];
2782	int expisroot;
2783
2784	if (unxa->nxa_flags == NXA_CHECK) {
2785		/* just check if the path is an NFS-exportable file system */
2786		error = copyinstr(unxa->nxa_fspath, path, MAXPATHLEN, &pathlen);
2787		if (error)
2788			return (error);
2789		NDINIT(&mnd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1,
2790			UIO_SYSSPACE, CAST_USER_ADDR_T(path), ctx);
2791		error = namei(&mnd);
2792		if (error)
2793			return (error);
2794		mvp = mnd.ni_vp;
2795		mp = vnode_mount(mvp);
2796		/* make sure it's the root of a file system */
2797		if (!vnode_isvroot(mvp))
2798			error = EINVAL;
2799		/* make sure the file system is NFS-exportable */
2800		if (!error) {
2801			nfh.nfh_len = NFSV3_MAX_FID_SIZE;
2802			error = VFS_VPTOFH(mvp, (int*)&nfh.nfh_len, &nfh.nfh_fid[0], NULL);
2803		}
2804		if (!error && (nfh.nfh_len > (int)NFSV3_MAX_FID_SIZE))
2805			error = EIO;
2806		if (!error && !(mp->mnt_vtable->vfc_vfsflags & VFC_VFSREADDIR_EXTENDED))
2807			error = EISDIR;
2808		vnode_put(mvp);
2809		nameidone(&mnd);
2810		return (error);
2811	}
2812
2813	/* all other operations: must be super user */
2814	if ((error = vfs_context_suser(ctx)))
2815		return (error);
2816
2817	if (unxa->nxa_flags & NXA_DELETE_ALL) {
2818		/* delete all exports on all file systems */
2819		lck_rw_lock_exclusive(&nfsrv_export_rwlock);
2820		while ((nxfs = LIST_FIRST(&nfsrv_exports))) {
2821			mp = vfs_getvfs_by_mntonname(nxfs->nxfs_path);
2822			if (mp) {
2823				vfs_clearflags(mp, MNT_EXPORTED);
2824				mount_iterdrop(mp);
2825				mp = NULL;
2826			}
2827			/* delete all exports on this file system */
2828			while ((nx = LIST_FIRST(&nxfs->nxfs_exports))) {
2829				LIST_REMOVE(nx, nx_next);
2830				LIST_REMOVE(nx, nx_hash);
2831				/* delete all netopts for this export */
2832				nfsrv_free_addrlist(nx, NULL);
2833				nx->nx_flags &= ~NX_DEFAULTEXPORT;
2834				if (IS_VALID_CRED(nx->nx_defopt.nxo_cred)) {
2835					kauth_cred_unref(&nx->nx_defopt.nxo_cred);
2836				}
2837				/* free active user list for this export */
2838				nfsrv_free_user_list(&nx->nx_user_list);
2839				FREE(nx->nx_path, M_TEMP);
2840				FREE(nx, M_TEMP);
2841			}
2842			LIST_REMOVE(nxfs, nxfs_next);
2843			FREE(nxfs->nxfs_path, M_TEMP);
2844			FREE(nxfs, M_TEMP);
2845		}
2846		if (nfsrv_export_hashtbl) {
2847			/* all exports deleted, clean up export hash table */
2848			FREE(nfsrv_export_hashtbl, M_TEMP);
2849			nfsrv_export_hashtbl = NULL;
2850		}
2851		lck_rw_done(&nfsrv_export_rwlock);
2852		return (0);
2853	}
2854
2855	error = copyinstr(unxa->nxa_fspath, path, MAXPATHLEN, &pathlen);
2856	if (error)
2857		return (error);
2858
2859	lck_rw_lock_exclusive(&nfsrv_export_rwlock);
2860
2861	/* init export hash table if not already */
2862	if (!nfsrv_export_hashtbl) {
2863		if (nfsrv_export_hash_size <= 0)
2864			nfsrv_export_hash_size = NFSRVEXPHASHSZ;
2865		nfsrv_export_hashtbl = hashinit(nfsrv_export_hash_size, M_TEMP, &nfsrv_export_hash);
2866	}
2867
2868	// first check if we've already got an exportfs with the given ID
2869	LIST_FOREACH(nxfs, &nfsrv_exports, nxfs_next) {
2870		if (nxfs->nxfs_id == unxa->nxa_fsid)
2871			break;
2872	}
2873	if (nxfs) {
2874		/* verify exported FS path matches given path */
2875		if (strncmp(path, nxfs->nxfs_path, MAXPATHLEN)) {
2876			error = EEXIST;
2877			goto unlock_out;
2878		}
2879		if ((unxa->nxa_flags & (NXA_ADD|NXA_OFFLINE)) == NXA_ADD) {
2880			/* if adding, verify that the mount is still what we expect */
2881			mp = vfs_getvfs_by_mntonname(nxfs->nxfs_path);
2882			if (mp) {
2883				mount_ref(mp, 0);
2884				mount_iterdrop(mp);
2885			}
2886			/* find exported FS root vnode */
2887			NDINIT(&mnd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1,
2888				UIO_SYSSPACE, CAST_USER_ADDR_T(nxfs->nxfs_path), ctx);
2889			error = namei(&mnd);
2890			if (error)
2891				goto unlock_out;
2892			mvp = mnd.ni_vp;
2893			/* make sure it's (still) the root of a file system */
2894			if (!vnode_isvroot(mvp)) {
2895				error = EINVAL;
2896				goto out;
2897			}
2898			/* sanity check: this should be same mount */
2899			if (mp != vnode_mount(mvp)) {
2900				error = EINVAL;
2901				goto out;
2902			}
2903		}
2904	} else {
2905		/* no current exported file system with that ID */
2906		if (!(unxa->nxa_flags & NXA_ADD)) {
2907			error = ENOENT;
2908			goto unlock_out;
2909		}
2910
2911		/* find exported FS root vnode */
2912		NDINIT(&mnd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1,
2913			UIO_SYSSPACE, CAST_USER_ADDR_T(path), ctx);
2914		error = namei(&mnd);
2915		if (error) {
2916			if (!(unxa->nxa_flags & NXA_OFFLINE))
2917				goto unlock_out;
2918		} else {
2919			mvp = mnd.ni_vp;
2920			/* make sure it's the root of a file system */
2921			if (!vnode_isvroot(mvp)) {
2922				/* bail if not marked offline */
2923				if (!(unxa->nxa_flags & NXA_OFFLINE)) {
2924					error = EINVAL;
2925					goto out;
2926				}
2927				vnode_put(mvp);
2928				nameidone(&mnd);
2929				mvp = NULL;
2930			} else {
2931				mp = vnode_mount(mvp);
2932				mount_ref(mp, 0);
2933
2934				/* make sure the file system is NFS-exportable */
2935				nfh.nfh_len = NFSV3_MAX_FID_SIZE;
2936				error = VFS_VPTOFH(mvp, (int*)&nfh.nfh_len, &nfh.nfh_fid[0], NULL);
2937				if (!error && (nfh.nfh_len > (int)NFSV3_MAX_FID_SIZE))
2938					error = EIO;
2939				if (!error && !(mp->mnt_vtable->vfc_vfsflags & VFC_VFSREADDIR_EXTENDED))
2940					error = EISDIR;
2941				if (error)
2942					goto out;
2943			}
2944		}
2945
2946		/* add an exportfs for it */
2947		MALLOC(nxfs, struct nfs_exportfs *, sizeof(struct nfs_exportfs), M_TEMP, M_WAITOK);
2948		if (!nxfs) {
2949			error = ENOMEM;
2950			goto out;
2951		}
2952		bzero(nxfs, sizeof(struct nfs_exportfs));
2953		nxfs->nxfs_id = unxa->nxa_fsid;
2954		MALLOC(nxfs->nxfs_path, char*, pathlen, M_TEMP, M_WAITOK);
2955		if (!nxfs->nxfs_path) {
2956			FREE(nxfs, M_TEMP);
2957			error = ENOMEM;
2958			goto out;
2959		}
2960		bcopy(path, nxfs->nxfs_path, pathlen);
2961		/* insert into list in reverse-sorted order */
2962		nxfs3 = NULL;
2963		LIST_FOREACH(nxfs2, &nfsrv_exports, nxfs_next) {
2964			if (strncmp(nxfs->nxfs_path, nxfs2->nxfs_path, MAXPATHLEN) > 0)
2965				break;
2966			nxfs3 = nxfs2;
2967		}
2968		if (nxfs2)
2969			LIST_INSERT_BEFORE(nxfs2, nxfs, nxfs_next);
2970		else if (nxfs3)
2971			LIST_INSERT_AFTER(nxfs3, nxfs, nxfs_next);
2972		else
2973			LIST_INSERT_HEAD(&nfsrv_exports, nxfs, nxfs_next);
2974
2975		/* make sure any quotas are enabled before we export the file system */
2976		if (mp)
2977			enablequotas(mp, ctx);
2978	}
2979
2980	if (unxa->nxa_exppath) {
2981		error = copyinstr(unxa->nxa_exppath, path, MAXPATHLEN, &pathlen);
2982		if (error)
2983			goto out;
2984		LIST_FOREACH(nx, &nxfs->nxfs_exports, nx_next) {
2985			if (nx->nx_id == unxa->nxa_expid)
2986				break;
2987		}
2988		if (nx) {
2989			/* verify exported FS path matches given path */
2990			if (strncmp(path, nx->nx_path, MAXPATHLEN)) {
2991				error = EEXIST;
2992				goto out;
2993			}
2994		} else {
2995			/* no current export with that ID */
2996			if (!(unxa->nxa_flags & NXA_ADD)) {
2997				error = ENOENT;
2998				goto out;
2999			}
3000			/* add an export for it */
3001			MALLOC(nx, struct nfs_export *, sizeof(struct nfs_export), M_TEMP, M_WAITOK);
3002			if (!nx) {
3003				error = ENOMEM;
3004				goto out1;
3005			}
3006			bzero(nx, sizeof(struct nfs_export));
3007			nx->nx_id = unxa->nxa_expid;
3008			nx->nx_fs = nxfs;
3009			microtime(&nx->nx_exptime);
3010			MALLOC(nx->nx_path, char*, pathlen, M_TEMP, M_WAITOK);
3011			if (!nx->nx_path) {
3012				error = ENOMEM;
3013				FREE(nx, M_TEMP);
3014				nx = NULL;
3015				goto out1;
3016			}
3017			bcopy(path, nx->nx_path, pathlen);
3018			/* initialize the active user list */
3019			nfsrv_init_user_list(&nx->nx_user_list);
3020			/* insert into list in reverse-sorted order */
3021			nx3 = NULL;
3022			LIST_FOREACH(nx2, &nxfs->nxfs_exports, nx_next) {
3023				if (strncmp(nx->nx_path, nx2->nx_path, MAXPATHLEN) > 0)
3024					break;
3025				nx3 = nx2;
3026			}
3027			if (nx2)
3028				LIST_INSERT_BEFORE(nx2, nx, nx_next);
3029			else if (nx3)
3030				LIST_INSERT_AFTER(nx3, nx, nx_next);
3031			else
3032				LIST_INSERT_HEAD(&nxfs->nxfs_exports, nx, nx_next);
3033			/* insert into hash */
3034			LIST_INSERT_HEAD(NFSRVEXPHASH(nxfs->nxfs_id, nx->nx_id), nx, nx_hash);
3035
3036			/*
			 * We don't allow/support nested exports.  Check whether the new entry
			 * nests with the entries before or after it, or whether an export of
			 * the file system root would coexist with an export of a subdirectory.
3040			 */
3041			error = 0;
3042			if ((nx3 && !strncmp(nx3->nx_path, nx->nx_path, pathlen - 1) &&
3043				    (nx3->nx_path[pathlen-1] == '/')) ||
3044			    (nx2 && !strncmp(nx2->nx_path, nx->nx_path, strlen(nx2->nx_path)) &&
3045			    	    (nx->nx_path[strlen(nx2->nx_path)] == '/')))
3046				error = EINVAL;
3047			if (!error) {
3048				/* check export conflict with fs root export and vice versa */
3049				expisroot = !nx->nx_path[0] ||
3050					    ((nx->nx_path[0] == '.') && !nx->nx_path[1]);
3051				LIST_FOREACH(nx2, &nxfs->nxfs_exports, nx_next) {
3052					if (expisroot) {
3053						if (nx2 != nx)
3054							break;
3055					} else if (!nx2->nx_path[0])
3056						break;
3057					else if ((nx2->nx_path[0] == '.') && !nx2->nx_path[1])
3058						break;
3059				}
3060				if (nx2)
3061					error = EINVAL;
3062			}
3063			if (error) {
3064				/*
3065				 * Don't actually return an error because mountd is
3066				 * probably about to delete the conflicting export.
3067				 * This can happen when a new export momentarily conflicts
3068				 * with an old export while the transition is being made.
3069				 * Theoretically, mountd could be written to avoid this
3070				 * transient situation - but it would greatly increase the
3071				 * complexity of mountd for very little overall benefit.
3072				 */
3073				printf("nfsrv_export: warning: nested exports: %s/%s\n",
3074					nxfs->nxfs_path, nx->nx_path);
3075				error = 0;
3076			}
3077			nx->nx_fh.nfh_xh.nxh_flags = NXHF_INVALIDFH;
3078		}
3079		/* make sure file handle is set up */
3080		if ((nx->nx_fh.nfh_xh.nxh_version != htonl(NFS_FH_VERSION)) ||
3081		    (nx->nx_fh.nfh_xh.nxh_flags & NXHF_INVALIDFH)) {
3082			/* try to set up export root file handle */
3083			nx->nx_fh.nfh_xh.nxh_version = htonl(NFS_FH_VERSION);
3084			nx->nx_fh.nfh_xh.nxh_fsid = htonl(nx->nx_fs->nxfs_id);
3085			nx->nx_fh.nfh_xh.nxh_expid = htonl(nx->nx_id);
3086			nx->nx_fh.nfh_xh.nxh_flags = 0;
3087			nx->nx_fh.nfh_xh.nxh_reserved = 0;
3088			nx->nx_fh.nfh_fhp = (u_char*)&nx->nx_fh.nfh_xh;
3089			bzero(&nx->nx_fh.nfh_fid[0], NFSV2_MAX_FID_SIZE);
3090			if (mvp) {
3091				/* find export root vnode */
3092				if (!nx->nx_path[0] || ((nx->nx_path[0] == '.') && !nx->nx_path[1])) {
3093					/* exporting file system's root directory */
3094					xvp = mvp;
3095					vnode_get(xvp);
3096				} else {
3097					xnd.ni_cnd.cn_nameiop = LOOKUP;
3098#if CONFIG_TRIGGERS
3099					xnd.ni_op = OP_LOOKUP;
3100#endif
3101					xnd.ni_cnd.cn_flags = LOCKLEAF;
3102					xnd.ni_pathlen = pathlen - 1;
3103					xnd.ni_cnd.cn_nameptr = xnd.ni_cnd.cn_pnbuf = path;
3104					xnd.ni_startdir = mvp;
3105					xnd.ni_usedvp   = mvp;
3106					xnd.ni_cnd.cn_context = ctx;
3107					while ((error = lookup(&xnd)) == ERECYCLE) {
3108						xnd.ni_cnd.cn_flags = LOCKLEAF;
3109						xnd.ni_cnd.cn_nameptr = xnd.ni_cnd.cn_pnbuf;
3110						xnd.ni_usedvp = xnd.ni_dvp = xnd.ni_startdir = mvp;
3111					}
3112					if (error)
3113						goto out1;
3114					xvp = xnd.ni_vp;
3115				}
3116
3117				if (vnode_vtype(xvp) != VDIR) {
3118					error = EINVAL;
3119					vnode_put(xvp);
3120					goto out1;
3121				}
3122
3123				/* grab file handle */
3124				nx->nx_fh.nfh_len = NFSV3_MAX_FID_SIZE;
3125				error = VFS_VPTOFH(xvp, (int*)&nx->nx_fh.nfh_len, &nx->nx_fh.nfh_fid[0], NULL);
3126				if (!error && (nx->nx_fh.nfh_len > (int)NFSV3_MAX_FID_SIZE)) {
3127					error = EIO;
3128				} else {
3129					nx->nx_fh.nfh_xh.nxh_fidlen = nx->nx_fh.nfh_len;
3130					nx->nx_fh.nfh_len += sizeof(nx->nx_fh.nfh_xh);
3131				}
3132
3133				vnode_put(xvp);
3134				if (error)
3135					goto out1;
3136			} else {
3137				nx->nx_fh.nfh_xh.nxh_flags = NXHF_INVALIDFH;
3138				nx->nx_fh.nfh_xh.nxh_fidlen = 0;
3139				nx->nx_fh.nfh_len = sizeof(nx->nx_fh.nfh_xh);
3140			}
3141		}
3142	} else {
3143		nx = NULL;
3144	}
3145
3146	/* perform the export changes */
3147	if (unxa->nxa_flags & NXA_DELETE) {
3148		if (!nx) {
3149			/* delete all exports on this file system */
3150			while ((nx = LIST_FIRST(&nxfs->nxfs_exports))) {
3151				LIST_REMOVE(nx, nx_next);
3152				LIST_REMOVE(nx, nx_hash);
3153				/* delete all netopts for this export */
3154				nfsrv_free_addrlist(nx, NULL);
3155				nx->nx_flags &= ~NX_DEFAULTEXPORT;
3156				if (IS_VALID_CRED(nx->nx_defopt.nxo_cred)) {
3157					kauth_cred_unref(&nx->nx_defopt.nxo_cred);
3158				}
3159				/* delete active user list for this export */
3160				nfsrv_free_user_list(&nx->nx_user_list);
3161				FREE(nx->nx_path, M_TEMP);
3162				FREE(nx, M_TEMP);
3163			}
3164			goto out1;
3165		} else if (!unxa->nxa_netcount) {
3166			/* delete all netopts for this export */
3167			nfsrv_free_addrlist(nx, NULL);
3168			nx->nx_flags &= ~NX_DEFAULTEXPORT;
3169			if (IS_VALID_CRED(nx->nx_defopt.nxo_cred)) {
3170				kauth_cred_unref(&nx->nx_defopt.nxo_cred);
3171			}
3172		} else {
3173			/* delete only the netopts for the given addresses */
3174			error = nfsrv_free_addrlist(nx, unxa);
3175			if (error)
3176				goto out1;
3177		}
3178	}
3179	if (unxa->nxa_flags & NXA_ADD) {
3180		/*
3181		 * If going offline set the export time so that when
3182		 * coming back on line we will present a new write verifier
3183		 * to the client.
3184		 */
3185		if (unxa->nxa_flags & NXA_OFFLINE)
3186			microtime(&nx->nx_exptime);
3187
3188		error = nfsrv_hang_addrlist(nx, unxa);
3189		if (!error && mp)
3190			vfs_setflags(mp, MNT_EXPORTED);
3191	}
3192
3193out1:
3194	if (nx && !nx->nx_expcnt) {
3195		/* export has no export options */
3196		LIST_REMOVE(nx, nx_next);
3197		LIST_REMOVE(nx, nx_hash);
3198		/* delete active user list for this export */
3199		nfsrv_free_user_list(&nx->nx_user_list);
3200		FREE(nx->nx_path, M_TEMP);
3201		FREE(nx, M_TEMP);
3202	}
3203	if (LIST_EMPTY(&nxfs->nxfs_exports)) {
3204		/* exported file system has no more exports */
3205		LIST_REMOVE(nxfs, nxfs_next);
3206		FREE(nxfs->nxfs_path, M_TEMP);
3207		FREE(nxfs, M_TEMP);
3208		if (mp)
3209			vfs_clearflags(mp, MNT_EXPORTED);
3210	}
3211
3212out:
3213	if (mvp) {
3214		vnode_put(mvp);
3215		nameidone(&mnd);
3216	}
3217unlock_out:
3218	if (mp)
3219		mount_drop(mp, 0);
3220	lck_rw_done(&nfsrv_export_rwlock);
3221	return (error);
3222}
3223
3224struct nfs_export_options *
3225nfsrv_export_lookup(struct nfs_export *nx, mbuf_t nam)
3226{
3227	struct nfs_export_options *nxo = NULL;
3228	struct nfs_netopt *no = NULL;
3229	struct radix_node_head *rnh;
3230	struct sockaddr *saddr;
3231
3232	/* Lookup in the export list first. */
3233	if (nam != NULL) {
3234		saddr = mbuf_data(nam);
3235		rnh = nx->nx_rtable[saddr->sa_family];
3236		if (rnh != NULL) {
3237			no = (struct nfs_netopt *)
3238				(*rnh->rnh_matchaddr)((caddr_t)saddr, rnh);
3239			if (no && no->no_rnodes->rn_flags & RNF_ROOT)
3240				no = NULL;
3241			if (no)
3242				nxo = &no->no_opt;
3243		}
3244	}
3245	/* If no address match, use the default if it exists. */
3246	if ((nxo == NULL) && (nx->nx_flags & NX_DEFAULTEXPORT))
3247		nxo = &nx->nx_defopt;
3248	return (nxo);
3249}
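
/*
 * So the effective precedence is: an address-specific entry from the export's
 * radix tree (matched against the client's sockaddr), falling back to the
 * default options only when no per-address entry matches and the export was
 * created with a default/world entry (NX_DEFAULTEXPORT).
 */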
3250
3251/* find an export for the given handle */
3252struct nfs_export *
3253nfsrv_fhtoexport(struct nfs_filehandle *nfhp)
3254{
3255	struct nfs_exphandle *nxh = (struct nfs_exphandle*)nfhp->nfh_fhp;
3256	struct nfs_export *nx;
3257	uint32_t fsid, expid;
3258
3259	if (!nfsrv_export_hashtbl)
3260		return (NULL);
3261	fsid = ntohl(nxh->nxh_fsid);
3262	expid = ntohl(nxh->nxh_expid);
3263	nx = NFSRVEXPHASH(fsid, expid)->lh_first;
3264	for (; nx; nx = LIST_NEXT(nx, nx_hash)) {
3265		if (nx->nx_fs->nxfs_id != fsid)
3266			continue;
3267		if (nx->nx_id != expid)
3268			continue;
3269		break;
3270	}
3271	return (nx);
3272}
3273
3274/*
3275 * nfsrv_fhtovp() - convert FH to vnode and export info
3276 */
3277int
3278nfsrv_fhtovp(
3279	struct nfs_filehandle *nfhp,
3280	struct nfsrv_descript *nd,
3281	vnode_t *vpp,
3282	struct nfs_export **nxp,
3283	struct nfs_export_options **nxop)
3284{
3285	struct nfs_exphandle *nxh = (struct nfs_exphandle*)nfhp->nfh_fhp;
3286	struct nfs_export_options *nxo;
3287	u_char *fidp;
3288	int error;
3289	struct mount *mp;
3290	mbuf_t nam = NULL;
3291	uint32_t v;
3292	int i, valid;
3293
3294	*vpp = NULL;
3295	*nxp = NULL;
3296	*nxop = NULL;
3297
3298	if (nd != NULL)
3299		nam = nd->nd_nam;
3300
3301	v = ntohl(nxh->nxh_version);
3302	if (v != NFS_FH_VERSION) {
3303		/* file handle format not supported */
3304		return (ESTALE);
3305	}
3306	if (nfhp->nfh_len > NFSV3_MAX_FH_SIZE)
3307		return (EBADRPC);
3308	if (nfhp->nfh_len < (int)sizeof(struct nfs_exphandle))
3309		return (ESTALE);
3310	v = ntohs(nxh->nxh_flags);
3311	if (v & NXHF_INVALIDFH)
3312		return (ESTALE);
3313
3314	*nxp = nfsrv_fhtoexport(nfhp);
3315	if (!*nxp)
3316		return (ESTALE);
3317
3318	/* Get the export option structure for this <export, client> tuple. */
3319	*nxop = nxo = nfsrv_export_lookup(*nxp, nam);
3320	if (nam && (*nxop == NULL))
3321		return (EACCES);
3322
3323	if (nd != NULL) {
3324		/* Validate the security flavor of the request */
3325		for (i = 0, valid = 0; i < nxo->nxo_sec.count; i++) {
3326			if (nd->nd_sec == nxo->nxo_sec.flavors[i]) {
3327				valid = 1;
3328				break;
3329			}
3330		}
3331		if (!valid) {
3332			/*
3333			 * RFC 2623 section 2.3.2 recommends no authentication
3334			 * requirement for certain NFS procedures used for mounting.
3335			 * This allows an unauthenticated superuser on the client
3336			 * to do mounts for the benefit of authenticated users.
3337			 */
3338			if (nd->nd_vers == NFS_VER2)
3339				if (nd->nd_procnum == NFSV2PROC_GETATTR ||
3340				    nd->nd_procnum == NFSV2PROC_STATFS)
3341					valid = 1;
3342			if (nd->nd_vers == NFS_VER3)
3343				if (nd->nd_procnum == NFSPROC_FSINFO)
3344					valid = 1;
3345
3346			if (!valid)
3347				return (NFSERR_AUTHERR | AUTH_REJECTCRED);
3348		}
3349	}
3350
3351	if (nxo && (nxo->nxo_flags & NX_OFFLINE))
3352		return ((nd == NULL || nd->nd_vers == NFS_VER2) ? ESTALE : NFSERR_TRYLATER);
3353
3354	/* find mount structure */
3355	mp = vfs_getvfs_by_mntonname((*nxp)->nx_fs->nxfs_path);
3356	if (mp) {
3357		error = vfs_busy(mp, LK_NOWAIT);
3358		mount_iterdrop(mp);
3359		if (error)
3360			mp = NULL;
3361	}
3362	if (!mp) {
3363		/*
3364		 * We have an export, but no mount?
3365		 * Perhaps the export just hasn't been marked offline yet.
3366		 */
3367		return ((nd == NULL || nd->nd_vers == NFS_VER2) ? ESTALE : NFSERR_TRYLATER);
3368	}
3369
3370	fidp = nfhp->nfh_fhp + sizeof(*nxh);
3371	error = VFS_FHTOVP(mp, nxh->nxh_fidlen, fidp, vpp, NULL);
3372	vfs_unbusy(mp);
3373	if (error)
3374		return (error);
3375	/* vnode pointer should be good at this point or ... */
3376	if (*vpp == NULL)
3377		return (ESTALE);
3378	return (0);
3379}
3380
3381/*
3382 * nfsrv_credcheck() - check/map credentials according
3383 * to given export options.
3384 */
3385int
3386nfsrv_credcheck(
3387	struct nfsrv_descript *nd,
3388	vfs_context_t ctx,
3389	__unused struct nfs_export *nx,
3390	struct nfs_export_options *nxo)
3391{
3392	if (nxo && nxo->nxo_cred) {
3393		if ((nxo->nxo_flags & NX_MAPALL) ||
3394		    ((nxo->nxo_flags & NX_MAPROOT) && !suser(nd->nd_cr, NULL))) {
3395			kauth_cred_ref(nxo->nxo_cred);
3396			kauth_cred_unref(&nd->nd_cr);
3397			nd->nd_cr = nxo->nxo_cred;
3398		}
3399	}
3400	ctx->vc_ucred = nd->nd_cr;
3401	return (0);
3402}
3403
3404/*
3405 * nfsrv_vptofh() - convert vnode to file handle for given export
3406 *
3407 * If the caller is passing in a vnode for a ".." directory entry,
3408 * they can pass a directory NFS file handle (dnfhp) which will be
3409 * checked against the root export file handle.  If it matches, we
3410 * refuse to provide the file handle for the out-of-export directory.
3411 */
3412int
3413nfsrv_vptofh(
3414	struct nfs_export *nx,
3415	int nfsvers,
3416	struct nfs_filehandle *dnfhp,
3417	vnode_t vp,
3418	vfs_context_t ctx,
3419	struct nfs_filehandle *nfhp)
3420{
3421	int error;
3422	uint32_t maxfidsize;
3423
3424	nfhp->nfh_fhp = (u_char*)&nfhp->nfh_xh;
3425	nfhp->nfh_xh.nxh_version = htonl(NFS_FH_VERSION);
3426	nfhp->nfh_xh.nxh_fsid = htonl(nx->nx_fs->nxfs_id);
3427	nfhp->nfh_xh.nxh_expid = htonl(nx->nx_id);
3428	nfhp->nfh_xh.nxh_flags = 0;
3429	nfhp->nfh_xh.nxh_reserved = 0;
3430
3431	if (nfsvers == NFS_VER2)
3432		bzero(&nfhp->nfh_fid[0], NFSV2_MAX_FID_SIZE);
3433
3434	/* if directory FH matches export root, return invalid FH */
3435	if (dnfhp && nfsrv_fhmatch(dnfhp, &nx->nx_fh)) {
3436		if (nfsvers == NFS_VER2)
3437			nfhp->nfh_len = NFSX_V2FH;
3438		else
3439			nfhp->nfh_len = sizeof(nfhp->nfh_xh);
3440		nfhp->nfh_xh.nxh_fidlen = 0;
3441		nfhp->nfh_xh.nxh_flags = htons(NXHF_INVALIDFH);
3442		return (0);
3443	}
3444
3445	if (nfsvers == NFS_VER2)
3446		maxfidsize = NFSV2_MAX_FID_SIZE;
3447	else
3448		maxfidsize = NFSV3_MAX_FID_SIZE;
3449	nfhp->nfh_len = maxfidsize;
3450
3451	error = VFS_VPTOFH(vp, (int*)&nfhp->nfh_len, &nfhp->nfh_fid[0], ctx);
3452	if (error)
3453		return (error);
3454	if (nfhp->nfh_len > maxfidsize)
3455		return (EOVERFLOW);
3456	nfhp->nfh_xh.nxh_fidlen = nfhp->nfh_len;
3457	nfhp->nfh_len += sizeof(nfhp->nfh_xh);
3458	if ((nfsvers == NFS_VER2) && (nfhp->nfh_len < NFSX_V2FH))
3459		nfhp->nfh_len = NFSX_V2FH;
3460
3461	return (0);
3462}
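
/*
 * The handle built here is the nfs_exphandle header (version, fsid, expid,
 * flags, reserved, fidlen) followed immediately by the file system's fid
 * bytes, padded out to NFSX_V2FH for NFSv2 clients.  In the ".." case above,
 * where the parent would fall outside the export, only the header is returned,
 * with NXHF_INVALIDFH set and a zero fidlen.
 */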
3463
3464/*
 * Compare two file handles to see if they're the same.
3466 * Note that we don't use nfh_len because that may include
3467 * padding in an NFSv2 file handle.
3468 */
3469int
3470nfsrv_fhmatch(struct nfs_filehandle *fh1, struct nfs_filehandle *fh2)
3471{
3472	struct nfs_exphandle *nxh1, *nxh2;
3473	int len1, len2;
3474
3475	nxh1 = (struct nfs_exphandle *)fh1->nfh_fhp;
3476	nxh2 = (struct nfs_exphandle *)fh2->nfh_fhp;
3477	len1 = sizeof(fh1->nfh_xh) + nxh1->nxh_fidlen;
3478	len2 = sizeof(fh2->nfh_xh) + nxh2->nxh_fidlen;
3479	if (len1 != len2)
3480		return (0);
3481	if (bcmp(nxh1, nxh2, len1))
3482		return (0);
3483	return (1);
3484}
3485
3486/*
3487 * Functions for dealing with active user lists
3488 */
3489
3490/*
3491 * Search the hash table for a user node with a matching IP address and uid field.
3492 * If found, the node's tm_last timestamp is updated and the node is returned.
3493 *
3494 * If not found, a new node is allocated (or reclaimed via LRU), initialized, and returned.
 * Returns NULL if a new node could not be allocated.
3496 *
3497 * The list's user_mutex lock MUST be held.
3498 */
3499struct nfs_user_stat_node *
3500nfsrv_get_user_stat_node(struct nfs_active_user_list *list, struct sockaddr *saddr, uid_t uid)
3501{
3502	struct nfs_user_stat_node		*unode;
3503	struct timeval				now;
3504	struct nfs_user_stat_hashtbl_head	*head;
3505
	/* search the hash table */
3507	head = NFS_USER_STAT_HASH(list->user_hashtbl, uid);
3508	LIST_FOREACH(unode, head, hash_link) {
3509		if ((uid == unode->uid) && (nfs_sockaddr_cmp(saddr, (struct sockaddr*)&unode->sock) == 0)) {
3510			/* found matching node */
3511			break;
3512		}
3513	}
3514
3515	if (unode) {
3516		/* found node in the hash table, now update lru position */
3517		TAILQ_REMOVE(&list->user_lru, unode, lru_link);
3518		TAILQ_INSERT_TAIL(&list->user_lru, unode, lru_link);
3519
3520		/* update time stamp */
3521		microtime(&now);
3522		unode->tm_last = (uint32_t)now.tv_sec;
3523		return unode;
3524	}
3525
3526	if (list->node_count < nfsrv_user_stat_max_nodes) {
3527		/* Allocate a new node */
3528		MALLOC(unode, struct nfs_user_stat_node *, sizeof(struct nfs_user_stat_node),
3529			M_TEMP, M_WAITOK | M_ZERO);
3530
3531		if (!unode)
3532			return NULL;
3533
3534		/* increment node count */
3535		OSAddAtomic(1, &nfsrv_user_stat_node_count);
3536		list->node_count++;
3537	} else {
3538		/* reuse the oldest node in the lru list */
3539		unode = TAILQ_FIRST(&list->user_lru);
3540
3541		if (!unode)
3542			return NULL;
3543
3544		/* Remove the node */
3545		TAILQ_REMOVE(&list->user_lru, unode, lru_link);
3546		LIST_REMOVE(unode, hash_link);
3547	}
3548
3549	/* Initialize the node */
3550	unode->uid = uid;
3551	bcopy(saddr, &unode->sock, saddr->sa_len);
3552	microtime(&now);
3553	unode->ops = 0;
3554	unode->bytes_read = 0;
3555	unode->bytes_written = 0;
3556	unode->tm_start = (uint32_t)now.tv_sec;
3557	unode->tm_last = (uint32_t)now.tv_sec;
3558
3559	/* insert the node  */
3560	TAILQ_INSERT_TAIL(&list->user_lru, unode, lru_link);
3561	LIST_INSERT_HEAD(head, unode, hash_link);
3562
3563	return unode;
3564}
3565
3566void
3567nfsrv_update_user_stat(struct nfs_export *nx, struct nfsrv_descript *nd, uid_t uid, u_int ops, u_int rd_bytes, u_int wr_bytes)
3568{
3569	struct nfs_user_stat_node	*unode;
3570	struct nfs_active_user_list	*ulist;
3571	struct sockaddr			*saddr;
3572
3573	if ((!nfsrv_user_stat_enabled) || (!nx) || (!nd) || (!nd->nd_nam))
3574		return;
3575
3576	saddr = (struct sockaddr *)mbuf_data(nd->nd_nam);
3577
3578	/* check address family before going any further */
3579	if ((saddr->sa_family != AF_INET) && (saddr->sa_family != AF_INET6))
3580		return;
3581
3582	ulist = &nx->nx_user_list;
3583
3584	/* lock the active user list */
3585	lck_mtx_lock(&ulist->user_mutex);
3586
3587	/* get the user node */
3588	unode = nfsrv_get_user_stat_node(ulist, saddr, uid);
3589
3590	if (!unode) {
3591		lck_mtx_unlock(&ulist->user_mutex);
3592		return;
3593	}
3594
3595	/* update counters */
3596	unode->ops += ops;
3597	unode->bytes_read += rd_bytes;
3598	unode->bytes_written += wr_bytes;
3599
3600	/* done */
3601	lck_mtx_unlock(&ulist->user_mutex);
3602}
3603
3604/* initialize an active user list */
3605void
3606nfsrv_init_user_list(struct nfs_active_user_list *ulist)
3607{
3608	uint i;
3609
3610	/* initialize the lru */
3611	TAILQ_INIT(&ulist->user_lru);
3612
3613	/* initialize the hash table */
	for (i = 0; i < NFS_USER_STAT_HASH_SIZE; i++)
3615		LIST_INIT(&ulist->user_hashtbl[i]);
3616	ulist->node_count = 0;
3617
3618	lck_mtx_init(&ulist->user_mutex, nfsrv_active_user_mutex_group, LCK_ATTR_NULL);
3619}
3620
3621/* Free all nodes in an active user list */
3622void
3623nfsrv_free_user_list(struct nfs_active_user_list *ulist)
3624{
3625	struct nfs_user_stat_node *unode;
3626
3627	if (!ulist)
3628		return;
3629
3630	while ((unode = TAILQ_FIRST(&ulist->user_lru))) {
3631		/* Remove node and free */
3632		TAILQ_REMOVE(&ulist->user_lru, unode, lru_link);
3633		LIST_REMOVE(unode, hash_link);
3634		FREE(unode, M_TEMP);
3635
3636		/* decrement node count */
3637		OSAddAtomic(-1, &nfsrv_user_stat_node_count);
3638	}
3639	ulist->node_count = 0;
3640
3641	lck_mtx_destroy(&ulist->user_mutex, nfsrv_active_user_mutex_group);
3642}
3643
3644/* Reclaim old expired user nodes from active user lists. */
3645void
3646nfsrv_active_user_list_reclaim(void)
3647{
3648	struct nfs_exportfs			*nxfs;
3649	struct nfs_export			*nx;
3650	struct nfs_active_user_list		*ulist;
3651	struct nfs_user_stat_hashtbl_head	oldlist;
3652	struct nfs_user_stat_node		*unode, *unode_next;
3653	struct timeval				now;
3654	uint32_t				tstale;
3655
3656	LIST_INIT(&oldlist);
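
	/*
	 * Expired nodes are collected on this private list while each export's
	 * user_mutex is held and only freed after all the locks have been
	 * dropped, keeping FREE() out of the lock hold times.
	 */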
3657
3658	lck_rw_lock_shared(&nfsrv_export_rwlock);
3659	microtime(&now);
3660	tstale = now.tv_sec - nfsrv_user_stat_max_idle_sec;
3661	LIST_FOREACH(nxfs, &nfsrv_exports, nxfs_next) {
3662		LIST_FOREACH(nx, &nxfs->nxfs_exports, nx_next) {
3663			/* Scan through all user nodes of this export */
3664			ulist = &nx->nx_user_list;
3665			lck_mtx_lock(&ulist->user_mutex);
3666			for (unode = TAILQ_FIRST(&ulist->user_lru); unode; unode = unode_next) {
3667				unode_next = TAILQ_NEXT(unode, lru_link);
3668
3669				/* check if this node has expired */
3670				if (unode->tm_last >= tstale)
3671					break;

				/* Remove node from the active user list */
				TAILQ_REMOVE(&ulist->user_lru, unode, lru_link);
				LIST_REMOVE(unode, hash_link);

				/* Add node to temp list */
				LIST_INSERT_HEAD(&oldlist, unode, hash_link);

				/* decrement node count */
				OSAddAtomic(-1, &nfsrv_user_stat_node_count);
				ulist->node_count--;
			}
			/* can unlock this export's list now */
			lck_mtx_unlock(&ulist->user_mutex);
		}
	}
	lck_rw_done(&nfsrv_export_rwlock);

	/* Free expired nodes */
	while ((unode = LIST_FIRST(&oldlist))) {
		LIST_REMOVE(unode, hash_link);
		FREE(unode, M_TEMP);
	}
}
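
/*
 * Note: the reclaim pass above relies on each export's LRU ordering to stop
 * early, and it releases the per-export mutexes and the export rwlock before
 * freeing, so the temporary list is walked with no locks held.  It is assumed
 * to be driven periodically (e.g. from the server's housekeeping timer)
 * rather than on the RPC fast path.
 */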

/*
 * Maps errno values to NFS error numbers.
 * NFSERR_IO is the catch-all for errno values not specifically defined in
 * RFC 1094.
 */
static u_char nfsrv_v2errmap[] = {
  NFSERR_PERM,	NFSERR_NOENT,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
  NFSERR_NXIO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
  NFSERR_IO,	NFSERR_IO,	NFSERR_ACCES,	NFSERR_IO,	NFSERR_IO,
  NFSERR_IO,	NFSERR_EXIST,	NFSERR_IO,	NFSERR_NODEV,	NFSERR_NOTDIR,
  NFSERR_ISDIR,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
  NFSERR_IO,	NFSERR_FBIG,	NFSERR_NOSPC,	NFSERR_IO,	NFSERR_ROFS,
  NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
  NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
  NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
  NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
  NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
  NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
  NFSERR_IO,	NFSERR_IO,	NFSERR_NAMETOL,	NFSERR_IO,	NFSERR_IO,
  NFSERR_NOTEMPTY, NFSERR_IO,	NFSERR_IO,	NFSERR_DQUOT,	NFSERR_STALE,
};
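
/*
 * Example of how the table above is consulted (see nfsrv_errmap() below):
 * the table is indexed by (errno - 1), so EACCES (13) lands on the 13th
 * entry, NFSERR_ACCES, while an errno past the end of the table falls back
 * to NFSERR_IO.
 */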

/*
 * Maps errno values to NFS error numbers.
 * Although it is not obvious whether NFS clients really care if a returned
 * error value is in the specified list for the procedure, the safest thing
 * to do is to filter them appropriately.  For Version 2, the X/Open XNFS
 * document is the only specification that defines error values for each RPC
 * (the RFC simply lists all possible error values for all RPCs), so the
 * filtering is not done for Version 2.
 * In each list below, the first entry is the default error return and the
 * rest are the valid errors for that RPC, in increasing numeric order.
 */
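/*
 * For example, nfsv3err_read below lists NFSERR_ACCES as a valid error for
 * the READ procedure, so an EACCES from the file system is passed through;
 * an errno not in the list (say EEXIST) is replaced by the list's first
 * entry, NFSERR_IO, when nfsrv_errmap() filters the reply status.
 */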
static short nfsv3err_null[] = {
	0,
	0,
};

static short nfsv3err_getattr[] = {
	NFSERR_IO,
	NFSERR_IO,
	NFSERR_STALE,
	NFSERR_BADHANDLE,
	NFSERR_SERVERFAULT,
	NFSERR_TRYLATER,
	0,
};

static short nfsv3err_setattr[] = {
	NFSERR_IO,
	NFSERR_PERM,
	NFSERR_IO,
	NFSERR_ACCES,
	NFSERR_INVAL,
	NFSERR_NOSPC,
	NFSERR_ROFS,
	NFSERR_DQUOT,
	NFSERR_STALE,
	NFSERR_BADHANDLE,
	NFSERR_NOT_SYNC,
	NFSERR_SERVERFAULT,
	NFSERR_TRYLATER,
	0,
};

static short nfsv3err_lookup[] = {
	NFSERR_IO,
	NFSERR_NOENT,
	NFSERR_IO,
	NFSERR_ACCES,
	NFSERR_NOTDIR,
	NFSERR_NAMETOL,
	NFSERR_STALE,
	NFSERR_BADHANDLE,
	NFSERR_SERVERFAULT,
	NFSERR_TRYLATER,
	0,
};

static short nfsv3err_access[] = {
	NFSERR_IO,
	NFSERR_IO,
	NFSERR_STALE,
	NFSERR_BADHANDLE,
	NFSERR_SERVERFAULT,
	NFSERR_TRYLATER,
	0,
};

static short nfsv3err_readlink[] = {
	NFSERR_IO,
	NFSERR_IO,
	NFSERR_ACCES,
	NFSERR_INVAL,
	NFSERR_STALE,
	NFSERR_BADHANDLE,
	NFSERR_NOTSUPP,
	NFSERR_SERVERFAULT,
	NFSERR_TRYLATER,
	0,
};

static short nfsv3err_read[] = {
	NFSERR_IO,
	NFSERR_IO,
	NFSERR_NXIO,
	NFSERR_ACCES,
	NFSERR_INVAL,
	NFSERR_STALE,
	NFSERR_BADHANDLE,
	NFSERR_SERVERFAULT,
	NFSERR_TRYLATER,
	0,
};

static short nfsv3err_write[] = {
	NFSERR_IO,
	NFSERR_IO,
	NFSERR_ACCES,
	NFSERR_INVAL,
	NFSERR_FBIG,
	NFSERR_NOSPC,
	NFSERR_ROFS,
	NFSERR_DQUOT,
	NFSERR_STALE,
	NFSERR_BADHANDLE,
	NFSERR_SERVERFAULT,
	NFSERR_TRYLATER,
	0,
};

static short nfsv3err_create[] = {
	NFSERR_IO,
	NFSERR_IO,
	NFSERR_ACCES,
	NFSERR_EXIST,
	NFSERR_NOTDIR,
	NFSERR_NOSPC,
	NFSERR_ROFS,
	NFSERR_NAMETOL,
	NFSERR_DQUOT,
	NFSERR_STALE,
	NFSERR_BADHANDLE,
	NFSERR_NOTSUPP,
	NFSERR_SERVERFAULT,
	NFSERR_TRYLATER,
	0,
};

static short nfsv3err_mkdir[] = {
	NFSERR_IO,
	NFSERR_IO,
	NFSERR_ACCES,
	NFSERR_EXIST,
	NFSERR_NOTDIR,
	NFSERR_NOSPC,
	NFSERR_ROFS,
	NFSERR_NAMETOL,
	NFSERR_DQUOT,
	NFSERR_STALE,
	NFSERR_BADHANDLE,
	NFSERR_NOTSUPP,
	NFSERR_SERVERFAULT,
	NFSERR_TRYLATER,
	0,
};

static short nfsv3err_symlink[] = {
	NFSERR_IO,
	NFSERR_IO,
	NFSERR_ACCES,
	NFSERR_EXIST,
	NFSERR_NOTDIR,
	NFSERR_NOSPC,
	NFSERR_ROFS,
	NFSERR_NAMETOL,
	NFSERR_DQUOT,
	NFSERR_STALE,
	NFSERR_BADHANDLE,
	NFSERR_NOTSUPP,
	NFSERR_SERVERFAULT,
	NFSERR_TRYLATER,
	0,
};

static short nfsv3err_mknod[] = {
	NFSERR_IO,
	NFSERR_IO,
	NFSERR_ACCES,
	NFSERR_EXIST,
	NFSERR_NOTDIR,
	NFSERR_NOSPC,
	NFSERR_ROFS,
	NFSERR_NAMETOL,
	NFSERR_DQUOT,
	NFSERR_STALE,
	NFSERR_BADHANDLE,
	NFSERR_NOTSUPP,
	NFSERR_SERVERFAULT,
	NFSERR_BADTYPE,
	NFSERR_TRYLATER,
	0,
};

static short nfsv3err_remove[] = {
	NFSERR_IO,
	NFSERR_NOENT,
	NFSERR_IO,
	NFSERR_ACCES,
	NFSERR_NOTDIR,
	NFSERR_ROFS,
	NFSERR_NAMETOL,
	NFSERR_STALE,
	NFSERR_BADHANDLE,
	NFSERR_SERVERFAULT,
	NFSERR_TRYLATER,
	0,
};

static short nfsv3err_rmdir[] = {
	NFSERR_IO,
	NFSERR_NOENT,
	NFSERR_IO,
	NFSERR_ACCES,
	NFSERR_EXIST,
	NFSERR_NOTDIR,
	NFSERR_INVAL,
	NFSERR_ROFS,
	NFSERR_NAMETOL,
	NFSERR_NOTEMPTY,
	NFSERR_STALE,
	NFSERR_BADHANDLE,
	NFSERR_NOTSUPP,
	NFSERR_SERVERFAULT,
	NFSERR_TRYLATER,
	0,
};

static short nfsv3err_rename[] = {
	NFSERR_IO,
	NFSERR_NOENT,
	NFSERR_IO,
	NFSERR_ACCES,
	NFSERR_EXIST,
	NFSERR_XDEV,
	NFSERR_NOTDIR,
	NFSERR_ISDIR,
	NFSERR_INVAL,
	NFSERR_NOSPC,
	NFSERR_ROFS,
	NFSERR_MLINK,
	NFSERR_NAMETOL,
	NFSERR_NOTEMPTY,
	NFSERR_DQUOT,
	NFSERR_STALE,
	NFSERR_BADHANDLE,
	NFSERR_NOTSUPP,
	NFSERR_SERVERFAULT,
	NFSERR_TRYLATER,
	0,
};

static short nfsv3err_link[] = {
	NFSERR_IO,
	NFSERR_IO,
	NFSERR_ACCES,
	NFSERR_EXIST,
	NFSERR_XDEV,
	NFSERR_NOTDIR,
	NFSERR_INVAL,
	NFSERR_NOSPC,
	NFSERR_ROFS,
	NFSERR_MLINK,
	NFSERR_NAMETOL,
	NFSERR_DQUOT,
	NFSERR_STALE,
	NFSERR_BADHANDLE,
	NFSERR_NOTSUPP,
	NFSERR_SERVERFAULT,
	NFSERR_TRYLATER,
	0,
};

static short nfsv3err_readdir[] = {
	NFSERR_IO,
	NFSERR_IO,
	NFSERR_ACCES,
	NFSERR_NOTDIR,
	NFSERR_STALE,
	NFSERR_BADHANDLE,
	NFSERR_BAD_COOKIE,
	NFSERR_TOOSMALL,
	NFSERR_SERVERFAULT,
	NFSERR_TRYLATER,
	0,
};

static short nfsv3err_readdirplus[] = {
	NFSERR_IO,
	NFSERR_IO,
	NFSERR_ACCES,
	NFSERR_NOTDIR,
	NFSERR_STALE,
	NFSERR_BADHANDLE,
	NFSERR_BAD_COOKIE,
	NFSERR_NOTSUPP,
	NFSERR_TOOSMALL,
	NFSERR_SERVERFAULT,
	NFSERR_TRYLATER,
	0,
};

static short nfsv3err_fsstat[] = {
	NFSERR_IO,
	NFSERR_IO,
	NFSERR_STALE,
	NFSERR_BADHANDLE,
	NFSERR_SERVERFAULT,
	NFSERR_TRYLATER,
	0,
};

static short nfsv3err_fsinfo[] = {
	NFSERR_STALE,
	NFSERR_STALE,
	NFSERR_BADHANDLE,
	NFSERR_SERVERFAULT,
	NFSERR_TRYLATER,
	0,
};

static short nfsv3err_pathconf[] = {
	NFSERR_STALE,
	NFSERR_STALE,
	NFSERR_BADHANDLE,
	NFSERR_SERVERFAULT,
	NFSERR_TRYLATER,
	0,
};

static short nfsv3err_commit[] = {
	NFSERR_IO,
	NFSERR_IO,
	NFSERR_STALE,
	NFSERR_BADHANDLE,
	NFSERR_SERVERFAULT,
	NFSERR_TRYLATER,
	0,
};

static short *nfsrv_v3errmap[] = {
	nfsv3err_null,
	nfsv3err_getattr,
	nfsv3err_setattr,
	nfsv3err_lookup,
	nfsv3err_access,
	nfsv3err_readlink,
	nfsv3err_read,
	nfsv3err_write,
	nfsv3err_create,
	nfsv3err_mkdir,
	nfsv3err_symlink,
	nfsv3err_mknod,
	nfsv3err_remove,
	nfsv3err_rmdir,
	nfsv3err_rename,
	nfsv3err_link,
	nfsv3err_readdir,
	nfsv3err_readdirplus,
	nfsv3err_fsstat,
	nfsv3err_fsinfo,
	nfsv3err_pathconf,
	nfsv3err_commit,
};
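
/*
 * nfsrv_v3errmap[] is indexed by the RPC procedure number (nd_procnum), so
 * its entries must stay in NFSPROC_* order, NFSPROC_NULL through
 * NFSPROC_COMMIT; nfsrv_errmap() below bounds-checks against NFSPROC_COMMIT
 * before using it.
 */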

/*
 * Map errnos to NFS error numbers. For Version 3 also filter out error
 * numbers not specified for the associated procedure.
 */
int
nfsrv_errmap(struct nfsrv_descript *nd, int err)
{
	short *defaulterrp, *errp;

	if (nd->nd_vers == NFS_VER2) {
		/* NFSv2: entries are u_char, so sizeof() is the element count; indexed by errno - 1 */
		if (err <= (int)sizeof(nfsrv_v2errmap))
			return ((int)nfsrv_v2errmap[err - 1]);
		return (NFSERR_IO);
	}
	/* NFSv3 */
	if (nd->nd_procnum > NFSPROC_COMMIT)
		return (err & 0xffff);
	errp = defaulterrp = nfsrv_v3errmap[nd->nd_procnum];
	/* scan the procedure's sorted list of valid errors; fall back to its default */
	while (*++errp) {
		if (*errp == err)
			return (err);
		else if (*errp > err)
			break;
	}
	return ((int)*defaulterrp);
}
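
/*
 * Typical use (a sketch; variable names are illustrative): a server routine
 * that is about to encode a reply maps its local errno before building the
 * status word, e.g.
 *
 *	error = nfsrv_errmap(nd, error);
 *
 * so that an NFSv3 client only ever sees an error code that is legal for the
 * procedure it called.
 */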

#endif /* NFSSERVER */
