nfs_nfsiod.c revision 11921
1/*
2 * Copyright (c) 1989, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Rick Macklem at The University of Guelph.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 *    must display the following acknowledgement:
18 *	This product includes software developed by the University of
19 *	California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 *    may be used to endorse or promote products derived from this software
22 *    without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 *	@(#)nfs_syscalls.c	8.3 (Berkeley) 1/4/94
37 * $Id: nfs_syscalls.c,v 1.7 1995/06/27 11:06:50 dfr Exp $
38 */
39
40#include <sys/param.h>
41#include <sys/systm.h>
42#include <sys/kernel.h>
43#include <sys/file.h>
44#include <sys/stat.h>
45#include <sys/vnode.h>
46#include <sys/mount.h>
47#include <sys/proc.h>
48#include <sys/uio.h>
49#include <sys/malloc.h>
50#include <sys/buf.h>
51#include <sys/mbuf.h>
52#include <sys/socket.h>
53#include <sys/socketvar.h>
54#include <sys/domain.h>
55#include <sys/protosw.h>
56#include <sys/namei.h>
57#include <sys/syslog.h>
58
59#include <netinet/in.h>
60#include <netinet/tcp.h>
61#ifdef ISO
62#include <netiso/iso.h>
63#endif
64#include <nfs/xdr_subs.h>
65#include <nfs/rpcv2.h>
66#include <nfs/nfsproto.h>
67#include <nfs/nfs.h>
68#include <nfs/nfsm_subs.h>
69#include <nfs/nfsrvcache.h>
70#include <nfs/nfsmount.h>
71#include <nfs/nfsnode.h>
72#include <nfs/nqnfs.h>
73#include <nfs/nfsrtt.h>
74
75void	nfsrv_zapsock	__P((struct nfssvc_sock *));
76
77/* Global defs. */
78extern int (*nfsrv3_procs[NFS_NPROCS])();
79extern struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON];
80extern int nfs_numasync;
81extern time_t nqnfsstarttime;
82extern int nqsrv_writeslack;
83extern int nfsrtton;
84extern struct nfsstats nfsstats;
85extern int nfsrvw_procrastinate;
86struct nfssvc_sock *nfs_udpsock, *nfs_cltpsock;
87int nuidhash_max = NFS_MAXUIDHASH;
88static int nfs_numnfsd = 0;
89int nfsd_waiting = 0;
90static int notstarted = 1;
91static int modify_flag = 0;
92static struct nfsdrt nfsdrt;
93void nfsrv_cleancache(), nfsrv_rcv(), nfsrv_wakenfsd(), nfs_sndunlock();
94static void nfsd_rt();
95void nfsrv_slpderef();
96
97#define	TRUE	1
98#define	FALSE	0
99
100static int nfs_asyncdaemon[NFS_MAXASYNCDAEMON];
101/*
102 * NFS server system calls
103 * getfh() lives here too, but maybe should move to kern/vfs_syscalls.c
104 */
105
106/*
107 * Get file handle system call
108 */
109struct getfh_args {
110	char	*fname;
111	fhandle_t *fhp;
112};
113int
114getfh(p, uap, retval)
115	struct proc *p;
116	register struct getfh_args *uap;
117	int *retval;
118{
119	register struct vnode *vp;
120	fhandle_t fh;
121	int error;
122	struct nameidata nd;
123
124	/*
125	 * Must be super user
126	 */
127	error = suser(p->p_ucred, &p->p_acflag);
128	if(error)
129		return (error);
130	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->fname, p);
131	error = namei(&nd);
132	if (error)
133		return (error);
134	vp = nd.ni_vp;
135	bzero((caddr_t)&fh, sizeof(fh));
136	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
137	error = VFS_VPTOFH(vp, &fh.fh_fid);
138	vput(vp);
139	if (error)
140		return (error);
141	error = copyout((caddr_t)&fh, (caddr_t)uap->fhp, sizeof (fh));
142	return (error);
143}
144
145/*
146 * Nfs server psuedo system call for the nfsd's
147 * Based on the flag value it either:
148 * - adds a socket to the selection list
149 * - remains in the kernel as an nfsd
150 * - remains in the kernel as an nfsiod
151 */
152struct nfssvc_args {
153	int flag;
154	caddr_t argp;
155};
156int
157nfssvc(p, uap, retval)
158	struct proc *p;
159	register struct nfssvc_args *uap;
160	int *retval;
161{
162	struct nameidata nd;
163	struct file *fp;
164	struct mbuf *nam;
165	struct nfsd_args nfsdarg;
166	struct nfsd_srvargs nfsd_srvargs, *nsd = &nfsd_srvargs;
167	struct nfsd_cargs ncd;
168	struct nfsd *nfsd;
169	struct nfssvc_sock *slp;
170	struct nfsuid *nuidp;
171	struct nfsmount *nmp;
172	int error;
173
174	/*
175	 * Must be super user
176	 */
177	error = suser(p->p_ucred, &p->p_acflag);
178	if(error)
179		return (error);
180	while (nfssvc_sockhead_flag & SLP_INIT) {
181		 nfssvc_sockhead_flag |= SLP_WANTINIT;
182		(void) tsleep((caddr_t)&nfssvc_sockhead, PSOCK, "nfsd init", 0);
183	}
184	if (uap->flag & NFSSVC_BIOD)
185		error = nfssvc_iod(p);
186	else if (uap->flag & NFSSVC_MNTD) {
187		error = copyin(uap->argp, (caddr_t)&ncd, sizeof (ncd));
188		if (error)
189			return (error);
190		NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
191			ncd.ncd_dirp, p);
192		error = namei(&nd);
193		if (error)
194			return (error);
195		if ((nd.ni_vp->v_flag & VROOT) == 0)
196			error = EINVAL;
197		nmp = VFSTONFS(nd.ni_vp->v_mount);
198		vput(nd.ni_vp);
199		if (error)
200			return (error);
201		if ((nmp->nm_flag & NFSMNT_MNTD) &&
202			(uap->flag & NFSSVC_GOTAUTH) == 0)
203			return (0);
204		nmp->nm_flag |= NFSMNT_MNTD;
205		error = nqnfs_clientd(nmp, p->p_ucred, &ncd, uap->flag,
206			uap->argp, p);
207	} else if (uap->flag & NFSSVC_ADDSOCK) {
208		error = copyin(uap->argp, (caddr_t)&nfsdarg, sizeof(nfsdarg));
209		if (error)
210			return (error);
211		error = getsock(p->p_fd, nfsdarg.sock, &fp);
212		if (error)
213			return (error);
214		/*
215		 * Get the client address for connected sockets.
216		 */
217		if (nfsdarg.name == NULL || nfsdarg.namelen == 0)
218			nam = (struct mbuf *)0;
219		else {
220			error = sockargs(&nam, nfsdarg.name, nfsdarg.namelen,
221				MT_SONAME);
222			if (error)
223				return (error);
224		}
225		error = nfssvc_addsock(fp, nam);
226	} else {
227		error = copyin(uap->argp, (caddr_t)nsd, sizeof (*nsd));
228		if (error)
229			return (error);
230		if ((uap->flag & NFSSVC_AUTHIN) && ((nfsd = nsd->nsd_nfsd)) &&
231			(nfsd->nfsd_slp->ns_flag & SLP_VALID)) {
232			slp = nfsd->nfsd_slp;
233
234			/*
235			 * First check to see if another nfsd has already
236			 * added this credential.
237			 */
238			for (nuidp = NUIDHASH(slp,nsd->nsd_cr.cr_uid)->lh_first;
239			    nuidp != 0; nuidp = nuidp->nu_hash.le_next) {
240				if (nuidp->nu_cr.cr_uid == nsd->nsd_cr.cr_uid &&
241				    (!nfsd->nfsd_nd->nd_nam2 ||
242				     netaddr_match(NU_NETFAM(nuidp),
243				     &nuidp->nu_haddr, nfsd->nfsd_nd->nd_nam2)))
244					break;
245			}
246			if (nuidp) {
247			    nfsrv_setcred(&nuidp->nu_cr,&nfsd->nfsd_nd->nd_cr);
248			    nfsd->nfsd_nd->nd_flag |= ND_KERBFULL;
249			} else {
250			    /*
251			     * Nope, so we will.
252			     */
253			    if (slp->ns_numuids < nuidhash_max) {
254				slp->ns_numuids++;
255				nuidp = (struct nfsuid *)
256				   malloc(sizeof (struct nfsuid), M_NFSUID,
257					M_WAITOK);
258			    } else
259				nuidp = (struct nfsuid *)0;
260			    if ((slp->ns_flag & SLP_VALID) == 0) {
261				if (nuidp)
262				    free((caddr_t)nuidp, M_NFSUID);
263			    } else {
264				if (nuidp == (struct nfsuid *)0) {
265				    nuidp = slp->ns_uidlruhead.tqh_first;
266				    LIST_REMOVE(nuidp, nu_hash);
267				    TAILQ_REMOVE(&slp->ns_uidlruhead, nuidp,
268					nu_lru);
269				    if (nuidp->nu_flag & NU_NAM)
270					m_freem(nuidp->nu_nam);
271			        }
272				nuidp->nu_flag = 0;
273				nuidp->nu_cr = nsd->nsd_cr;
274				if (nuidp->nu_cr.cr_ngroups > NGROUPS)
275				    nuidp->nu_cr.cr_ngroups = NGROUPS;
276				nuidp->nu_cr.cr_ref = 1;
277				nuidp->nu_timestamp = nsd->nsd_timestamp;
278				nuidp->nu_expire = time.tv_sec + nsd->nsd_ttl;
279				/*
280				 * and save the session key in nu_key.
281				 */
282				bcopy(nsd->nsd_key, nuidp->nu_key,
283				    sizeof (nsd->nsd_key));
284				if (nfsd->nfsd_nd->nd_nam2) {
285				    struct sockaddr_in *saddr;
286
287				    saddr = mtod(nfsd->nfsd_nd->nd_nam2,
288					 struct sockaddr_in *);
289				    switch (saddr->sin_family) {
290				    case AF_INET:
291					nuidp->nu_flag |= NU_INETADDR;
292					nuidp->nu_inetaddr =
293					     saddr->sin_addr.s_addr;
294					break;
295				    case AF_ISO:
296				    default:
297					nuidp->nu_flag |= NU_NAM;
298					nuidp->nu_nam = m_copym(
299					    nfsd->nfsd_nd->nd_nam2, 0,
300					     M_COPYALL, M_WAIT);
301					break;
302				    };
303				}
304				TAILQ_INSERT_TAIL(&slp->ns_uidlruhead, nuidp,
305					nu_lru);
306				LIST_INSERT_HEAD(NUIDHASH(slp, nsd->nsd_uid),
307					nuidp, nu_hash);
308				nfsrv_setcred(&nuidp->nu_cr,
309				    &nfsd->nfsd_nd->nd_cr);
310				nfsd->nfsd_nd->nd_flag |= ND_KERBFULL;
311			    }
312			}
313		}
314		if ((uap->flag & NFSSVC_AUTHINFAIL) && (nfsd = nsd->nsd_nfsd))
315			nfsd->nfsd_flag |= NFSD_AUTHFAIL;
316		error = nfssvc_nfsd(nsd, uap->argp, p);
317	}
318	if (error == EINTR || error == ERESTART)
319		error = 0;
320	return (error);
321}
322
323/*
324 * Adds a socket to the list for servicing by nfsds.
325 */
326int
327nfssvc_addsock(fp, mynam)
328	struct file *fp;
329	struct mbuf *mynam;
330{
331	register struct mbuf *m;
332	register int siz;
333	register struct nfssvc_sock *slp;
334	register struct socket *so;
335	struct nfssvc_sock *tslp;
336	int error, s;
337
338	so = (struct socket *)fp->f_data;
339	tslp = (struct nfssvc_sock *)0;
340	/*
341	 * Add it to the list, as required.
342	 */
343	if (so->so_proto->pr_protocol == IPPROTO_UDP) {
344		tslp = nfs_udpsock;
345		if (tslp->ns_flag & SLP_VALID) {
346			m_freem(mynam);
347			return (EPERM);
348		}
349#ifdef ISO
350	} else if (so->so_proto->pr_protocol == ISOPROTO_CLTP) {
351		tslp = nfs_cltpsock;
352		if (tslp->ns_flag & SLP_VALID) {
353			m_freem(mynam);
354			return (EPERM);
355		}
356#endif /* ISO */
357	}
358	if (so->so_type == SOCK_STREAM)
359		siz = NFS_MAXPACKET + sizeof (u_long);
360	else
361		siz = NFS_MAXPACKET;
362	error = soreserve(so, siz, siz);
363	if (error) {
364		m_freem(mynam);
365		return (error);
366	}
367
368	/*
369	 * Set protocol specific options { for now TCP only } and
370	 * reserve some space. For datagram sockets, this can get called
371	 * repeatedly for the same socket, but that isn't harmful.
372	 */
373	if (so->so_type == SOCK_STREAM) {
374		MGET(m, M_WAIT, MT_SOOPTS);
375		*mtod(m, int *) = 1;
376		m->m_len = sizeof(int);
377		sosetopt(so, SOL_SOCKET, SO_KEEPALIVE, m);
378	}
379	if (so->so_proto->pr_domain->dom_family == AF_INET &&
380	    so->so_proto->pr_protocol == IPPROTO_TCP) {
381		MGET(m, M_WAIT, MT_SOOPTS);
382		*mtod(m, int *) = 1;
383		m->m_len = sizeof(int);
384		sosetopt(so, IPPROTO_TCP, TCP_NODELAY, m);
385	}
386	so->so_rcv.sb_flags &= ~SB_NOINTR;
387	so->so_rcv.sb_timeo = 0;
388	so->so_snd.sb_flags &= ~SB_NOINTR;
389	so->so_snd.sb_timeo = 0;
390	if (tslp)
391		slp = tslp;
392	else {
393		slp = (struct nfssvc_sock *)
394			malloc(sizeof (struct nfssvc_sock), M_NFSSVC, M_WAITOK);
395		bzero((caddr_t)slp, sizeof (struct nfssvc_sock));
396		TAILQ_INIT(&slp->ns_uidlruhead);
397		TAILQ_INSERT_TAIL(&nfssvc_sockhead, slp, ns_chain);
398	}
399	slp->ns_so = so;
400	slp->ns_nam = mynam;
401	fp->f_count++;
402	slp->ns_fp = fp;
403	s = splnet();
404	so->so_upcallarg = (caddr_t)slp;
405	so->so_upcall = nfsrv_rcv;
406	slp->ns_flag = (SLP_VALID | SLP_NEEDQ);
407	nfsrv_wakenfsd(slp);
408	splx(s);
409	return (0);
410}
411
412/*
413 * Called by nfssvc() for nfsds. Just loops around servicing rpc requests
414 * until it is killed by a signal.
415 */
416int
417nfssvc_nfsd(nsd, argp, p)
418	struct nfsd_srvargs *nsd;
419	caddr_t argp;
420	struct proc *p;
421{
422	register struct mbuf *m;
423	register int siz;
424	register struct nfssvc_sock *slp;
425	register struct socket *so;
426	register int *solockp;
427	struct nfsd *nfsd = nsd->nsd_nfsd;
428	struct nfsrv_descript *nd = NULL;
429	struct mbuf *mreq;
430	int error = 0, cacherep, s, sotype, writes_todo;
431	u_quad_t cur_usec;
432
433#ifndef nolint
434	cacherep = RC_DOIT;
435	writes_todo = 0;
436#endif
437	s = splnet();
438	if (nfsd == (struct nfsd *)0) {
439		nsd->nsd_nfsd = nfsd = (struct nfsd *)
440			malloc(sizeof (struct nfsd), M_NFSD, M_WAITOK);
441		bzero((caddr_t)nfsd, sizeof (struct nfsd));
442		nfsd->nfsd_procp = p;
443		TAILQ_INSERT_TAIL(&nfsd_head, nfsd, nfsd_chain);
444		nfs_numnfsd++;
445	}
446	/*
447	 * Loop getting rpc requests until SIGKILL.
448	 */
449	for (;;) {
450		if ((nfsd->nfsd_flag & NFSD_REQINPROG) == 0) {
451			while (nfsd->nfsd_slp == (struct nfssvc_sock *)0 &&
452			    (nfsd_head_flag & NFSD_CHECKSLP) == 0) {
453				nfsd->nfsd_flag |= NFSD_WAITING;
454				nfsd_waiting++;
455				error = tsleep((caddr_t)nfsd, PSOCK | PCATCH,
456				    "nfsd", 0);
457				nfsd_waiting--;
458				if (error)
459					goto done;
460			}
461			if (nfsd->nfsd_slp == (struct nfssvc_sock *)0 &&
462			    (nfsd_head_flag & NFSD_CHECKSLP) != 0) {
463				for (slp = nfssvc_sockhead.tqh_first; slp != 0;
464				    slp = slp->ns_chain.tqe_next) {
465				    if ((slp->ns_flag & (SLP_VALID | SLP_DOREC))
466					== (SLP_VALID | SLP_DOREC)) {
467					    slp->ns_flag &= ~SLP_DOREC;
468					    slp->ns_sref++;
469					    nfsd->nfsd_slp = slp;
470					    break;
471				    }
472				}
473				if (slp == 0)
474					nfsd_head_flag &= ~NFSD_CHECKSLP;
475			}
476			if ((slp = nfsd->nfsd_slp) == (struct nfssvc_sock *)0)
477				continue;
478			if (slp->ns_flag & SLP_VALID) {
479				if (slp->ns_flag & SLP_DISCONN)
480					nfsrv_zapsock(slp);
481				else if (slp->ns_flag & SLP_NEEDQ) {
482					slp->ns_flag &= ~SLP_NEEDQ;
483					(void) nfs_sndlock(&slp->ns_solock,
484						(struct nfsreq *)0);
485					nfsrv_rcv(slp->ns_so, (caddr_t)slp,
486						M_WAIT);
487					nfs_sndunlock(&slp->ns_solock);
488				}
489				error = nfsrv_dorec(slp, nfsd, &nd);
490				cur_usec = (u_quad_t)time.tv_sec * 1000000 +
491					(u_quad_t)time.tv_usec;
492				if (error && slp->ns_tq.lh_first &&
493				    slp->ns_tq.lh_first->nd_time <= cur_usec) {
494					error = 0;
495					cacherep = RC_DOIT;
496					writes_todo = 1;
497				} else
498					writes_todo = 0;
499				nfsd->nfsd_flag |= NFSD_REQINPROG;
500			}
501		} else {
502			error = 0;
503			slp = nfsd->nfsd_slp;
504		}
505		if (error || (slp->ns_flag & SLP_VALID) == 0) {
506			if (nd) {
507				free((caddr_t)nd, M_NFSRVDESC);
508				nd = NULL;
509			}
510			nfsd->nfsd_slp = (struct nfssvc_sock *)0;
511			nfsd->nfsd_flag &= ~NFSD_REQINPROG;
512			nfsrv_slpderef(slp);
513			continue;
514		}
515		splx(s);
516		so = slp->ns_so;
517		sotype = so->so_type;
518		if (so->so_proto->pr_flags & PR_CONNREQUIRED)
519			solockp = &slp->ns_solock;
520		else
521			solockp = (int *)0;
522		if (nd) {
523		    nd->nd_starttime = time;
524		    if (nd->nd_nam2)
525			nd->nd_nam = nd->nd_nam2;
526		    else
527			nd->nd_nam = slp->ns_nam;
528
529		    /*
530		     * Check to see if authorization is needed.
531		     */
532		    if (nfsd->nfsd_flag & NFSD_NEEDAUTH) {
533			nfsd->nfsd_flag &= ~NFSD_NEEDAUTH;
534			nsd->nsd_haddr = mtod(nd->nd_nam,
535			    struct sockaddr_in *)->sin_addr.s_addr;
536			nsd->nsd_authlen = nfsd->nfsd_authlen;
537			nsd->nsd_verflen = nfsd->nfsd_verflen;
538			if (!copyout(nfsd->nfsd_authstr,nsd->nsd_authstr,
539				nfsd->nfsd_authlen) &&
540			    !copyout(nfsd->nfsd_verfstr, nsd->nsd_verfstr,
541				nfsd->nfsd_verflen) &&
542			    !copyout((caddr_t)nsd, argp, sizeof (*nsd)))
543			    return (ENEEDAUTH);
544			cacherep = RC_DROPIT;
545		    } else
546			cacherep = nfsrv_getcache(nd, slp, &mreq);
547
548		    /*
549		     * Check for just starting up for NQNFS and send
550		     * fake "try again later" replies to the NQNFS clients.
551		     */
552		    if (notstarted && nqnfsstarttime <= time.tv_sec) {
553			if (modify_flag) {
554				nqnfsstarttime = time.tv_sec + nqsrv_writeslack;
555				modify_flag = 0;
556			} else
557				notstarted = 0;
558		    }
559		    if (notstarted) {
560			if ((nd->nd_flag & ND_NQNFS) == 0)
561				cacherep = RC_DROPIT;
562			else if (nd->nd_procnum != NFSPROC_WRITE) {
563				nd->nd_procnum = NFSPROC_NOOP;
564				nd->nd_repstat = NQNFS_TRYLATER;
565				cacherep = RC_DOIT;
566			} else
567				modify_flag = 1;
568		    } else if (nfsd->nfsd_flag & NFSD_AUTHFAIL) {
569			nfsd->nfsd_flag &= ~NFSD_AUTHFAIL;
570			nd->nd_procnum = NFSPROC_NOOP;
571			nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK);
572			cacherep = RC_DOIT;
573		    }
574		}
575
576		/*
577		 * Loop to get all the write rpc relies that have been
578		 * gathered together.
579		 */
580		do {
581		    switch (cacherep) {
582		    case RC_DOIT:
583			if (writes_todo || (nd->nd_procnum == NFSPROC_WRITE &&
584			    nfsrvw_procrastinate > 0 && !notstarted))
585			    error = nfsrv_writegather(&nd, slp,
586				nfsd->nfsd_procp, &mreq);
587			else
588			    error = (*(nfsrv3_procs[nd->nd_procnum]))(nd,
589				slp, nfsd->nfsd_procp, &mreq);
590			if (mreq == NULL)
591				break;
592			if (error) {
593				if (nd->nd_procnum != NQNFSPROC_VACATED)
594					nfsstats.srv_errs++;
595				nfsrv_updatecache(nd, FALSE, mreq);
596				if (nd->nd_nam2)
597					m_freem(nd->nd_nam2);
598				break;
599			}
600			nfsstats.srvrpccnt[nd->nd_procnum]++;
601			nfsrv_updatecache(nd, TRUE, mreq);
602			nd->nd_mrep = (struct mbuf *)0;
603		    case RC_REPLY:
604			m = mreq;
605			siz = 0;
606			while (m) {
607				siz += m->m_len;
608				m = m->m_next;
609			}
610			if (siz <= 0 || siz > NFS_MAXPACKET) {
611				printf("mbuf siz=%d\n",siz);
612				panic("Bad nfs svc reply");
613			}
614			m = mreq;
615			m->m_pkthdr.len = siz;
616			m->m_pkthdr.rcvif = (struct ifnet *)0;
617			/*
618			 * For stream protocols, prepend a Sun RPC
619			 * Record Mark.
620			 */
621			if (sotype == SOCK_STREAM) {
622				M_PREPEND(m, NFSX_UNSIGNED, M_WAIT);
623				*mtod(m, u_long *) = htonl(0x80000000 | siz);
624			}
625			if (solockp)
626				(void) nfs_sndlock(solockp, (struct nfsreq *)0);
627			if (slp->ns_flag & SLP_VALID)
628			    error = nfs_send(so, nd->nd_nam2, m, NULL);
629			else {
630			    error = EPIPE;
631			    m_freem(m);
632			}
633			if (nfsrtton)
634				nfsd_rt(sotype, nd, cacherep);
635			if (nd->nd_nam2)
636				MFREE(nd->nd_nam2, m);
637			if (nd->nd_mrep)
638				m_freem(nd->nd_mrep);
639			if (error == EPIPE)
640				nfsrv_zapsock(slp);
641			if (solockp)
642				nfs_sndunlock(solockp);
643			if (error == EINTR || error == ERESTART) {
644				free((caddr_t)nd, M_NFSRVDESC);
645				nfsrv_slpderef(slp);
646				s = splnet();
647				goto done;
648			}
649			break;
650		    case RC_DROPIT:
651			if (nfsrtton)
652				nfsd_rt(sotype, nd, cacherep);
653			m_freem(nd->nd_mrep);
654			m_freem(nd->nd_nam2);
655			break;
656		    };
657		    if (nd) {
658			FREE((caddr_t)nd, M_NFSRVDESC);
659			nd = NULL;
660		    }
661
662		    /*
663		     * Check to see if there are outstanding writes that
664		     * need to be serviced.
665		     */
666		    cur_usec = (u_quad_t)time.tv_sec * 1000000 +
667			(u_quad_t)time.tv_usec;
668		    s = splsoftclock();
669		    if (slp->ns_tq.lh_first &&
670			slp->ns_tq.lh_first->nd_time <= cur_usec) {
671			cacherep = RC_DOIT;
672			writes_todo = 1;
673		    } else
674			writes_todo = 0;
675		    splx(s);
676		} while (writes_todo);
677		s = splnet();
678		if (nfsrv_dorec(slp, nfsd, &nd)) {
679			nfsd->nfsd_flag &= ~NFSD_REQINPROG;
680			nfsd->nfsd_slp = NULL;
681			nfsrv_slpderef(slp);
682		}
683	}
684done:
685	TAILQ_REMOVE(&nfsd_head, nfsd, nfsd_chain);
686	splx(s);
687	free((caddr_t)nfsd, M_NFSD);
688	nsd->nsd_nfsd = (struct nfsd *)0;
689	if (--nfs_numnfsd == 0)
690		nfsrv_init(TRUE);	/* Reinitialize everything */
691	return (error);
692}
693
694/*
695 * Asynchronous I/O daemons for client nfs.
696 * They do read-ahead and write-behind operations on the block I/O cache.
697 * Never returns unless it fails or gets killed.
698 */
699int
700nfssvc_iod(p)
701	struct proc *p;
702{
703	register struct buf *bp, *nbp;
704	register int i, myiod;
705	struct vnode *vp;
706	int error = 0, s;
707
708	/*
709	 * Assign my position or return error if too many already running
710	 */
711	myiod = -1;
712	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
713		if (nfs_asyncdaemon[i] == 0) {
714			nfs_asyncdaemon[i]++;
715			myiod = i;
716			break;
717		}
718	if (myiod == -1)
719		return (EBUSY);
720	nfs_numasync++;
721	/*
722	 * Just loop around doin our stuff until SIGKILL
723	 */
724	for (;;) {
725	    while (nfs_bufq.tqh_first == NULL && error == 0) {
726		nfs_iodwant[myiod] = p;
727		error = tsleep((caddr_t)&nfs_iodwant[myiod],
728			PWAIT | PCATCH, "nfsidl", 0);
729	    }
730	    while ((bp = nfs_bufq.tqh_first) != NULL) {
731		/* Take one off the front of the list */
732		TAILQ_REMOVE(&nfs_bufq, bp, b_freelist);
733		if (bp->b_flags & B_READ)
734		    (void) nfs_doio(bp, bp->b_rcred, (struct proc *)0);
735		else do {
736		    /*
737		     * Look for a delayed write for the same vnode, so I can do
738		     * it now. We must grab it before calling nfs_doio() to
739		     * avoid any risk of the vnode getting vclean()'d while
740		     * we are doing the write rpc.
741		     */
742		    vp = bp->b_vp;
743		    s = splbio();
744		    for (nbp = vp->v_dirtyblkhd.lh_first; nbp;
745			nbp = nbp->b_vnbufs.le_next) {
746			if ((nbp->b_flags &
747			    (B_BUSY|B_DELWRI|B_NEEDCOMMIT|B_NOCACHE))!=B_DELWRI)
748			    continue;
749			bremfree(nbp);
750			vfs_busy_pages(nbp, 1);
751			nbp->b_flags |= (B_BUSY|B_ASYNC);
752			break;
753		    }
754		    splx(s);
755		    /*
756		     * For the delayed write, do the first part of nfs_bwrite()
757		     * up to, but not including nfs_strategy().
758		     */
759		    if (nbp) {
760			nbp->b_flags &= ~(B_READ|B_DONE|B_ERROR|B_DELWRI);
761			reassignbuf(nbp, nbp->b_vp);
762			nbp->b_vp->v_numoutput++;
763		    }
764		    (void) nfs_doio(bp, bp->b_wcred, (struct proc *)0);
765		} while (bp = nbp);
766	    }
767	    if (error) {
768		nfs_asyncdaemon[myiod] = 0;
769		nfs_numasync--;
770		return (error);
771	    }
772	}
773}
774
775/*
776 * Shut down a socket associated with an nfssvc_sock structure.
777 * Should be called with the send lock set, if required.
778 * The trick here is to increment the sref at the start, so that the nfsds
779 * will stop using it and clear ns_flag at the end so that it will not be
780 * reassigned during cleanup.
781 */
782void
783nfsrv_zapsock(slp)
784	register struct nfssvc_sock *slp;
785{
786	register struct nfsuid *nuidp, *nnuidp;
787	register struct nfsrv_descript *nwp, *nnwp;
788	struct socket *so;
789	struct file *fp;
790	struct mbuf *m;
791	int s;
792
793	slp->ns_flag &= ~SLP_ALLFLAGS;
794	fp = slp->ns_fp;
795	if (fp) {
796		slp->ns_fp = (struct file *)0;
797		so = slp->ns_so;
798		so->so_upcall = NULL;
799		soshutdown(so, 2);
800		closef(fp, (struct proc *)0);
801		if (slp->ns_nam)
802			MFREE(slp->ns_nam, m);
803		m_freem(slp->ns_raw);
804		m_freem(slp->ns_rec);
805		for (nuidp = slp->ns_uidlruhead.tqh_first; nuidp != 0;
806		    nuidp = nnuidp) {
807			nnuidp = nuidp->nu_lru.tqe_next;
808			LIST_REMOVE(nuidp, nu_hash);
809			TAILQ_REMOVE(&slp->ns_uidlruhead, nuidp, nu_lru);
810			if (nuidp->nu_flag & NU_NAM)
811				m_freem(nuidp->nu_nam);
812			free((caddr_t)nuidp, M_NFSUID);
813		}
814		s = splsoftclock();
815		for (nwp = slp->ns_tq.lh_first; nwp; nwp = nnwp) {
816			nnwp = nwp->nd_tq.le_next;
817			LIST_REMOVE(nwp, nd_tq);
818			free((caddr_t)nwp, M_NFSRVDESC);
819		}
820		LIST_INIT(&slp->ns_tq);
821		splx(s);
822	}
823}
824
825/*
826 * Get an authorization string for the uid by having the mount_nfs sitting
827 * on this mount point porpous out of the kernel and do it.
828 */
829int
830nfs_getauth(nmp, rep, cred, auth_str, auth_len, verf_str, verf_len, key)
831	register struct nfsmount *nmp;
832	struct nfsreq *rep;
833	struct ucred *cred;
834	char **auth_str;
835	int *auth_len;
836	char *verf_str;
837	int *verf_len;
838	NFSKERBKEY_T key;		/* return session key */
839{
840	int error = 0;
841
842	while ((nmp->nm_flag & NFSMNT_WAITAUTH) == 0) {
843		nmp->nm_flag |= NFSMNT_WANTAUTH;
844		(void) tsleep((caddr_t)&nmp->nm_authtype, PSOCK,
845			"nfsauth1", 2 * hz);
846		error = nfs_sigintr(nmp, rep, rep->r_procp);
847		if (error) {
848			nmp->nm_flag &= ~NFSMNT_WANTAUTH;
849			return (error);
850		}
851	}
852	nmp->nm_flag &= ~(NFSMNT_WAITAUTH | NFSMNT_WANTAUTH);
853	nmp->nm_authstr = *auth_str = (char *)malloc(RPCAUTH_MAXSIZ, M_TEMP, M_WAITOK);
854	nmp->nm_authlen = RPCAUTH_MAXSIZ;
855	nmp->nm_verfstr = verf_str;
856	nmp->nm_verflen = *verf_len;
857	nmp->nm_authuid = cred->cr_uid;
858	wakeup((caddr_t)&nmp->nm_authstr);
859
860	/*
861	 * And wait for mount_nfs to do its stuff.
862	 */
863	while ((nmp->nm_flag & NFSMNT_HASAUTH) == 0 && error == 0) {
864		(void) tsleep((caddr_t)&nmp->nm_authlen, PSOCK,
865			"nfsauth2", 2 * hz);
866		error = nfs_sigintr(nmp, rep, rep->r_procp);
867	}
868	if (nmp->nm_flag & NFSMNT_AUTHERR) {
869		nmp->nm_flag &= ~NFSMNT_AUTHERR;
870		error = EAUTH;
871	}
872	if (error)
873		free((caddr_t)*auth_str, M_TEMP);
874	else {
875		*auth_len = nmp->nm_authlen;
876		*verf_len = nmp->nm_verflen;
877		bcopy((caddr_t)nmp->nm_key, (caddr_t)key, sizeof (key));
878	}
879	nmp->nm_flag &= ~NFSMNT_HASAUTH;
880	nmp->nm_flag |= NFSMNT_WAITAUTH;
881	if (nmp->nm_flag & NFSMNT_WANTAUTH) {
882		nmp->nm_flag &= ~NFSMNT_WANTAUTH;
883		wakeup((caddr_t)&nmp->nm_authtype);
884	}
885	return (error);
886}
887
888/*
889 * Get a nickname authenticator and verifier.
890 */
891int
892nfs_getnickauth(nmp, cred, auth_str, auth_len, verf_str, verf_len)
893	struct nfsmount *nmp;
894	struct ucred *cred;
895	char **auth_str;
896	int *auth_len;
897	char *verf_str;
898	int verf_len;
899{
900	register struct nfsuid *nuidp;
901	register u_long *nickp, *verfp;
902	struct timeval ktvin, ktvout;
903
904#ifdef DIAGNOSTIC
905	if (verf_len < (4 * NFSX_UNSIGNED))
906		panic("nfs_getnickauth verf too small");
907#endif
908	for (nuidp = NMUIDHASH(nmp, cred->cr_uid)->lh_first;
909	    nuidp != 0; nuidp = nuidp->nu_hash.le_next) {
910		if (nuidp->nu_cr.cr_uid == cred->cr_uid)
911			break;
912	}
913	if (!nuidp || nuidp->nu_expire < time.tv_sec)
914		return (EACCES);
915
916	/*
917	 * Move to the end of the lru list (end of lru == most recently used).
918	 */
919	TAILQ_REMOVE(&nmp->nm_uidlruhead, nuidp, nu_lru);
920	TAILQ_INSERT_TAIL(&nmp->nm_uidlruhead, nuidp, nu_lru);
921
922	nickp = (u_long *)malloc(2 * NFSX_UNSIGNED, M_TEMP, M_WAITOK);
923	*nickp++ = txdr_unsigned(RPCAKN_NICKNAME);
924	*nickp = txdr_unsigned(nuidp->nu_nickname);
925	*auth_str = (char *)nickp;
926	*auth_len = 2 * NFSX_UNSIGNED;
927
928	/*
929	 * Now we must encrypt the verifier and package it up.
930	 */
931	verfp = (u_long *)verf_str;
932	*verfp++ = txdr_unsigned(RPCAKN_NICKNAME);
933	if (time.tv_sec > nuidp->nu_timestamp.tv_sec ||
934	    (time.tv_sec == nuidp->nu_timestamp.tv_sec &&
935	     time.tv_usec > nuidp->nu_timestamp.tv_usec))
936		nuidp->nu_timestamp = time;
937	else
938		nuidp->nu_timestamp.tv_usec++;
939	ktvin.tv_sec = txdr_unsigned(nuidp->nu_timestamp.tv_sec);
940	ktvin.tv_usec = txdr_unsigned(nuidp->nu_timestamp.tv_usec);
941
942	/*
943	 * Now encrypt the timestamp verifier in ecb mode using the session
944	 * key.
945	 */
946#ifdef NFSKERB
947	XXX
948#endif
949
950	*verfp++ = ktvout.tv_sec;
951	*verfp++ = ktvout.tv_usec;
952	*verfp = 0;
953	return (0);
954}
955
956/*
957 * Save the current nickname in a hash list entry on the mount point.
958 */
959int
960nfs_savenickauth(nmp, cred, len, key, mdp, dposp, mrep)
961	register struct nfsmount *nmp;
962	struct ucred *cred;
963	int len;
964	NFSKERBKEY_T key;
965	struct mbuf **mdp;
966	char **dposp;
967	struct mbuf *mrep;
968{
969	register struct nfsuid *nuidp;
970	register u_long *tl;
971	register long t1;
972	struct mbuf *md = *mdp;
973	struct timeval ktvin, ktvout;
974	u_long nick;
975	char *dpos = *dposp, *cp2;
976	int deltasec, error = 0;
977
978	if (len == (3 * NFSX_UNSIGNED)) {
979		nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED);
980		ktvin.tv_sec = *tl++;
981		ktvin.tv_usec = *tl++;
982		nick = fxdr_unsigned(u_long, *tl);
983
984		/*
985		 * Decrypt the timestamp in ecb mode.
986		 */
987#ifdef NFSKERB
988		XXX
989#endif
990		ktvout.tv_sec = fxdr_unsigned(long, ktvout.tv_sec);
991		ktvout.tv_usec = fxdr_unsigned(long, ktvout.tv_usec);
992		deltasec = time.tv_sec - ktvout.tv_sec;
993		if (deltasec < 0)
994			deltasec = -deltasec;
995		/*
996		 * If ok, add it to the hash list for the mount point.
997		 */
998		if (deltasec <= NFS_KERBCLOCKSKEW) {
999			if (nmp->nm_numuids < nuidhash_max) {
1000				nmp->nm_numuids++;
1001				nuidp = (struct nfsuid *)
1002				   malloc(sizeof (struct nfsuid), M_NFSUID,
1003					M_WAITOK);
1004			} else {
1005				nuidp = nmp->nm_uidlruhead.tqh_first;
1006				LIST_REMOVE(nuidp, nu_hash);
1007				TAILQ_REMOVE(&nmp->nm_uidlruhead, nuidp,
1008					nu_lru);
1009			}
1010			nuidp->nu_flag = 0;
1011			nuidp->nu_cr.cr_uid = cred->cr_uid;
1012			nuidp->nu_expire = time.tv_sec + NFS_KERBTTL;
1013			nuidp->nu_timestamp = ktvout;
1014			nuidp->nu_nickname = nick;
1015			bcopy(key, nuidp->nu_key, sizeof (key));
1016			TAILQ_INSERT_TAIL(&nmp->nm_uidlruhead, nuidp,
1017				nu_lru);
1018			LIST_INSERT_HEAD(NMUIDHASH(nmp, cred->cr_uid),
1019				nuidp, nu_hash);
1020		}
1021	} else
1022		nfsm_adv(nfsm_rndup(len));
1023nfsmout:
1024	*mdp = md;
1025	*dposp = dpos;
1026	return (error);
1027}
1028
1029/*
1030 * Derefence a server socket structure. If it has no more references and
1031 * is no longer valid, you can throw it away.
1032 */
1033void
1034nfsrv_slpderef(slp)
1035	register struct nfssvc_sock *slp;
1036{
1037	if (--(slp->ns_sref) == 0 && (slp->ns_flag & SLP_VALID) == 0) {
1038		TAILQ_REMOVE(&nfssvc_sockhead, slp, ns_chain);
1039		free((caddr_t)slp, M_NFSSVC);
1040	}
1041}
1042
1043/*
1044 * Initialize the data structures for the server.
1045 * Handshake with any new nfsds starting up to avoid any chance of
1046 * corruption.
1047 */
1048void
1049nfsrv_init(terminating)
1050	int terminating;
1051{
1052	register struct nfssvc_sock *slp, *nslp;
1053
1054	if (nfssvc_sockhead_flag & SLP_INIT)
1055		panic("nfsd init");
1056	nfssvc_sockhead_flag |= SLP_INIT;
1057	if (terminating) {
1058		for (slp = nfssvc_sockhead.tqh_first; slp != 0; slp = nslp) {
1059			nslp = slp->ns_chain.tqe_next;
1060			if (slp->ns_flag & SLP_VALID)
1061				nfsrv_zapsock(slp);
1062			TAILQ_REMOVE(&nfssvc_sockhead, slp, ns_chain);
1063			free((caddr_t)slp, M_NFSSVC);
1064		}
1065		nfsrv_cleancache();	/* And clear out server cache */
1066	}
1067
1068	TAILQ_INIT(&nfssvc_sockhead);
1069	nfssvc_sockhead_flag &= ~SLP_INIT;
1070	if (nfssvc_sockhead_flag & SLP_WANTINIT) {
1071		nfssvc_sockhead_flag &= ~SLP_WANTINIT;
1072		wakeup((caddr_t)&nfssvc_sockhead);
1073	}
1074
1075	TAILQ_INIT(&nfsd_head);
1076	nfsd_head_flag &= ~NFSD_CHECKSLP;
1077
1078	nfs_udpsock = (struct nfssvc_sock *)
1079	    malloc(sizeof (struct nfssvc_sock), M_NFSSVC, M_WAITOK);
1080	bzero((caddr_t)nfs_udpsock, sizeof (struct nfssvc_sock));
1081	TAILQ_INIT(&nfs_udpsock->ns_uidlruhead);
1082	TAILQ_INSERT_HEAD(&nfssvc_sockhead, nfs_udpsock, ns_chain);
1083
1084	nfs_cltpsock = (struct nfssvc_sock *)
1085	    malloc(sizeof (struct nfssvc_sock), M_NFSSVC, M_WAITOK);
1086	bzero((caddr_t)nfs_cltpsock, sizeof (struct nfssvc_sock));
1087	TAILQ_INIT(&nfs_cltpsock->ns_uidlruhead);
1088	TAILQ_INSERT_TAIL(&nfssvc_sockhead, nfs_cltpsock, ns_chain);
1089}
1090
1091/*
1092 * Add entries to the server monitor log.
1093 */
1094static void
1095nfsd_rt(sotype, nd, cacherep)
1096	int sotype;
1097	register struct nfsrv_descript *nd;
1098	int cacherep;
1099{
1100	register struct drt *rt;
1101
1102	rt = &nfsdrt.drt[nfsdrt.pos];
1103	if (cacherep == RC_DOIT)
1104		rt->flag = 0;
1105	else if (cacherep == RC_REPLY)
1106		rt->flag = DRT_CACHEREPLY;
1107	else
1108		rt->flag = DRT_CACHEDROP;
1109	if (sotype == SOCK_STREAM)
1110		rt->flag |= DRT_TCP;
1111	if (nd->nd_flag & ND_NQNFS)
1112		rt->flag |= DRT_NQNFS;
1113	else if (nd->nd_flag & ND_NFSV3)
1114		rt->flag |= DRT_NFSV3;
1115	rt->proc = nd->nd_procnum;
1116	if (mtod(nd->nd_nam, struct sockaddr *)->sa_family == AF_INET)
1117	    rt->ipadr = mtod(nd->nd_nam, struct sockaddr_in *)->sin_addr.s_addr;
1118	else
1119	    rt->ipadr = INADDR_ANY;
1120	rt->resptime = ((time.tv_sec - nd->nd_starttime.tv_sec) * 1000000) +
1121		(time.tv_usec - nd->nd_starttime.tv_usec);
1122	rt->tstamp = time;
1123	nfsdrt.pos = (nfsdrt.pos + 1) % NFSRTTLOGSIZ;
1124}
1125