nfs_nfsiod.c revision 15480
1/*
2 * Copyright (c) 1989, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Rick Macklem at The University of Guelph.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 *    must display the following acknowledgement:
18 *	This product includes software developed by the University of
19 *	California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 *    may be used to endorse or promote products derived from this software
22 *    without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 *	@(#)nfs_syscalls.c	8.3 (Berkeley) 1/4/94
37 * $Id: nfs_syscalls.c,v 1.13 1996/01/13 23:27:58 phk Exp $
38 */
39
40#include <sys/param.h>
41#include <sys/systm.h>
42#include <sys/sysproto.h>
43#include <sys/kernel.h>
44#include <sys/file.h>
45#include <sys/filedesc.h>
46#include <sys/stat.h>
47#include <sys/vnode.h>
48#include <sys/mount.h>
49#include <sys/proc.h>
50#include <sys/uio.h>
51#include <sys/malloc.h>
52#include <sys/buf.h>
53#include <sys/mbuf.h>
54#include <sys/socket.h>
55#include <sys/socketvar.h>
56#include <sys/domain.h>
57#include <sys/protosw.h>
58#include <sys/namei.h>
59#include <sys/syslog.h>
60
61#include <netinet/in.h>
62#include <netinet/tcp.h>
63#ifdef ISO
64#include <netiso/iso.h>
65#endif
66#include <nfs/xdr_subs.h>
67#include <nfs/rpcv2.h>
68#include <nfs/nfsproto.h>
69#include <nfs/nfs.h>
70#include <nfs/nfsm_subs.h>
71#include <nfs/nfsrvcache.h>
72#include <nfs/nfsmount.h>
73#include <nfs/nfsnode.h>
74#include <nfs/nqnfs.h>
75#include <nfs/nfsrtt.h>
76
77/* Global defs. */
/*
 * Table of server procedure handlers, indexed by the request's
 * nd_procnum when nfssvc_nfsd() dispatches a request.
 */
78extern int (*nfsrv3_procs[NFS_NPROCS]) __P((struct nfsrv_descript *nd,
79					    struct nfssvc_sock *slp,
80					    struct proc *procp,
81					    struct mbuf **mreqp));
/* Per-slot proc pointer an nfsiod stores before sleeping on its slot. */
82extern struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON];
/* Number of nfsiods currently inside nfssvc_iod(). */
83extern int nfs_numasync;
/* Used by nfssvc_nfsd() to bound the NQNFS "just started" period. */
84extern time_t nqnfsstarttime;
85extern int nqsrv_writeslack;
/* Non-zero enables the nfsd_rt() log entries. */
86extern int nfsrtton;
87extern struct nfsstats nfsstats;
/* When > 0, write requests are routed through nfsrv_writegather(). */
88extern int nfsrvw_procrastinate;
/*
 * Socket structures pre-allocated by nfsrv_init() and reused by
 * nfssvc_addsock() for UDP and (with ISO) CLTP sockets.
 */
89struct nfssvc_sock *nfs_udpsock, *nfs_cltpsock;
/* Upper bound on cached nfsuid entries per server socket / mount point. */
90static int nuidhash_max = NFS_MAXUIDHASH;
91
92static void	nfsrv_zapsock __P((struct nfssvc_sock *slp));
93static int	nfssvc_iod __P((struct proc *));
94
95#define	TRUE	1
96#define	FALSE	0
97
/* Slot-in-use markers for the nfsiods; non-zero means the slot is taken. */
98static int nfs_asyncdaemon[NFS_MAXASYNCDAEMON];
99
100
101#ifndef NFS_NOSERVER
/* Number of nfsds currently asleep waiting for work in nfssvc_nfsd(). */
102int nfsd_waiting = 0;
/* Circular response-time log filled in by nfsd_rt(). */
103static struct nfsdrt nfsdrt;
/* Number of nfsds inside nfssvc_nfsd(); the last one out re-runs nfsrv_init(). */
104static int nfs_numnfsd = 0;
/*
 * NQNFS start-up state: while notstarted is set nfssvc_nfsd() answers
 * non-write NQNFS requests with NQNFS_TRYLATER; modify_flag records that
 * a write was seen so the start time is pushed out once more.
 */
105static int notstarted = 1;
106static int modify_flag = 0;
107static void	nfsd_rt __P((int sotype, struct nfsrv_descript *nd,
108			     int cacherep));
109static int	nfssvc_addsock __P((struct file *,struct mbuf *));
110static int	nfssvc_nfsd __P((struct nfsd_srvargs *,caddr_t,struct proc *));
111/*
112 * NFS server system calls
113 * getfh() lives here too, but maybe should move to kern/vfs_syscalls.c
114 */
115
116/*
117 * Get file handle system call
118 */
119#ifndef _SYS_SYSPROTO_H_
120struct getfh_args {
121	char	*fname;
122	fhandle_t *fhp;
123};
124#endif
125int
126getfh(p, uap, retval)
127	struct proc *p;
128	register struct getfh_args *uap;
129	int *retval;
130{
131	register struct vnode *vp;
132	fhandle_t fh;
133	int error;
134	struct nameidata nd;
135
136	/*
137	 * Must be super user
138	 */
139	error = suser(p->p_ucred, &p->p_acflag);
140	if(error)
141		return (error);
142	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->fname, p);
143	error = namei(&nd);
144	if (error)
145		return (error);
146	vp = nd.ni_vp;
147	bzero((caddr_t)&fh, sizeof(fh));
148	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
149	error = VFS_VPTOFH(vp, &fh.fh_fid);
150	vput(vp);
151	if (error)
152		return (error);
153	error = copyout((caddr_t)&fh, (caddr_t)uap->fhp, sizeof (fh));
154	return (error);
155}
156
157#endif /* NFS_NOSERVER */
158/*
 * Nfs server pseudo system call for the nfsd's and nfsiod's.
 * Super user only.  Based on the flag value it either:
 * - remains in the kernel as an nfsiod (NFSSVC_BIOD)
 * - hands a mount point to nqnfs_clientd() (NFSSVC_MNTD)
 * - adds a socket to the selection list (NFSSVC_ADDSOCK)
 * - remains in the kernel as an nfsd (any other flag), after first
 *   recording a credential (NFSSVC_AUTHIN) or flagging an authentication
 *   failure (NFSSVC_AUTHINFAIL) passed in through uap->argp.
 * When built with NFS_NOSERVER only NFSSVC_BIOD is serviced; every other
 * request yields ENXIO.
 * EINTR and ERESTART from the selected service are returned as 0.
 */
165#ifndef _SYS_SYSPROTO_H_
166struct nfssvc_args {
167	int flag;
168	caddr_t argp;
169};
170#endif
171int
172nfssvc(p, uap, retval)
173	struct proc *p;
174	register struct nfssvc_args *uap;
175	int *retval;
176{
177#ifndef NFS_NOSERVER
178	struct nameidata nd;
179	struct file *fp;
180	struct mbuf *nam;
181	struct nfsd_args nfsdarg;
182	struct nfsd_srvargs nfsd_srvargs, *nsd = &nfsd_srvargs;
183	struct nfsd_cargs ncd;
184	struct nfsd *nfsd;
185	struct nfssvc_sock *slp;
186	struct nfsuid *nuidp;
187	struct nfsmount *nmp;
188#endif /* NFS_NOSERVER */
189	int error;
190
191	/*
192	 * Must be super user
193	 */
194	error = suser(p->p_ucred, &p->p_acflag);
195	if(error)
196		return (error);
	/*
	 * nfsrv_init() holds SLP_INIT while it rebuilds the socket list;
	 * ask to be woken (SLP_WANTINIT) and wait until it is finished.
	 */
197	while (nfssvc_sockhead_flag & SLP_INIT) {
198		 nfssvc_sockhead_flag |= SLP_WANTINIT;
199		(void) tsleep((caddr_t)&nfssvc_sockhead, PSOCK, "nfsd init", 0);
200	}
201	if (uap->flag & NFSSVC_BIOD)
202		error = nfssvc_iod(p);
203#ifdef NFS_NOSERVER
204	else
205		error = ENXIO;
206#else /* !NFS_NOSERVER */
207	else if (uap->flag & NFSSVC_MNTD) {
		/*
		 * Mount point service: argp is a struct nfsd_cargs whose
		 * ncd_dirp names a directory that must be the root vnode
		 * of its file system.
		 */
208		error = copyin(uap->argp, (caddr_t)&ncd, sizeof (ncd));
209		if (error)
210			return (error);
211		NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
212			ncd.ncd_dirp, p);
213		error = namei(&nd);
214		if (error)
215			return (error);
216		if ((nd.ni_vp->v_flag & VROOT) == 0)
217			error = EINVAL;
		/*
		 * NOTE(review): the mount is converted with VFSTONFS() without
		 * a visible check that it really is an NFS mount -- confirm.
		 */
218		nmp = VFSTONFS(nd.ni_vp->v_mount);
219		vput(nd.ni_vp);
220		if (error)
221			return (error);
		/*
		 * The mount is already marked NFSMNT_MNTD and this call does
		 * not carry NFSSVC_GOTAUTH: nothing to do.
		 */
222		if ((nmp->nm_flag & NFSMNT_MNTD) &&
223			(uap->flag & NFSSVC_GOTAUTH) == 0)
224			return (0);
225		nmp->nm_flag |= NFSMNT_MNTD;
226		error = nqnfs_clientd(nmp, p->p_ucred, &ncd, uap->flag,
227			uap->argp, p);
228	} else if (uap->flag & NFSSVC_ADDSOCK) {
		/*
		 * argp is a struct nfsd_args giving the descriptor of the
		 * socket to serve and, optionally, the peer's address.
		 */
229		error = copyin(uap->argp, (caddr_t)&nfsdarg, sizeof(nfsdarg));
230		if (error)
231			return (error);
232		error = getsock(p->p_fd, nfsdarg.sock, &fp);
233		if (error)
234			return (error);
235		/*
236		 * Get the client address for connected sockets.
237		 */
238		if (nfsdarg.name == NULL || nfsdarg.namelen == 0)
239			nam = (struct mbuf *)0;
240		else {
241			error = sockargs(&nam, nfsdarg.name, nfsdarg.namelen,
242				MT_SONAME);
243			if (error)
244				return (error);
245		}
		/* nfssvc_addsock() keeps nam or frees it on failure. */
246		error = nfssvc_addsock(fp, nam);
247	} else {
		/*
		 * nfsd service: argp is a struct nfsd_srvargs.  nsd_nfsd is
		 * non-null when this daemon is coming back in, and is then
		 * used as a kernel pointer to its struct nfsd.
		 */
248		error = copyin(uap->argp, (caddr_t)nsd, sizeof (*nsd));
249		if (error)
250			return (error);
251		if ((uap->flag & NFSSVC_AUTHIN) && ((nfsd = nsd->nsd_nfsd)) &&
252			(nfsd->nfsd_slp->ns_flag & SLP_VALID)) {
253			slp = nfsd->nfsd_slp;
254
255			/*
256			 * First check to see if another nfsd has already
257			 * added this credential.
258			 */
259			for (nuidp = NUIDHASH(slp,nsd->nsd_cr.cr_uid)->lh_first;
260			    nuidp != 0; nuidp = nuidp->nu_hash.le_next) {
261				if (nuidp->nu_cr.cr_uid == nsd->nsd_cr.cr_uid &&
262				    (!nfsd->nfsd_nd->nd_nam2 ||
263				     netaddr_match(NU_NETFAM(nuidp),
264				     &nuidp->nu_haddr, nfsd->nfsd_nd->nd_nam2)))
265					break;
266			}
267			if (nuidp) {
				/* Found: use the cached credential for the request. */
268			    nfsrv_setcred(&nuidp->nu_cr,&nfsd->nfsd_nd->nd_cr);
269			    nfsd->nfsd_nd->nd_flag |= ND_KERBFULL;
270			} else {
271			    /*
272			     * Nope, so we will.
273			     */
				/*
				 * Below the limit a new entry is allocated;
				 * at the limit the head of the LRU list is
				 * recycled further down.
				 */
274			    if (slp->ns_numuids < nuidhash_max) {
275				slp->ns_numuids++;
276				nuidp = (struct nfsuid *)
277				   malloc(sizeof (struct nfsuid), M_NFSUID,
278					M_WAITOK);
279			    } else
280				nuidp = (struct nfsuid *)0;
				/*
				 * SLP_VALID is tested again here, presumably
				 * because malloc(M_WAITOK) may have slept.
				 */
281			    if ((slp->ns_flag & SLP_VALID) == 0) {
282				if (nuidp)
283				    free((caddr_t)nuidp, M_NFSUID);
284			    } else {
285				if (nuidp == (struct nfsuid *)0) {
286				    nuidp = slp->ns_uidlruhead.tqh_first;
287				    LIST_REMOVE(nuidp, nu_hash);
288				    TAILQ_REMOVE(&slp->ns_uidlruhead, nuidp,
289					nu_lru);
290				    if (nuidp->nu_flag & NU_NAM)
291					m_freem(nuidp->nu_nam);
292			        }
293				nuidp->nu_flag = 0;
294				nuidp->nu_cr = nsd->nsd_cr;
				/* Clamp the group count supplied by userland. */
295				if (nuidp->nu_cr.cr_ngroups > NGROUPS)
296				    nuidp->nu_cr.cr_ngroups = NGROUPS;
297				nuidp->nu_cr.cr_ref = 1;
298				nuidp->nu_timestamp = nsd->nsd_timestamp;
299				nuidp->nu_expire = time.tv_sec + nsd->nsd_ttl;
300				/*
301				 * and save the session key in nu_key.
302				 */
303				bcopy(nsd->nsd_key, nuidp->nu_key,
304				    sizeof (nsd->nsd_key));
				/*
				 * Remember which client the entry belongs to:
				 * the bare IP address for AF_INET, a copy of
				 * the address mbuf for anything else.
				 */
305				if (nfsd->nfsd_nd->nd_nam2) {
306				    struct sockaddr_in *saddr;
307
308				    saddr = mtod(nfsd->nfsd_nd->nd_nam2,
309					 struct sockaddr_in *);
310				    switch (saddr->sin_family) {
311				    case AF_INET:
312					nuidp->nu_flag |= NU_INETADDR;
313					nuidp->nu_inetaddr =
314					     saddr->sin_addr.s_addr;
315					break;
316				    case AF_ISO:
317				    default:
318					nuidp->nu_flag |= NU_NAM;
319					nuidp->nu_nam = m_copym(
320					    nfsd->nfsd_nd->nd_nam2, 0,
321					     M_COPYALL, M_WAIT);
322					break;
323				    };
324				}
325				TAILQ_INSERT_TAIL(&slp->ns_uidlruhead, nuidp,
326					nu_lru);
				/*
				 * NOTE(review): the lookup above hashes on
				 * nsd_cr.cr_uid but the insert hashes on
				 * nsd_uid -- confirm the two always agree.
				 */
327				LIST_INSERT_HEAD(NUIDHASH(slp, nsd->nsd_uid),
328					nuidp, nu_hash);
329				nfsrv_setcred(&nuidp->nu_cr,
330				    &nfsd->nfsd_nd->nd_cr);
331				nfsd->nfsd_nd->nd_flag |= ND_KERBFULL;
332			    }
333			}
334		}
		/* Authentication failed: nfssvc_nfsd() answers the request. */
335		if ((uap->flag & NFSSVC_AUTHINFAIL) && (nfsd = nsd->nsd_nfsd))
336			nfsd->nfsd_flag |= NFSD_AUTHFAIL;
337		error = nfssvc_nfsd(nsd, uap->argp, p);
338	}
339#endif /* NFS_NOSERVER */
	/* A daemon ended by a signal is not reported as an error. */
340	if (error == EINTR || error == ERESTART)
341		error = 0;
342	return (error);
343}
344
345#ifndef NFS_NOSERVER
346/*
347 * Adds a socket to the list for servicing by nfsds.
348 */
349static int
350nfssvc_addsock(fp, mynam)
351	struct file *fp;
352	struct mbuf *mynam;
353{
354	register struct mbuf *m;
355	register int siz;
356	register struct nfssvc_sock *slp;
357	register struct socket *so;
358	struct nfssvc_sock *tslp;
359	int error, s;
360
361	so = (struct socket *)fp->f_data;
362	tslp = (struct nfssvc_sock *)0;
363	/*
364	 * Add it to the list, as required.
365	 */
366	if (so->so_proto->pr_protocol == IPPROTO_UDP) {
367		tslp = nfs_udpsock;
368		if (tslp->ns_flag & SLP_VALID) {
369			m_freem(mynam);
370			return (EPERM);
371		}
372#ifdef ISO
373	} else if (so->so_proto->pr_protocol == ISOPROTO_CLTP) {
374		tslp = nfs_cltpsock;
375		if (tslp->ns_flag & SLP_VALID) {
376			m_freem(mynam);
377			return (EPERM);
378		}
379#endif /* ISO */
380	}
381	if (so->so_type == SOCK_STREAM)
382		siz = NFS_MAXPACKET + sizeof (u_long);
383	else
384		siz = NFS_MAXPACKET;
385	error = soreserve(so, siz, siz);
386	if (error) {
387		m_freem(mynam);
388		return (error);
389	}
390
391	/*
392	 * Set protocol specific options { for now TCP only } and
393	 * reserve some space. For datagram sockets, this can get called
394	 * repeatedly for the same socket, but that isn't harmful.
395	 */
396	if (so->so_type == SOCK_STREAM) {
397		MGET(m, M_WAIT, MT_SOOPTS);
398		*mtod(m, int *) = 1;
399		m->m_len = sizeof(int);
400		sosetopt(so, SOL_SOCKET, SO_KEEPALIVE, m);
401	}
402	if (so->so_proto->pr_domain->dom_family == AF_INET &&
403	    so->so_proto->pr_protocol == IPPROTO_TCP) {
404		MGET(m, M_WAIT, MT_SOOPTS);
405		*mtod(m, int *) = 1;
406		m->m_len = sizeof(int);
407		sosetopt(so, IPPROTO_TCP, TCP_NODELAY, m);
408	}
409	so->so_rcv.sb_flags &= ~SB_NOINTR;
410	so->so_rcv.sb_timeo = 0;
411	so->so_snd.sb_flags &= ~SB_NOINTR;
412	so->so_snd.sb_timeo = 0;
413	if (tslp)
414		slp = tslp;
415	else {
416		slp = (struct nfssvc_sock *)
417			malloc(sizeof (struct nfssvc_sock), M_NFSSVC, M_WAITOK);
418		bzero((caddr_t)slp, sizeof (struct nfssvc_sock));
419		TAILQ_INIT(&slp->ns_uidlruhead);
420		TAILQ_INSERT_TAIL(&nfssvc_sockhead, slp, ns_chain);
421	}
422	slp->ns_so = so;
423	slp->ns_nam = mynam;
424	fp->f_count++;
425	slp->ns_fp = fp;
426	s = splnet();
427	so->so_upcallarg = (caddr_t)slp;
428	so->so_upcall = nfsrv_rcv;
429	slp->ns_flag = (SLP_VALID | SLP_NEEDQ);
430	nfsrv_wakenfsd(slp);
431	splx(s);
432	return (0);
433}
434
435/*
 * Called by nfssvc() for nfsds. Just loops around servicing rpc requests
 * until it is killed by a signal.
 *
 * nsd   kernel copy of the caller's nfsd_srvargs; nsd_nfsd is null on the
 *       first call and identifies this daemon's struct nfsd afterwards.
 * argp  user address of that structure, used to copy it back out when an
 *       authentication request is handed to userland.
 * p     the calling process.
 *
 * Returns ENEEDAUTH when userland must supply a credential (the daemon
 * state is kept for the return trip), otherwise the error that ended the
 * loop.  On the latter path the struct nfsd is freed, and the last nfsd
 * to leave reinitializes the server through nfsrv_init(TRUE).
 */
438static int
439nfssvc_nfsd(nsd, argp, p)
440	struct nfsd_srvargs *nsd;
441	caddr_t argp;
442	struct proc *p;
443{
444	register struct mbuf *m;
445	register int siz;
446	register struct nfssvc_sock *slp;
447	register struct socket *so;
448	register int *solockp;
449	struct nfsd *nfsd = nsd->nsd_nfsd;
450	struct nfsrv_descript *nd = NULL;
451	struct mbuf *mreq;
452	int error = 0, cacherep, s, sotype, writes_todo;
453	u_quad_t cur_usec;
454
455#ifndef nolint
456	cacherep = RC_DOIT;
457	writes_todo = 0;
458#endif
459	s = splnet();
	/*
	 * First time in for this daemon: allocate its state and link it
	 * on nfsd_head.
	 */
460	if (nfsd == (struct nfsd *)0) {
461		nsd->nsd_nfsd = nfsd = (struct nfsd *)
462			malloc(sizeof (struct nfsd), M_NFSD, M_WAITOK);
463		bzero((caddr_t)nfsd, sizeof (struct nfsd));
464		nfsd->nfsd_procp = p;
465		TAILQ_INSERT_TAIL(&nfsd_head, nfsd, nfsd_chain);
466		nfs_numnfsd++;
467	}
468	/*
469	 * Loop getting rpc requests until SIGKILL.
470	 */
471	for (;;) {
472		if ((nfsd->nfsd_flag & NFSD_REQINPROG) == 0) {
			/*
			 * Idle: sleep until a socket is assigned to this nfsd
			 * or NFSD_CHECKSLP asks for a scan of the socket list.
			 * An interrupted sleep ends the daemon.
			 */
473			while (nfsd->nfsd_slp == (struct nfssvc_sock *)0 &&
474			    (nfsd_head_flag & NFSD_CHECKSLP) == 0) {
475				nfsd->nfsd_flag |= NFSD_WAITING;
476				nfsd_waiting++;
477				error = tsleep((caddr_t)nfsd, PSOCK | PCATCH,
478				    "nfsd", 0);
479				nfsd_waiting--;
480				if (error)
481					goto done;
482			}
			/*
			 * Nothing assigned: claim the first valid socket that
			 * has records pending (SLP_DOREC) and take a reference
			 * on it; clear NFSD_CHECKSLP when none is left.
			 */
483			if (nfsd->nfsd_slp == (struct nfssvc_sock *)0 &&
484			    (nfsd_head_flag & NFSD_CHECKSLP) != 0) {
485				for (slp = nfssvc_sockhead.tqh_first; slp != 0;
486				    slp = slp->ns_chain.tqe_next) {
487				    if ((slp->ns_flag & (SLP_VALID | SLP_DOREC))
488					== (SLP_VALID | SLP_DOREC)) {
489					    slp->ns_flag &= ~SLP_DOREC;
490					    slp->ns_sref++;
491					    nfsd->nfsd_slp = slp;
492					    break;
493				    }
494				}
495				if (slp == 0)
496					nfsd_head_flag &= ~NFSD_CHECKSLP;
497			}
498			if ((slp = nfsd->nfsd_slp) == (struct nfssvc_sock *)0)
499				continue;
500			if (slp->ns_flag & SLP_VALID) {
				/*
				 * Tear down a disconnected socket, or pull in
				 * queued data under the send lock, then try
				 * to dequeue one request.
				 */
501				if (slp->ns_flag & SLP_DISCONN)
502					nfsrv_zapsock(slp);
503				else if (slp->ns_flag & SLP_NEEDQ) {
504					slp->ns_flag &= ~SLP_NEEDQ;
505					(void) nfs_sndlock(&slp->ns_solock,
506						(struct nfsreq *)0);
507					nfsrv_rcv(slp->ns_so, (caddr_t)slp,
508						M_WAIT);
509					nfs_sndunlock(&slp->ns_solock);
510				}
511				error = nfsrv_dorec(slp, nfsd, &nd);
512				cur_usec = (u_quad_t)time.tv_sec * 1000000 +
513					(u_quad_t)time.tv_usec;
				/*
				 * No request was dequeued, but a gathered
				 * write at the head of ns_tq has come due:
				 * carry on so the write gathering code below
				 * gets run.
				 */
514				if (error && slp->ns_tq.lh_first &&
515				    slp->ns_tq.lh_first->nd_time <= cur_usec) {
516					error = 0;
517					cacherep = RC_DOIT;
518					writes_todo = 1;
519				} else
520					writes_todo = 0;
521				nfsd->nfsd_flag |= NFSD_REQINPROG;
522			}
523		} else {
			/*
			 * A request was already in progress when this nfsd
			 * came in (presumably the return from an ENEEDAUTH
			 * trip to userland).
			 * NOTE(review): nd is still NULL on this path while
			 * the RC_DOIT case below dereferences nd -- confirm
			 * how the parked descriptor is meant to be recovered.
			 */
524			error = 0;
525			slp = nfsd->nfsd_slp;
526		}
		/*
		 * Nothing to do or the socket went away: drop any descriptor
		 * and the socket reference and go back to waiting.
		 */
527		if (error || (slp->ns_flag & SLP_VALID) == 0) {
528			if (nd) {
529				free((caddr_t)nd, M_NFSRVDESC);
530				nd = NULL;
531			}
532			nfsd->nfsd_slp = (struct nfssvc_sock *)0;
533			nfsd->nfsd_flag &= ~NFSD_REQINPROG;
534			nfsrv_slpderef(slp);
535			continue;
536		}
537		splx(s);
538		so = slp->ns_so;
539		sotype = so->so_type;
		/* Only connection based sockets use the send lock. */
540		if (so->so_proto->pr_flags & PR_CONNREQUIRED)
541			solockp = &slp->ns_solock;
542		else
543			solockp = (int *)0;
544		if (nd) {
545		    nd->nd_starttime = time;
		    /* Reply address: per-request if present, else the socket's. */
546		    if (nd->nd_nam2)
547			nd->nd_nam = nd->nd_nam2;
548		    else
549			nd->nd_nam = slp->ns_nam;
550
551		    /*
552		     * Check to see if authorization is needed.
553		     */
554		    if (nfsd->nfsd_flag & NFSD_NEEDAUTH) {
555			nfsd->nfsd_flag &= ~NFSD_NEEDAUTH;
556			nsd->nsd_haddr = mtod(nd->nd_nam,
557			    struct sockaddr_in *)->sin_addr.s_addr;
558			nsd->nsd_authlen = nfsd->nfsd_authlen;
559			nsd->nsd_verflen = nfsd->nfsd_verflen;
			/*
			 * Hand authenticator, verifier and the updated
			 * argument block to userland.  If all three copies
			 * succeed return ENEEDAUTH with NFSD_REQINPROG still
			 * set; otherwise the request is dropped.
			 */
560			if (!copyout(nfsd->nfsd_authstr,nsd->nsd_authstr,
561				nfsd->nfsd_authlen) &&
562			    !copyout(nfsd->nfsd_verfstr, nsd->nsd_verfstr,
563				nfsd->nfsd_verflen) &&
564			    !copyout((caddr_t)nsd, argp, sizeof (*nsd)))
565			    return (ENEEDAUTH);
566			cacherep = RC_DROPIT;
567		    } else
568			cacherep = nfsrv_getcache(nd, slp, &mreq);
569
570		    /*
571		     * Check for just starting up for NQNFS and send
572		     * fake "try again later" replies to the NQNFS clients.
573		     */
574		    if (notstarted && nqnfsstarttime <= time.tv_sec) {
575			if (modify_flag) {
576				nqnfsstarttime = time.tv_sec + nqsrv_writeslack;
577				modify_flag = 0;
578			} else
579				notstarted = 0;
580		    }
581		    if (notstarted) {
582			if ((nd->nd_flag & ND_NQNFS) == 0)
583				cacherep = RC_DROPIT;
584			else if (nd->nd_procnum != NFSPROC_WRITE) {
585				nd->nd_procnum = NFSPROC_NOOP;
586				nd->nd_repstat = NQNFS_TRYLATER;
587				cacherep = RC_DOIT;
588			} else
589				modify_flag = 1;
590		    } else if (nfsd->nfsd_flag & NFSD_AUTHFAIL) {
			/* Userland reported failure: reply AUTH_TOOWEAK. */
591			nfsd->nfsd_flag &= ~NFSD_AUTHFAIL;
592			nd->nd_procnum = NFSPROC_NOOP;
593			nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK);
594			cacherep = RC_DOIT;
595		    }
596		}
597
598		/*
599		 * Loop to get all the write rpc replies that have been
600		 * gathered together.
601		 */
602		do {
603		    switch (cacherep) {
604		    case RC_DOIT:
			/*
			 * Due gathered writes, or a write request while
			 * write gathering is enabled, go through
			 * nfsrv_writegather(); everything else is dispatched
			 * through the procedure table.
			 */
605			if (writes_todo || (nd->nd_procnum == NFSPROC_WRITE &&
606			    nfsrvw_procrastinate > 0 && !notstarted))
607			    error = nfsrv_writegather(&nd, slp,
608				nfsd->nfsd_procp, &mreq);
609			else
610			    error = (*(nfsrv3_procs[nd->nd_procnum]))(nd,
611				slp, nfsd->nfsd_procp, &mreq);
			/* No reply mbuf was produced: nothing to send. */
612			if (mreq == NULL)
613				break;
614			if (error) {
615				if (nd->nd_procnum != NQNFSPROC_VACATED)
616					nfsstats.srv_errs++;
617				nfsrv_updatecache(nd, FALSE, mreq);
618				if (nd->nd_nam2)
619					m_freem(nd->nd_nam2);
620				break;
621			}
622			nfsstats.srvrpccnt[nd->nd_procnum]++;
623			nfsrv_updatecache(nd, TRUE, mreq);
624			nd->nd_mrep = (struct mbuf *)0;
			/* FALLTHROUGH: send the reply just built */
625		    case RC_REPLY:
			/* Total up the reply length and sanity check it. */
626			m = mreq;
627			siz = 0;
628			while (m) {
629				siz += m->m_len;
630				m = m->m_next;
631			}
632			if (siz <= 0 || siz > NFS_MAXPACKET) {
633				printf("mbuf siz=%d\n",siz);
634				panic("Bad nfs svc reply");
635			}
636			m = mreq;
637			m->m_pkthdr.len = siz;
638			m->m_pkthdr.rcvif = (struct ifnet *)0;
639			/*
640			 * For stream protocols, prepend a Sun RPC
641			 * Record Mark.
642			 */
643			if (sotype == SOCK_STREAM) {
644				M_PREPEND(m, NFSX_UNSIGNED, M_WAIT);
645				*mtod(m, u_long *) = htonl(0x80000000 | siz);
646			}
647			if (solockp)
648				(void) nfs_sndlock(solockp, (struct nfsreq *)0);
			/* The socket may have been invalidated meanwhile. */
649			if (slp->ns_flag & SLP_VALID)
650			    error = nfs_send(so, nd->nd_nam2, m, NULL);
651			else {
652			    error = EPIPE;
653			    m_freem(m);
654			}
655			if (nfsrtton)
656				nfsd_rt(sotype, nd, cacherep);
657			if (nd->nd_nam2)
658				MFREE(nd->nd_nam2, m);
659			if (nd->nd_mrep)
660				m_freem(nd->nd_mrep);
661			if (error == EPIPE)
662				nfsrv_zapsock(slp);
663			if (solockp)
664				nfs_sndunlock(solockp);
			/*
			 * Interrupted while sending: release the descriptor
			 * and the socket reference and leave the daemon.
			 */
665			if (error == EINTR || error == ERESTART) {
666				free((caddr_t)nd, M_NFSRVDESC);
667				nfsrv_slpderef(slp);
668				s = splnet();
669				goto done;
670			}
671			break;
672		    case RC_DROPIT:
673			if (nfsrtton)
674				nfsd_rt(sotype, nd, cacherep);
675			m_freem(nd->nd_mrep);
676			m_freem(nd->nd_nam2);
677			break;
678		    };
679		    if (nd) {
680			FREE((caddr_t)nd, M_NFSRVDESC);
681			nd = NULL;
682		    }
683
684		    /*
685		     * Check to see if there are outstanding writes that
686		     * need to be serviced.
687		     */
688		    cur_usec = (u_quad_t)time.tv_sec * 1000000 +
689			(u_quad_t)time.tv_usec;
690		    s = splsoftclock();
691		    if (slp->ns_tq.lh_first &&
692			slp->ns_tq.lh_first->nd_time <= cur_usec) {
693			cacherep = RC_DOIT;
694			writes_todo = 1;
695		    } else
696			writes_todo = 0;
697		    splx(s);
698		} while (writes_todo);
699		s = splnet();
		/*
		 * Try for another request on the same socket; when there is
		 * none give the socket up and go back to waiting.
		 */
700		if (nfsrv_dorec(slp, nfsd, &nd)) {
701			nfsd->nfsd_flag &= ~NFSD_REQINPROG;
702			nfsd->nfsd_slp = NULL;
703			nfsrv_slpderef(slp);
704		}
705	}
	/* Reached at splnet: unlink and free this daemon's state. */
706done:
707	TAILQ_REMOVE(&nfsd_head, nfsd, nfsd_chain);
708	splx(s);
709	free((caddr_t)nfsd, M_NFSD);
710	nsd->nsd_nfsd = (struct nfsd *)0;
711	if (--nfs_numnfsd == 0)
712		nfsrv_init(TRUE);	/* Reinitialize everything */
713	return (error);
714}
715#endif /* NFS_NOSERVER */
716#endif /* NFS_NOSERVER */
717
718/*
719 * Asynchronous I/O daemons for client nfs.
720 * They do read-ahead and write-behind operations on the block I/O cache.
721 * Never returns unless it fails or gets killed.
722 */
723static int
724nfssvc_iod(p)
725	struct proc *p;
726{
727	register struct buf *bp, *nbp;
728	register int i, myiod;
729	struct vnode *vp;
730	int error = 0, s;
731
732	/*
733	 * Assign my position or return error if too many already running
734	 */
735	myiod = -1;
736	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
737		if (nfs_asyncdaemon[i] == 0) {
738			nfs_asyncdaemon[i]++;
739			myiod = i;
740			break;
741		}
742	if (myiod == -1)
743		return (EBUSY);
744	nfs_numasync++;
745	/*
746	 * Just loop around doin our stuff until SIGKILL
747	 */
748	for (;;) {
749	    while (nfs_bufq.tqh_first == NULL && error == 0) {
750		nfs_iodwant[myiod] = p;
751		error = tsleep((caddr_t)&nfs_iodwant[myiod],
752			PWAIT | PCATCH, "nfsidl", 0);
753	    }
754	    while ((bp = nfs_bufq.tqh_first) != NULL) {
755		/* Take one off the front of the list */
756		TAILQ_REMOVE(&nfs_bufq, bp, b_freelist);
757		if (bp->b_flags & B_READ)
758		    (void) nfs_doio(bp, bp->b_rcred, (struct proc *)0);
759		else do {
760		    /*
761		     * Look for a delayed write for the same vnode, so I can do
762		     * it now. We must grab it before calling nfs_doio() to
763		     * avoid any risk of the vnode getting vclean()'d while
764		     * we are doing the write rpc.
765		     */
766		    vp = bp->b_vp;
767		    s = splbio();
768		    for (nbp = vp->v_dirtyblkhd.lh_first; nbp;
769			nbp = nbp->b_vnbufs.le_next) {
770			if ((nbp->b_flags &
771			    (B_BUSY|B_DELWRI|B_NEEDCOMMIT|B_NOCACHE))!=B_DELWRI)
772			    continue;
773			bremfree(nbp);
774			vfs_busy_pages(nbp, 1);
775			nbp->b_flags |= (B_BUSY|B_ASYNC);
776			break;
777		    }
778		    splx(s);
779		    /*
780		     * For the delayed write, do the first part of nfs_bwrite()
781		     * up to, but not including nfs_strategy().
782		     */
783		    if (nbp) {
784			nbp->b_flags &= ~(B_READ|B_DONE|B_ERROR|B_DELWRI);
785			reassignbuf(nbp, nbp->b_vp);
786			nbp->b_vp->v_numoutput++;
787		    }
788		    (void) nfs_doio(bp, bp->b_wcred, (struct proc *)0);
789		} while (bp = nbp);
790	    }
791	    if (error) {
792		nfs_asyncdaemon[myiod] = 0;
793		nfs_numasync--;
794		return (error);
795	    }
796	}
797}
798
799/*
800 * Shut down a socket associated with an nfssvc_sock structure.
801 * Should be called with the send lock set, if required.
802 * The trick here is to increment the sref at the start, so that the nfsds
803 * will stop using it and clear ns_flag at the end so that it will not be
804 * reassigned during cleanup.
805 */
806static void
807nfsrv_zapsock(slp)
808	register struct nfssvc_sock *slp;
809{
810	register struct nfsuid *nuidp, *nnuidp;
811	register struct nfsrv_descript *nwp, *nnwp;
812	struct socket *so;
813	struct file *fp;
814	struct mbuf *m;
815	int s;
816
817	slp->ns_flag &= ~SLP_ALLFLAGS;
818	fp = slp->ns_fp;
819	if (fp) {
820		slp->ns_fp = (struct file *)0;
821		so = slp->ns_so;
822		so->so_upcall = NULL;
823		soshutdown(so, 2);
824		closef(fp, (struct proc *)0);
825		if (slp->ns_nam)
826			MFREE(slp->ns_nam, m);
827		m_freem(slp->ns_raw);
828		m_freem(slp->ns_rec);
829		for (nuidp = slp->ns_uidlruhead.tqh_first; nuidp != 0;
830		    nuidp = nnuidp) {
831			nnuidp = nuidp->nu_lru.tqe_next;
832			LIST_REMOVE(nuidp, nu_hash);
833			TAILQ_REMOVE(&slp->ns_uidlruhead, nuidp, nu_lru);
834			if (nuidp->nu_flag & NU_NAM)
835				m_freem(nuidp->nu_nam);
836			free((caddr_t)nuidp, M_NFSUID);
837		}
838		s = splsoftclock();
839		for (nwp = slp->ns_tq.lh_first; nwp; nwp = nnwp) {
840			nnwp = nwp->nd_tq.le_next;
841			LIST_REMOVE(nwp, nd_tq);
842			free((caddr_t)nwp, M_NFSRVDESC);
843		}
844		LIST_INIT(&slp->ns_tq);
845		splx(s);
846	}
847}
848
849/*
850 * Get an authorization string for the uid by having the mount_nfs sitting
851 * on this mount point porpous out of the kernel and do it.
852 */
853int
854nfs_getauth(nmp, rep, cred, auth_str, auth_len, verf_str, verf_len, key)
855	register struct nfsmount *nmp;
856	struct nfsreq *rep;
857	struct ucred *cred;
858	char **auth_str;
859	int *auth_len;
860	char *verf_str;
861	int *verf_len;
862	NFSKERBKEY_T key;		/* return session key */
863{
864	int error = 0;
865
866	while ((nmp->nm_flag & NFSMNT_WAITAUTH) == 0) {
867		nmp->nm_flag |= NFSMNT_WANTAUTH;
868		(void) tsleep((caddr_t)&nmp->nm_authtype, PSOCK,
869			"nfsauth1", 2 * hz);
870		error = nfs_sigintr(nmp, rep, rep->r_procp);
871		if (error) {
872			nmp->nm_flag &= ~NFSMNT_WANTAUTH;
873			return (error);
874		}
875	}
876	nmp->nm_flag &= ~(NFSMNT_WAITAUTH | NFSMNT_WANTAUTH);
877	nmp->nm_authstr = *auth_str = (char *)malloc(RPCAUTH_MAXSIZ, M_TEMP, M_WAITOK);
878	nmp->nm_authlen = RPCAUTH_MAXSIZ;
879	nmp->nm_verfstr = verf_str;
880	nmp->nm_verflen = *verf_len;
881	nmp->nm_authuid = cred->cr_uid;
882	wakeup((caddr_t)&nmp->nm_authstr);
883
884	/*
885	 * And wait for mount_nfs to do its stuff.
886	 */
887	while ((nmp->nm_flag & NFSMNT_HASAUTH) == 0 && error == 0) {
888		(void) tsleep((caddr_t)&nmp->nm_authlen, PSOCK,
889			"nfsauth2", 2 * hz);
890		error = nfs_sigintr(nmp, rep, rep->r_procp);
891	}
892	if (nmp->nm_flag & NFSMNT_AUTHERR) {
893		nmp->nm_flag &= ~NFSMNT_AUTHERR;
894		error = EAUTH;
895	}
896	if (error)
897		free((caddr_t)*auth_str, M_TEMP);
898	else {
899		*auth_len = nmp->nm_authlen;
900		*verf_len = nmp->nm_verflen;
901		bcopy((caddr_t)nmp->nm_key, (caddr_t)key, sizeof (key));
902	}
903	nmp->nm_flag &= ~NFSMNT_HASAUTH;
904	nmp->nm_flag |= NFSMNT_WAITAUTH;
905	if (nmp->nm_flag & NFSMNT_WANTAUTH) {
906		nmp->nm_flag &= ~NFSMNT_WANTAUTH;
907		wakeup((caddr_t)&nmp->nm_authtype);
908	}
909	return (error);
910}
911
912/*
913 * Get a nickname authenticator and verifier.
914 */
915int
916nfs_getnickauth(nmp, cred, auth_str, auth_len, verf_str, verf_len)
917	struct nfsmount *nmp;
918	struct ucred *cred;
919	char **auth_str;
920	int *auth_len;
921	char *verf_str;
922	int verf_len;
923{
924	register struct nfsuid *nuidp;
925	register u_long *nickp, *verfp;
926	struct timeval ktvin, ktvout;
927
928#ifdef DIAGNOSTIC
929	if (verf_len < (4 * NFSX_UNSIGNED))
930		panic("nfs_getnickauth verf too small");
931#endif
932	for (nuidp = NMUIDHASH(nmp, cred->cr_uid)->lh_first;
933	    nuidp != 0; nuidp = nuidp->nu_hash.le_next) {
934		if (nuidp->nu_cr.cr_uid == cred->cr_uid)
935			break;
936	}
937	if (!nuidp || nuidp->nu_expire < time.tv_sec)
938		return (EACCES);
939
940	/*
941	 * Move to the end of the lru list (end of lru == most recently used).
942	 */
943	TAILQ_REMOVE(&nmp->nm_uidlruhead, nuidp, nu_lru);
944	TAILQ_INSERT_TAIL(&nmp->nm_uidlruhead, nuidp, nu_lru);
945
946	nickp = (u_long *)malloc(2 * NFSX_UNSIGNED, M_TEMP, M_WAITOK);
947	*nickp++ = txdr_unsigned(RPCAKN_NICKNAME);
948	*nickp = txdr_unsigned(nuidp->nu_nickname);
949	*auth_str = (char *)nickp;
950	*auth_len = 2 * NFSX_UNSIGNED;
951
952	/*
953	 * Now we must encrypt the verifier and package it up.
954	 */
955	verfp = (u_long *)verf_str;
956	*verfp++ = txdr_unsigned(RPCAKN_NICKNAME);
957	if (time.tv_sec > nuidp->nu_timestamp.tv_sec ||
958	    (time.tv_sec == nuidp->nu_timestamp.tv_sec &&
959	     time.tv_usec > nuidp->nu_timestamp.tv_usec))
960		nuidp->nu_timestamp = time;
961	else
962		nuidp->nu_timestamp.tv_usec++;
963	ktvin.tv_sec = txdr_unsigned(nuidp->nu_timestamp.tv_sec);
964	ktvin.tv_usec = txdr_unsigned(nuidp->nu_timestamp.tv_usec);
965
966	/*
967	 * Now encrypt the timestamp verifier in ecb mode using the session
968	 * key.
969	 */
970#ifdef NFSKERB
971	XXX
972#endif
973
974	*verfp++ = ktvout.tv_sec;
975	*verfp++ = ktvout.tv_usec;
976	*verfp = 0;
977	return (0);
978}
979
980/*
981 * Save the current nickname in a hash list entry on the mount point.
982 */
983int
984nfs_savenickauth(nmp, cred, len, key, mdp, dposp, mrep)
985	register struct nfsmount *nmp;
986	struct ucred *cred;
987	int len;
988	NFSKERBKEY_T key;
989	struct mbuf **mdp;
990	char **dposp;
991	struct mbuf *mrep;
992{
993	register struct nfsuid *nuidp;
994	register u_long *tl;
995	register long t1;
996	struct mbuf *md = *mdp;
997	struct timeval ktvin, ktvout;
998	u_long nick;
999	char *dpos = *dposp, *cp2;
1000	int deltasec, error = 0;
1001
1002	if (len == (3 * NFSX_UNSIGNED)) {
1003		nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED);
1004		ktvin.tv_sec = *tl++;
1005		ktvin.tv_usec = *tl++;
1006		nick = fxdr_unsigned(u_long, *tl);
1007
1008		/*
1009		 * Decrypt the timestamp in ecb mode.
1010		 */
1011#ifdef NFSKERB
1012		XXX
1013#endif
1014		ktvout.tv_sec = fxdr_unsigned(long, ktvout.tv_sec);
1015		ktvout.tv_usec = fxdr_unsigned(long, ktvout.tv_usec);
1016		deltasec = time.tv_sec - ktvout.tv_sec;
1017		if (deltasec < 0)
1018			deltasec = -deltasec;
1019		/*
1020		 * If ok, add it to the hash list for the mount point.
1021		 */
1022		if (deltasec <= NFS_KERBCLOCKSKEW) {
1023			if (nmp->nm_numuids < nuidhash_max) {
1024				nmp->nm_numuids++;
1025				nuidp = (struct nfsuid *)
1026				   malloc(sizeof (struct nfsuid), M_NFSUID,
1027					M_WAITOK);
1028			} else {
1029				nuidp = nmp->nm_uidlruhead.tqh_first;
1030				LIST_REMOVE(nuidp, nu_hash);
1031				TAILQ_REMOVE(&nmp->nm_uidlruhead, nuidp,
1032					nu_lru);
1033			}
1034			nuidp->nu_flag = 0;
1035			nuidp->nu_cr.cr_uid = cred->cr_uid;
1036			nuidp->nu_expire = time.tv_sec + NFS_KERBTTL;
1037			nuidp->nu_timestamp = ktvout;
1038			nuidp->nu_nickname = nick;
1039			bcopy(key, nuidp->nu_key, sizeof (key));
1040			TAILQ_INSERT_TAIL(&nmp->nm_uidlruhead, nuidp,
1041				nu_lru);
1042			LIST_INSERT_HEAD(NMUIDHASH(nmp, cred->cr_uid),
1043				nuidp, nu_hash);
1044		}
1045	} else
1046		nfsm_adv(nfsm_rndup(len));
1047nfsmout:
1048	*mdp = md;
1049	*dposp = dpos;
1050	return (error);
1051}
1052
1053#ifndef NFS_NOSERVER
1054/*
1055 * Derefence a server socket structure. If it has no more references and
1056 * is no longer valid, you can throw it away.
1057 */
1058void
1059nfsrv_slpderef(slp)
1060	register struct nfssvc_sock *slp;
1061{
1062	if (--(slp->ns_sref) == 0 && (slp->ns_flag & SLP_VALID) == 0) {
1063		TAILQ_REMOVE(&nfssvc_sockhead, slp, ns_chain);
1064		free((caddr_t)slp, M_NFSSVC);
1065	}
1066}
1067
1068/*
1069 * Initialize the data structures for the server.
1070 * Handshake with any new nfsds starting up to avoid any chance of
1071 * corruption.
1072 */
1073void
1074nfsrv_init(terminating)
1075	int terminating;
1076{
1077	register struct nfssvc_sock *slp, *nslp;
1078
1079	if (nfssvc_sockhead_flag & SLP_INIT)
1080		panic("nfsd init");
1081	nfssvc_sockhead_flag |= SLP_INIT;
1082	if (terminating) {
1083		for (slp = nfssvc_sockhead.tqh_first; slp != 0; slp = nslp) {
1084			nslp = slp->ns_chain.tqe_next;
1085			if (slp->ns_flag & SLP_VALID)
1086				nfsrv_zapsock(slp);
1087			TAILQ_REMOVE(&nfssvc_sockhead, slp, ns_chain);
1088			free((caddr_t)slp, M_NFSSVC);
1089		}
1090		nfsrv_cleancache();	/* And clear out server cache */
1091	}
1092
1093	TAILQ_INIT(&nfssvc_sockhead);
1094	nfssvc_sockhead_flag &= ~SLP_INIT;
1095	if (nfssvc_sockhead_flag & SLP_WANTINIT) {
1096		nfssvc_sockhead_flag &= ~SLP_WANTINIT;
1097		wakeup((caddr_t)&nfssvc_sockhead);
1098	}
1099
1100	TAILQ_INIT(&nfsd_head);
1101	nfsd_head_flag &= ~NFSD_CHECKSLP;
1102
1103	nfs_udpsock = (struct nfssvc_sock *)
1104	    malloc(sizeof (struct nfssvc_sock), M_NFSSVC, M_WAITOK);
1105	bzero((caddr_t)nfs_udpsock, sizeof (struct nfssvc_sock));
1106	TAILQ_INIT(&nfs_udpsock->ns_uidlruhead);
1107	TAILQ_INSERT_HEAD(&nfssvc_sockhead, nfs_udpsock, ns_chain);
1108
1109	nfs_cltpsock = (struct nfssvc_sock *)
1110	    malloc(sizeof (struct nfssvc_sock), M_NFSSVC, M_WAITOK);
1111	bzero((caddr_t)nfs_cltpsock, sizeof (struct nfssvc_sock));
1112	TAILQ_INIT(&nfs_cltpsock->ns_uidlruhead);
1113	TAILQ_INSERT_TAIL(&nfssvc_sockhead, nfs_cltpsock, ns_chain);
1114}
1115
1116/*
1117 * Add entries to the server monitor log.
1118 */
1119static void
1120nfsd_rt(sotype, nd, cacherep)
1121	int sotype;
1122	register struct nfsrv_descript *nd;
1123	int cacherep;
1124{
1125	register struct drt *rt;
1126
1127	rt = &nfsdrt.drt[nfsdrt.pos];
1128	if (cacherep == RC_DOIT)
1129		rt->flag = 0;
1130	else if (cacherep == RC_REPLY)
1131		rt->flag = DRT_CACHEREPLY;
1132	else
1133		rt->flag = DRT_CACHEDROP;
1134	if (sotype == SOCK_STREAM)
1135		rt->flag |= DRT_TCP;
1136	if (nd->nd_flag & ND_NQNFS)
1137		rt->flag |= DRT_NQNFS;
1138	else if (nd->nd_flag & ND_NFSV3)
1139		rt->flag |= DRT_NFSV3;
1140	rt->proc = nd->nd_procnum;
1141	if (mtod(nd->nd_nam, struct sockaddr *)->sa_family == AF_INET)
1142	    rt->ipadr = mtod(nd->nd_nam, struct sockaddr_in *)->sin_addr.s_addr;
1143	else
1144	    rt->ipadr = INADDR_ANY;
1145	rt->resptime = ((time.tv_sec - nd->nd_starttime.tv_sec) * 1000000) +
1146		(time.tv_usec - nd->nd_starttime.tv_usec);
1147	rt->tstamp = time;
1148	nfsdrt.pos = (nfsdrt.pos + 1) % NFSRTTLOGSIZ;
1149}
1150#endif /* NFS_NOSERVER */
1151