nfs_nfsiod.c revision 22975
1/*
2 * Copyright (c) 1989, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Rick Macklem at The University of Guelph.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 *    must display the following acknowledgement:
18 *	This product includes software developed by the University of
19 *	California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 *    may be used to endorse or promote products derived from this software
22 *    without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 *	@(#)nfs_syscalls.c	8.5 (Berkeley) 3/30/95
37 * $Id$
38 */
39
40#include <sys/param.h>
41#include <sys/systm.h>
42#include <sys/sysproto.h>
43#include <sys/kernel.h>
44#include <sys/sysctl.h>
45#include <sys/file.h>
46#include <sys/filedesc.h>
47#include <sys/stat.h>
48#include <sys/vnode.h>
49#include <sys/mount.h>
50#include <sys/proc.h>
51#include <sys/uio.h>
52#include <sys/malloc.h>
53#include <sys/buf.h>
54#include <sys/mbuf.h>
55#include <sys/socket.h>
56#include <sys/socketvar.h>
57#include <sys/domain.h>
58#include <sys/protosw.h>
59#include <sys/namei.h>
60#include <sys/syslog.h>
61
62#include <netinet/in.h>
63#include <netinet/tcp.h>
64#ifdef ISO
65#include <netiso/iso.h>
66#endif
67#include <nfs/xdr_subs.h>
68#include <nfs/rpcv2.h>
69#include <nfs/nfsproto.h>
70#include <nfs/nfs.h>
71#include <nfs/nfsm_subs.h>
72#include <nfs/nfsrvcache.h>
73#include <nfs/nfsmount.h>
74#include <nfs/nfsnode.h>
75#include <nfs/nqnfs.h>
76#include <nfs/nfsrtt.h>
77
78/* Global defs. */
79extern int (*nfsrv3_procs[NFS_NPROCS]) __P((struct nfsrv_descript *nd,
80					    struct nfssvc_sock *slp,
81					    struct proc *procp,
82					    struct mbuf **mreqp));
83extern int nfs_numasync;
84extern time_t nqnfsstarttime;
85extern int nqsrv_writeslack;
86extern int nfsrtton;
87extern struct nfsstats nfsstats;
88extern int nfsrvw_procrastinate;
89struct nfssvc_sock *nfs_udpsock, *nfs_cltpsock;
90static int nuidhash_max = NFS_MAXUIDHASH;
91
92static void	nfsrv_zapsock __P((struct nfssvc_sock *slp));
93static int	nfssvc_iod __P((struct proc *));
94
95#define	TRUE	1
96#define	FALSE	0
97
98static int nfs_asyncdaemon[NFS_MAXASYNCDAEMON];
99
100#ifndef NFS_NOSERVER
101int nfsd_waiting = 0;
102static struct nfsdrt nfsdrt;
103static int nfs_numnfsd = 0;
104static int notstarted = 1;
105static int modify_flag = 0;
106static void	nfsd_rt __P((int sotype, struct nfsrv_descript *nd,
107			     int cacherep));
108static int	nfssvc_addsock __P((struct file *,struct mbuf *));
109static int	nfssvc_nfsd __P((struct nfsd_srvargs *,caddr_t,struct proc *));
110/*
111 * NFS server system calls
112 * getfh() lives here too, but maybe should move to kern/vfs_syscalls.c
113 */
114
115/*
116 * Get file handle system call
117 */
118#ifndef _SYS_SYSPROTO_H_
119struct getfh_args {
120	char	*fname;
121	fhandle_t *fhp;
122};
123#endif
124int
125getfh(p, uap, retval)
126	struct proc *p;
127	register struct getfh_args *uap;
128	int *retval;
129{
130	register struct vnode *vp;
131	fhandle_t fh;
132	int error;
133	struct nameidata nd;
134
135	/*
136	 * Must be super user
137	 */
138	error = suser(p->p_ucred, &p->p_acflag);
139	if(error)
140		return (error);
141	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->fname, p);
142	error = namei(&nd);
143	if (error)
144		return (error);
145	vp = nd.ni_vp;
146	bzero((caddr_t)&fh, sizeof(fh));
147	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
148	error = VFS_VPTOFH(vp, &fh.fh_fid);
149	vput(vp);
150	if (error)
151		return (error);
152	error = copyout((caddr_t)&fh, (caddr_t)uap->fhp, sizeof (fh));
153	return (error);
154}
155
156#endif /* NFS_NOSERVER */
157/*
158 * Nfs server psuedo system call for the nfsd's
159 * Based on the flag value it either:
160 * - adds a socket to the selection list
161 * - remains in the kernel as an nfsd
162 * - remains in the kernel as an nfsiod
163 */
164#ifndef _SYS_SYSPROTO_H_
165struct nfssvc_args {
166	int flag;
167	caddr_t argp;
168};
169#endif
170int
171nfssvc(p, uap, retval)
172	struct proc *p;
173	register struct nfssvc_args *uap;
174	int *retval;
175{
176#ifndef NFS_NOSERVER
177	struct nameidata nd;
178	struct file *fp;
179	struct mbuf *nam;
180	struct nfsd_args nfsdarg;
181	struct nfsd_srvargs nfsd_srvargs, *nsd = &nfsd_srvargs;
182	struct nfsd_cargs ncd;
183	struct nfsd *nfsd;
184	struct nfssvc_sock *slp;
185	struct nfsuid *nuidp;
186	struct nfsmount *nmp;
187#endif /* NFS_NOSERVER */
188	int error;
189
190	/*
191	 * Must be super user
192	 */
193	error = suser(p->p_ucred, &p->p_acflag);
194	if(error)
195		return (error);
196	while (nfssvc_sockhead_flag & SLP_INIT) {
197		 nfssvc_sockhead_flag |= SLP_WANTINIT;
198		(void) tsleep((caddr_t)&nfssvc_sockhead, PSOCK, "nfsd init", 0);
199	}
200	if (uap->flag & NFSSVC_BIOD)
201		error = nfssvc_iod(p);
202#ifdef NFS_NOSERVER
203	else
204		error = ENXIO;
205#else /* !NFS_NOSERVER */
206	else if (uap->flag & NFSSVC_MNTD) {
207		error = copyin(uap->argp, (caddr_t)&ncd, sizeof (ncd));
208		if (error)
209			return (error);
210		NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
211			ncd.ncd_dirp, p);
212		error = namei(&nd);
213		if (error)
214			return (error);
215		if ((nd.ni_vp->v_flag & VROOT) == 0)
216			error = EINVAL;
217		nmp = VFSTONFS(nd.ni_vp->v_mount);
218		vput(nd.ni_vp);
219		if (error)
220			return (error);
221		if ((nmp->nm_flag & NFSMNT_MNTD) &&
222			(uap->flag & NFSSVC_GOTAUTH) == 0)
223			return (0);
224		nmp->nm_flag |= NFSMNT_MNTD;
225		error = nqnfs_clientd(nmp, p->p_ucred, &ncd, uap->flag,
226			uap->argp, p);
227	} else if (uap->flag & NFSSVC_ADDSOCK) {
228		error = copyin(uap->argp, (caddr_t)&nfsdarg, sizeof(nfsdarg));
229		if (error)
230			return (error);
231		error = getsock(p->p_fd, nfsdarg.sock, &fp);
232		if (error)
233			return (error);
234		/*
235		 * Get the client address for connected sockets.
236		 */
237		if (nfsdarg.name == NULL || nfsdarg.namelen == 0)
238			nam = (struct mbuf *)0;
239		else {
240			error = sockargs(&nam, nfsdarg.name, nfsdarg.namelen,
241				MT_SONAME);
242			if (error)
243				return (error);
244		}
245		error = nfssvc_addsock(fp, nam);
246	} else {
247		error = copyin(uap->argp, (caddr_t)nsd, sizeof (*nsd));
248		if (error)
249			return (error);
250		if ((uap->flag & NFSSVC_AUTHIN) && ((nfsd = nsd->nsd_nfsd)) &&
251			(nfsd->nfsd_slp->ns_flag & SLP_VALID)) {
252			slp = nfsd->nfsd_slp;
253
254			/*
255			 * First check to see if another nfsd has already
256			 * added this credential.
257			 */
258			for (nuidp = NUIDHASH(slp,nsd->nsd_cr.cr_uid)->lh_first;
259			    nuidp != 0; nuidp = nuidp->nu_hash.le_next) {
260				if (nuidp->nu_cr.cr_uid == nsd->nsd_cr.cr_uid &&
261				    (!nfsd->nfsd_nd->nd_nam2 ||
262				     netaddr_match(NU_NETFAM(nuidp),
263				     &nuidp->nu_haddr, nfsd->nfsd_nd->nd_nam2)))
264					break;
265			}
266			if (nuidp) {
267			    nfsrv_setcred(&nuidp->nu_cr,&nfsd->nfsd_nd->nd_cr);
268			    nfsd->nfsd_nd->nd_flag |= ND_KERBFULL;
269			} else {
270			    /*
271			     * Nope, so we will.
272			     */
273			    if (slp->ns_numuids < nuidhash_max) {
274				slp->ns_numuids++;
275				nuidp = (struct nfsuid *)
276				   malloc(sizeof (struct nfsuid), M_NFSUID,
277					M_WAITOK);
278			    } else
279				nuidp = (struct nfsuid *)0;
280			    if ((slp->ns_flag & SLP_VALID) == 0) {
281				if (nuidp)
282				    free((caddr_t)nuidp, M_NFSUID);
283			    } else {
284				if (nuidp == (struct nfsuid *)0) {
285				    nuidp = slp->ns_uidlruhead.tqh_first;
286				    LIST_REMOVE(nuidp, nu_hash);
287				    TAILQ_REMOVE(&slp->ns_uidlruhead, nuidp,
288					nu_lru);
289				    if (nuidp->nu_flag & NU_NAM)
290					m_freem(nuidp->nu_nam);
291			        }
292				nuidp->nu_flag = 0;
293				nuidp->nu_cr = nsd->nsd_cr;
294				if (nuidp->nu_cr.cr_ngroups > NGROUPS)
295				    nuidp->nu_cr.cr_ngroups = NGROUPS;
296				nuidp->nu_cr.cr_ref = 1;
297				nuidp->nu_timestamp = nsd->nsd_timestamp;
298				nuidp->nu_expire = time.tv_sec + nsd->nsd_ttl;
299				/*
300				 * and save the session key in nu_key.
301				 */
302				bcopy(nsd->nsd_key, nuidp->nu_key,
303				    sizeof (nsd->nsd_key));
304				if (nfsd->nfsd_nd->nd_nam2) {
305				    struct sockaddr_in *saddr;
306
307				    saddr = mtod(nfsd->nfsd_nd->nd_nam2,
308					 struct sockaddr_in *);
309				    switch (saddr->sin_family) {
310				    case AF_INET:
311					nuidp->nu_flag |= NU_INETADDR;
312					nuidp->nu_inetaddr =
313					     saddr->sin_addr.s_addr;
314					break;
315				    case AF_ISO:
316				    default:
317					nuidp->nu_flag |= NU_NAM;
318					nuidp->nu_nam = m_copym(
319					    nfsd->nfsd_nd->nd_nam2, 0,
320					     M_COPYALL, M_WAIT);
321					break;
322				    };
323				}
324				TAILQ_INSERT_TAIL(&slp->ns_uidlruhead, nuidp,
325					nu_lru);
326				LIST_INSERT_HEAD(NUIDHASH(slp, nsd->nsd_uid),
327					nuidp, nu_hash);
328				nfsrv_setcred(&nuidp->nu_cr,
329				    &nfsd->nfsd_nd->nd_cr);
330				nfsd->nfsd_nd->nd_flag |= ND_KERBFULL;
331			    }
332			}
333		}
334		if ((uap->flag & NFSSVC_AUTHINFAIL) && (nfsd = nsd->nsd_nfsd))
335			nfsd->nfsd_flag |= NFSD_AUTHFAIL;
336		error = nfssvc_nfsd(nsd, uap->argp, p);
337	}
338#endif /* NFS_NOSERVER */
339	if (error == EINTR || error == ERESTART)
340		error = 0;
341	return (error);
342}
343
344#ifndef NFS_NOSERVER
345/*
346 * Adds a socket to the list for servicing by nfsds.
347 */
348static int
349nfssvc_addsock(fp, mynam)
350	struct file *fp;
351	struct mbuf *mynam;
352{
353	register struct mbuf *m;
354	register int siz;
355	register struct nfssvc_sock *slp;
356	register struct socket *so;
357	struct nfssvc_sock *tslp;
358	int error, s;
359
360	so = (struct socket *)fp->f_data;
361	tslp = (struct nfssvc_sock *)0;
362	/*
363	 * Add it to the list, as required.
364	 */
365	if (so->so_proto->pr_protocol == IPPROTO_UDP) {
366		tslp = nfs_udpsock;
367		if (tslp->ns_flag & SLP_VALID) {
368			m_freem(mynam);
369			return (EPERM);
370		}
371#ifdef ISO
372	} else if (so->so_proto->pr_protocol == ISOPROTO_CLTP) {
373		tslp = nfs_cltpsock;
374		if (tslp->ns_flag & SLP_VALID) {
375			m_freem(mynam);
376			return (EPERM);
377		}
378#endif /* ISO */
379	}
380	if (so->so_type == SOCK_STREAM)
381		siz = NFS_MAXPACKET + sizeof (u_long);
382	else
383		siz = NFS_MAXPACKET;
384	error = soreserve(so, siz, siz);
385	if (error) {
386		m_freem(mynam);
387		return (error);
388	}
389
390	/*
391	 * Set protocol specific options { for now TCP only } and
392	 * reserve some space. For datagram sockets, this can get called
393	 * repeatedly for the same socket, but that isn't harmful.
394	 */
395	if (so->so_type == SOCK_STREAM) {
396		MGET(m, M_WAIT, MT_SOOPTS);
397		*mtod(m, int *) = 1;
398		m->m_len = sizeof(int);
399		sosetopt(so, SOL_SOCKET, SO_KEEPALIVE, m);
400	}
401	if (so->so_proto->pr_domain->dom_family == AF_INET &&
402	    so->so_proto->pr_protocol == IPPROTO_TCP) {
403		MGET(m, M_WAIT, MT_SOOPTS);
404		*mtod(m, int *) = 1;
405		m->m_len = sizeof(int);
406		sosetopt(so, IPPROTO_TCP, TCP_NODELAY, m);
407	}
408	so->so_rcv.sb_flags &= ~SB_NOINTR;
409	so->so_rcv.sb_timeo = 0;
410	so->so_snd.sb_flags &= ~SB_NOINTR;
411	so->so_snd.sb_timeo = 0;
412	if (tslp)
413		slp = tslp;
414	else {
415		slp = (struct nfssvc_sock *)
416			malloc(sizeof (struct nfssvc_sock), M_NFSSVC, M_WAITOK);
417		bzero((caddr_t)slp, sizeof (struct nfssvc_sock));
418		TAILQ_INIT(&slp->ns_uidlruhead);
419		TAILQ_INSERT_TAIL(&nfssvc_sockhead, slp, ns_chain);
420	}
421	slp->ns_so = so;
422	slp->ns_nam = mynam;
423	fp->f_count++;
424	slp->ns_fp = fp;
425	s = splnet();
426	so->so_upcallarg = (caddr_t)slp;
427	so->so_upcall = nfsrv_rcv;
428	slp->ns_flag = (SLP_VALID | SLP_NEEDQ);
429	nfsrv_wakenfsd(slp);
430	splx(s);
431	return (0);
432}
433
434/*
435 * Called by nfssvc() for nfsds. Just loops around servicing rpc requests
436 * until it is killed by a signal.
437 */
438static int
439nfssvc_nfsd(nsd, argp, p)
440	struct nfsd_srvargs *nsd;
441	caddr_t argp;
442	struct proc *p;
443{
444	register struct mbuf *m;
445	register int siz;
446	register struct nfssvc_sock *slp;
447	register struct socket *so;
448	register int *solockp;
449	struct nfsd *nfsd = nsd->nsd_nfsd;
450	struct nfsrv_descript *nd = NULL;
451	struct mbuf *mreq;
452	int error = 0, cacherep, s, sotype, writes_todo;
453	u_quad_t cur_usec;
454
455#ifndef nolint
456	cacherep = RC_DOIT;
457	writes_todo = 0;
458#endif
459	s = splnet();
460	if (nfsd == (struct nfsd *)0) {
461		nsd->nsd_nfsd = nfsd = (struct nfsd *)
462			malloc(sizeof (struct nfsd), M_NFSD, M_WAITOK);
463		bzero((caddr_t)nfsd, sizeof (struct nfsd));
464		nfsd->nfsd_procp = p;
465		TAILQ_INSERT_TAIL(&nfsd_head, nfsd, nfsd_chain);
466		nfs_numnfsd++;
467	}
468	/*
469	 * Loop getting rpc requests until SIGKILL.
470	 */
471	for (;;) {
472		if ((nfsd->nfsd_flag & NFSD_REQINPROG) == 0) {
473			while (nfsd->nfsd_slp == (struct nfssvc_sock *)0 &&
474			    (nfsd_head_flag & NFSD_CHECKSLP) == 0) {
475				nfsd->nfsd_flag |= NFSD_WAITING;
476				nfsd_waiting++;
477				error = tsleep((caddr_t)nfsd, PSOCK | PCATCH,
478				    "nfsd", 0);
479				nfsd_waiting--;
480				if (error)
481					goto done;
482			}
483			if (nfsd->nfsd_slp == (struct nfssvc_sock *)0 &&
484			    (nfsd_head_flag & NFSD_CHECKSLP) != 0) {
485				for (slp = nfssvc_sockhead.tqh_first; slp != 0;
486				    slp = slp->ns_chain.tqe_next) {
487				    if ((slp->ns_flag & (SLP_VALID | SLP_DOREC))
488					== (SLP_VALID | SLP_DOREC)) {
489					    slp->ns_flag &= ~SLP_DOREC;
490					    slp->ns_sref++;
491					    nfsd->nfsd_slp = slp;
492					    break;
493				    }
494				}
495				if (slp == 0)
496					nfsd_head_flag &= ~NFSD_CHECKSLP;
497			}
498			if ((slp = nfsd->nfsd_slp) == (struct nfssvc_sock *)0)
499				continue;
500			if (slp->ns_flag & SLP_VALID) {
501				if (slp->ns_flag & SLP_DISCONN)
502					nfsrv_zapsock(slp);
503				else if (slp->ns_flag & SLP_NEEDQ) {
504					slp->ns_flag &= ~SLP_NEEDQ;
505					(void) nfs_sndlock(&slp->ns_solock,
506						(struct nfsreq *)0);
507					nfsrv_rcv(slp->ns_so, (caddr_t)slp,
508						M_WAIT);
509					nfs_sndunlock(&slp->ns_solock);
510				}
511				error = nfsrv_dorec(slp, nfsd, &nd);
512				cur_usec = (u_quad_t)time.tv_sec * 1000000 +
513					(u_quad_t)time.tv_usec;
514				if (error && slp->ns_tq.lh_first &&
515				    slp->ns_tq.lh_first->nd_time <= cur_usec) {
516					error = 0;
517					cacherep = RC_DOIT;
518					writes_todo = 1;
519				} else
520					writes_todo = 0;
521				nfsd->nfsd_flag |= NFSD_REQINPROG;
522			}
523		} else {
524			error = 0;
525			slp = nfsd->nfsd_slp;
526		}
527		if (error || (slp->ns_flag & SLP_VALID) == 0) {
528			if (nd) {
529				free((caddr_t)nd, M_NFSRVDESC);
530				nd = NULL;
531			}
532			nfsd->nfsd_slp = (struct nfssvc_sock *)0;
533			nfsd->nfsd_flag &= ~NFSD_REQINPROG;
534			nfsrv_slpderef(slp);
535			continue;
536		}
537		splx(s);
538		so = slp->ns_so;
539		sotype = so->so_type;
540		if (so->so_proto->pr_flags & PR_CONNREQUIRED)
541			solockp = &slp->ns_solock;
542		else
543			solockp = (int *)0;
544		if (nd) {
545		    nd->nd_starttime = time;
546		    if (nd->nd_nam2)
547			nd->nd_nam = nd->nd_nam2;
548		    else
549			nd->nd_nam = slp->ns_nam;
550
551		    /*
552		     * Check to see if authorization is needed.
553		     */
554		    if (nfsd->nfsd_flag & NFSD_NEEDAUTH) {
555			nfsd->nfsd_flag &= ~NFSD_NEEDAUTH;
556			nsd->nsd_haddr = mtod(nd->nd_nam,
557			    struct sockaddr_in *)->sin_addr.s_addr;
558			nsd->nsd_authlen = nfsd->nfsd_authlen;
559			nsd->nsd_verflen = nfsd->nfsd_verflen;
560			if (!copyout(nfsd->nfsd_authstr,nsd->nsd_authstr,
561				nfsd->nfsd_authlen) &&
562			    !copyout(nfsd->nfsd_verfstr, nsd->nsd_verfstr,
563				nfsd->nfsd_verflen) &&
564			    !copyout((caddr_t)nsd, argp, sizeof (*nsd)))
565			    return (ENEEDAUTH);
566			cacherep = RC_DROPIT;
567		    } else
568			cacherep = nfsrv_getcache(nd, slp, &mreq);
569
570		    /*
571		     * Check for just starting up for NQNFS and send
572		     * fake "try again later" replies to the NQNFS clients.
573		     */
574		    if (notstarted && nqnfsstarttime <= time.tv_sec) {
575			if (modify_flag) {
576				nqnfsstarttime = time.tv_sec + nqsrv_writeslack;
577				modify_flag = 0;
578			} else
579				notstarted = 0;
580		    }
581		    if (notstarted) {
582			if ((nd->nd_flag & ND_NQNFS) == 0)
583				cacherep = RC_DROPIT;
584			else if (nd->nd_procnum != NFSPROC_WRITE) {
585				nd->nd_procnum = NFSPROC_NOOP;
586				nd->nd_repstat = NQNFS_TRYLATER;
587				cacherep = RC_DOIT;
588			} else
589				modify_flag = 1;
590		    } else if (nfsd->nfsd_flag & NFSD_AUTHFAIL) {
591			nfsd->nfsd_flag &= ~NFSD_AUTHFAIL;
592			nd->nd_procnum = NFSPROC_NOOP;
593			nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK);
594			cacherep = RC_DOIT;
595		    }
596		}
597
598		/*
599		 * Loop to get all the write rpc relies that have been
600		 * gathered together.
601		 */
602		do {
603		    switch (cacherep) {
604		    case RC_DOIT:
605			if (writes_todo || (nd->nd_procnum == NFSPROC_WRITE &&
606			    nfsrvw_procrastinate > 0 && !notstarted))
607			    error = nfsrv_writegather(&nd, slp,
608				nfsd->nfsd_procp, &mreq);
609			else
610			    error = (*(nfsrv3_procs[nd->nd_procnum]))(nd,
611				slp, nfsd->nfsd_procp, &mreq);
612			if (mreq == NULL)
613				break;
614			if (error) {
615				if (nd->nd_procnum != NQNFSPROC_VACATED)
616					nfsstats.srv_errs++;
617				nfsrv_updatecache(nd, FALSE, mreq);
618				if (nd->nd_nam2)
619					m_freem(nd->nd_nam2);
620				break;
621			}
622			nfsstats.srvrpccnt[nd->nd_procnum]++;
623			nfsrv_updatecache(nd, TRUE, mreq);
624			nd->nd_mrep = (struct mbuf *)0;
625		    case RC_REPLY:
626			m = mreq;
627			siz = 0;
628			while (m) {
629				siz += m->m_len;
630				m = m->m_next;
631			}
632			if (siz <= 0 || siz > NFS_MAXPACKET) {
633				printf("mbuf siz=%d\n",siz);
634				panic("Bad nfs svc reply");
635			}
636			m = mreq;
637			m->m_pkthdr.len = siz;
638			m->m_pkthdr.rcvif = (struct ifnet *)0;
639			/*
640			 * For stream protocols, prepend a Sun RPC
641			 * Record Mark.
642			 */
643			if (sotype == SOCK_STREAM) {
644				M_PREPEND(m, NFSX_UNSIGNED, M_WAIT);
645				*mtod(m, u_long *) = htonl(0x80000000 | siz);
646			}
647			if (solockp)
648				(void) nfs_sndlock(solockp, (struct nfsreq *)0);
649			if (slp->ns_flag & SLP_VALID)
650			    error = nfs_send(so, nd->nd_nam2, m, NULL);
651			else {
652			    error = EPIPE;
653			    m_freem(m);
654			}
655			if (nfsrtton)
656				nfsd_rt(sotype, nd, cacherep);
657			if (nd->nd_nam2)
658				MFREE(nd->nd_nam2, m);
659			if (nd->nd_mrep)
660				m_freem(nd->nd_mrep);
661			if (error == EPIPE)
662				nfsrv_zapsock(slp);
663			if (solockp)
664				nfs_sndunlock(solockp);
665			if (error == EINTR || error == ERESTART) {
666				free((caddr_t)nd, M_NFSRVDESC);
667				nfsrv_slpderef(slp);
668				s = splnet();
669				goto done;
670			}
671			break;
672		    case RC_DROPIT:
673			if (nfsrtton)
674				nfsd_rt(sotype, nd, cacherep);
675			m_freem(nd->nd_mrep);
676			m_freem(nd->nd_nam2);
677			break;
678		    };
679		    if (nd) {
680			FREE((caddr_t)nd, M_NFSRVDESC);
681			nd = NULL;
682		    }
683
684		    /*
685		     * Check to see if there are outstanding writes that
686		     * need to be serviced.
687		     */
688		    cur_usec = (u_quad_t)time.tv_sec * 1000000 +
689			(u_quad_t)time.tv_usec;
690		    s = splsoftclock();
691		    if (slp->ns_tq.lh_first &&
692			slp->ns_tq.lh_first->nd_time <= cur_usec) {
693			cacherep = RC_DOIT;
694			writes_todo = 1;
695		    } else
696			writes_todo = 0;
697		    splx(s);
698		} while (writes_todo);
699		s = splnet();
700		if (nfsrv_dorec(slp, nfsd, &nd)) {
701			nfsd->nfsd_flag &= ~NFSD_REQINPROG;
702			nfsd->nfsd_slp = NULL;
703			nfsrv_slpderef(slp);
704		}
705	}
706done:
707	TAILQ_REMOVE(&nfsd_head, nfsd, nfsd_chain);
708	splx(s);
709	free((caddr_t)nfsd, M_NFSD);
710	nsd->nsd_nfsd = (struct nfsd *)0;
711	if (--nfs_numnfsd == 0)
712		nfsrv_init(TRUE);	/* Reinitialize everything */
713	return (error);
714}
715#endif /* NFS_NOSERVER */
716
717int nfs_defect = 0;
718SYSCTL_INT(_vfs_nfs, OID_AUTO, defect, CTLFLAG_RW, &nfs_defect, 0, "");
719
720/*
721 * Asynchronous I/O daemons for client nfs.
722 * They do read-ahead and write-behind operations on the block I/O cache.
723 * Never returns unless it fails or gets killed.
724 */
725static int
726nfssvc_iod(p)
727	struct proc *p;
728{
729	register struct buf *bp, *nbp;
730	register int i, myiod;
731	struct vnode *vp;
732	struct nfsmount *nmp;
733	int error = 0, s;
734
735	/*
736	 * Assign my position or return error if too many already running
737	 */
738	myiod = -1;
739	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
740		if (nfs_asyncdaemon[i] == 0) {
741			nfs_asyncdaemon[i]++;
742			myiod = i;
743			break;
744		}
745	if (myiod == -1)
746		return (EBUSY);
747	nfs_numasync++;
748	/*
749	 * Just loop around doin our stuff until SIGKILL
750	 */
751	for (;;) {
752	    while (((nmp = nfs_iodmount[myiod]) == NULL
753		    || nmp->nm_bufq.tqh_first == NULL)
754		   && error == 0) {
755		if (nmp)
756		    nmp->nm_bufqiods--;
757		nfs_iodwant[myiod] = p;
758		nfs_iodmount[myiod] = NULL;
759		error = tsleep((caddr_t)&nfs_iodwant[myiod],
760			PWAIT | PCATCH, "nfsidl", 0);
761	    }
762	    if (error) {
763		nfs_asyncdaemon[myiod] = 0;
764		if (nmp) nmp->nm_bufqiods--;
765		nfs_iodmount[myiod] = NULL;
766		nfs_numasync--;
767		return (error);
768	    }
769	    while ((bp = nmp->nm_bufq.tqh_first) != NULL) {
770		/* Take one off the front of the list */
771		TAILQ_REMOVE(&nmp->nm_bufq, bp, b_freelist);
772		nmp->nm_bufqlen--;
773		if (nmp->nm_bufqwant && nmp->nm_bufqlen < 2 * nfs_numasync) {
774		    nmp->nm_bufqwant = FALSE;
775		    wakeup(&nmp->nm_bufq);
776		}
777		if (bp->b_flags & B_READ)
778		    (void) nfs_doio(bp, bp->b_rcred, (struct proc *)0);
779		else
780		    (void) nfs_doio(bp, bp->b_wcred, (struct proc *)0);
781
782		/*
783		 * If there are more than one iod on this mount, then defect
784		 * so that the iods can be shared out fairly between the mounts
785		 */
786		if (nfs_defect && nmp->nm_bufqiods > 1) {
787		    NFS_DPF(ASYNCIO,
788			    ("nfssvc_iod: iod %d defecting from mount %p\n",
789			     myiod, nmp));
790		    nfs_iodmount[myiod] = NULL;
791		    nmp->nm_bufqiods--;
792		    break;
793		}
794	    }
795	}
796}
797
798/*
799 * Shut down a socket associated with an nfssvc_sock structure.
800 * Should be called with the send lock set, if required.
801 * The trick here is to increment the sref at the start, so that the nfsds
802 * will stop using it and clear ns_flag at the end so that it will not be
803 * reassigned during cleanup.
804 */
805static void
806nfsrv_zapsock(slp)
807	register struct nfssvc_sock *slp;
808{
809	register struct nfsuid *nuidp, *nnuidp;
810	register struct nfsrv_descript *nwp, *nnwp;
811	struct socket *so;
812	struct file *fp;
813	struct mbuf *m;
814	int s;
815
816	slp->ns_flag &= ~SLP_ALLFLAGS;
817	fp = slp->ns_fp;
818	if (fp) {
819		slp->ns_fp = (struct file *)0;
820		so = slp->ns_so;
821		so->so_upcall = NULL;
822		soshutdown(so, 2);
823		closef(fp, (struct proc *)0);
824		if (slp->ns_nam)
825			MFREE(slp->ns_nam, m);
826		m_freem(slp->ns_raw);
827		m_freem(slp->ns_rec);
828		for (nuidp = slp->ns_uidlruhead.tqh_first; nuidp != 0;
829		    nuidp = nnuidp) {
830			nnuidp = nuidp->nu_lru.tqe_next;
831			LIST_REMOVE(nuidp, nu_hash);
832			TAILQ_REMOVE(&slp->ns_uidlruhead, nuidp, nu_lru);
833			if (nuidp->nu_flag & NU_NAM)
834				m_freem(nuidp->nu_nam);
835			free((caddr_t)nuidp, M_NFSUID);
836		}
837		s = splsoftclock();
838		for (nwp = slp->ns_tq.lh_first; nwp; nwp = nnwp) {
839			nnwp = nwp->nd_tq.le_next;
840			LIST_REMOVE(nwp, nd_tq);
841			free((caddr_t)nwp, M_NFSRVDESC);
842		}
843		LIST_INIT(&slp->ns_tq);
844		splx(s);
845	}
846}
847
848/*
849 * Get an authorization string for the uid by having the mount_nfs sitting
850 * on this mount point porpous out of the kernel and do it.
851 */
852int
853nfs_getauth(nmp, rep, cred, auth_str, auth_len, verf_str, verf_len, key)
854	register struct nfsmount *nmp;
855	struct nfsreq *rep;
856	struct ucred *cred;
857	char **auth_str;
858	int *auth_len;
859	char *verf_str;
860	int *verf_len;
861	NFSKERBKEY_T key;		/* return session key */
862{
863	int error = 0;
864
865	while ((nmp->nm_flag & NFSMNT_WAITAUTH) == 0) {
866		nmp->nm_flag |= NFSMNT_WANTAUTH;
867		(void) tsleep((caddr_t)&nmp->nm_authtype, PSOCK,
868			"nfsauth1", 2 * hz);
869		error = nfs_sigintr(nmp, rep, rep->r_procp);
870		if (error) {
871			nmp->nm_flag &= ~NFSMNT_WANTAUTH;
872			return (error);
873		}
874	}
875	nmp->nm_flag &= ~(NFSMNT_WAITAUTH | NFSMNT_WANTAUTH);
876	nmp->nm_authstr = *auth_str = (char *)malloc(RPCAUTH_MAXSIZ, M_TEMP, M_WAITOK);
877	nmp->nm_authlen = RPCAUTH_MAXSIZ;
878	nmp->nm_verfstr = verf_str;
879	nmp->nm_verflen = *verf_len;
880	nmp->nm_authuid = cred->cr_uid;
881	wakeup((caddr_t)&nmp->nm_authstr);
882
883	/*
884	 * And wait for mount_nfs to do its stuff.
885	 */
886	while ((nmp->nm_flag & NFSMNT_HASAUTH) == 0 && error == 0) {
887		(void) tsleep((caddr_t)&nmp->nm_authlen, PSOCK,
888			"nfsauth2", 2 * hz);
889		error = nfs_sigintr(nmp, rep, rep->r_procp);
890	}
891	if (nmp->nm_flag & NFSMNT_AUTHERR) {
892		nmp->nm_flag &= ~NFSMNT_AUTHERR;
893		error = EAUTH;
894	}
895	if (error)
896		free((caddr_t)*auth_str, M_TEMP);
897	else {
898		*auth_len = nmp->nm_authlen;
899		*verf_len = nmp->nm_verflen;
900		bcopy((caddr_t)nmp->nm_key, (caddr_t)key, sizeof (key));
901	}
902	nmp->nm_flag &= ~NFSMNT_HASAUTH;
903	nmp->nm_flag |= NFSMNT_WAITAUTH;
904	if (nmp->nm_flag & NFSMNT_WANTAUTH) {
905		nmp->nm_flag &= ~NFSMNT_WANTAUTH;
906		wakeup((caddr_t)&nmp->nm_authtype);
907	}
908	return (error);
909}
910
911/*
912 * Get a nickname authenticator and verifier.
913 */
914int
915nfs_getnickauth(nmp, cred, auth_str, auth_len, verf_str, verf_len)
916	struct nfsmount *nmp;
917	struct ucred *cred;
918	char **auth_str;
919	int *auth_len;
920	char *verf_str;
921	int verf_len;
922{
923	register struct nfsuid *nuidp;
924	register u_long *nickp, *verfp;
925	struct timeval ktvin, ktvout;
926
927#ifdef DIAGNOSTIC
928	if (verf_len < (4 * NFSX_UNSIGNED))
929		panic("nfs_getnickauth verf too small");
930#endif
931	for (nuidp = NMUIDHASH(nmp, cred->cr_uid)->lh_first;
932	    nuidp != 0; nuidp = nuidp->nu_hash.le_next) {
933		if (nuidp->nu_cr.cr_uid == cred->cr_uid)
934			break;
935	}
936	if (!nuidp || nuidp->nu_expire < time.tv_sec)
937		return (EACCES);
938
939	/*
940	 * Move to the end of the lru list (end of lru == most recently used).
941	 */
942	TAILQ_REMOVE(&nmp->nm_uidlruhead, nuidp, nu_lru);
943	TAILQ_INSERT_TAIL(&nmp->nm_uidlruhead, nuidp, nu_lru);
944
945	nickp = (u_long *)malloc(2 * NFSX_UNSIGNED, M_TEMP, M_WAITOK);
946	*nickp++ = txdr_unsigned(RPCAKN_NICKNAME);
947	*nickp = txdr_unsigned(nuidp->nu_nickname);
948	*auth_str = (char *)nickp;
949	*auth_len = 2 * NFSX_UNSIGNED;
950
951	/*
952	 * Now we must encrypt the verifier and package it up.
953	 */
954	verfp = (u_long *)verf_str;
955	*verfp++ = txdr_unsigned(RPCAKN_NICKNAME);
956	if (time.tv_sec > nuidp->nu_timestamp.tv_sec ||
957	    (time.tv_sec == nuidp->nu_timestamp.tv_sec &&
958	     time.tv_usec > nuidp->nu_timestamp.tv_usec))
959		nuidp->nu_timestamp = time;
960	else
961		nuidp->nu_timestamp.tv_usec++;
962	ktvin.tv_sec = txdr_unsigned(nuidp->nu_timestamp.tv_sec);
963	ktvin.tv_usec = txdr_unsigned(nuidp->nu_timestamp.tv_usec);
964
965	/*
966	 * Now encrypt the timestamp verifier in ecb mode using the session
967	 * key.
968	 */
969#ifdef NFSKERB
970	XXX
971#endif
972
973	*verfp++ = ktvout.tv_sec;
974	*verfp++ = ktvout.tv_usec;
975	*verfp = 0;
976	return (0);
977}
978
979/*
980 * Save the current nickname in a hash list entry on the mount point.
981 */
982int
983nfs_savenickauth(nmp, cred, len, key, mdp, dposp, mrep)
984	register struct nfsmount *nmp;
985	struct ucred *cred;
986	int len;
987	NFSKERBKEY_T key;
988	struct mbuf **mdp;
989	char **dposp;
990	struct mbuf *mrep;
991{
992	register struct nfsuid *nuidp;
993	register u_long *tl;
994	register long t1;
995	struct mbuf *md = *mdp;
996	struct timeval ktvin, ktvout;
997	u_long nick;
998	char *dpos = *dposp, *cp2;
999	int deltasec, error = 0;
1000
1001	if (len == (3 * NFSX_UNSIGNED)) {
1002		nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED);
1003		ktvin.tv_sec = *tl++;
1004		ktvin.tv_usec = *tl++;
1005		nick = fxdr_unsigned(u_long, *tl);
1006
1007		/*
1008		 * Decrypt the timestamp in ecb mode.
1009		 */
1010#ifdef NFSKERB
1011		XXX
1012#endif
1013		ktvout.tv_sec = fxdr_unsigned(long, ktvout.tv_sec);
1014		ktvout.tv_usec = fxdr_unsigned(long, ktvout.tv_usec);
1015		deltasec = time.tv_sec - ktvout.tv_sec;
1016		if (deltasec < 0)
1017			deltasec = -deltasec;
1018		/*
1019		 * If ok, add it to the hash list for the mount point.
1020		 */
1021		if (deltasec <= NFS_KERBCLOCKSKEW) {
1022			if (nmp->nm_numuids < nuidhash_max) {
1023				nmp->nm_numuids++;
1024				nuidp = (struct nfsuid *)
1025				   malloc(sizeof (struct nfsuid), M_NFSUID,
1026					M_WAITOK);
1027			} else {
1028				nuidp = nmp->nm_uidlruhead.tqh_first;
1029				LIST_REMOVE(nuidp, nu_hash);
1030				TAILQ_REMOVE(&nmp->nm_uidlruhead, nuidp,
1031					nu_lru);
1032			}
1033			nuidp->nu_flag = 0;
1034			nuidp->nu_cr.cr_uid = cred->cr_uid;
1035			nuidp->nu_expire = time.tv_sec + NFS_KERBTTL;
1036			nuidp->nu_timestamp = ktvout;
1037			nuidp->nu_nickname = nick;
1038			bcopy(key, nuidp->nu_key, sizeof (key));
1039			TAILQ_INSERT_TAIL(&nmp->nm_uidlruhead, nuidp,
1040				nu_lru);
1041			LIST_INSERT_HEAD(NMUIDHASH(nmp, cred->cr_uid),
1042				nuidp, nu_hash);
1043		}
1044	} else
1045		nfsm_adv(nfsm_rndup(len));
1046nfsmout:
1047	*mdp = md;
1048	*dposp = dpos;
1049	return (error);
1050}
1051
1052#ifndef NFS_NOSERVER
1053
1054/*
1055 * Derefence a server socket structure. If it has no more references and
1056 * is no longer valid, you can throw it away.
1057 */
1058void
1059nfsrv_slpderef(slp)
1060	register struct nfssvc_sock *slp;
1061{
1062	if (--(slp->ns_sref) == 0 && (slp->ns_flag & SLP_VALID) == 0) {
1063		TAILQ_REMOVE(&nfssvc_sockhead, slp, ns_chain);
1064		free((caddr_t)slp, M_NFSSVC);
1065	}
1066}
1067
1068/*
1069 * Initialize the data structures for the server.
1070 * Handshake with any new nfsds starting up to avoid any chance of
1071 * corruption.
1072 */
1073void
1074nfsrv_init(terminating)
1075	int terminating;
1076{
1077	register struct nfssvc_sock *slp, *nslp;
1078
1079	if (nfssvc_sockhead_flag & SLP_INIT)
1080		panic("nfsd init");
1081	nfssvc_sockhead_flag |= SLP_INIT;
1082	if (terminating) {
1083		for (slp = nfssvc_sockhead.tqh_first; slp != 0; slp = nslp) {
1084			nslp = slp->ns_chain.tqe_next;
1085			if (slp->ns_flag & SLP_VALID)
1086				nfsrv_zapsock(slp);
1087			TAILQ_REMOVE(&nfssvc_sockhead, slp, ns_chain);
1088			free((caddr_t)slp, M_NFSSVC);
1089		}
1090		nfsrv_cleancache();	/* And clear out server cache */
1091	}
1092
1093	TAILQ_INIT(&nfssvc_sockhead);
1094	nfssvc_sockhead_flag &= ~SLP_INIT;
1095	if (nfssvc_sockhead_flag & SLP_WANTINIT) {
1096		nfssvc_sockhead_flag &= ~SLP_WANTINIT;
1097		wakeup((caddr_t)&nfssvc_sockhead);
1098	}
1099
1100	TAILQ_INIT(&nfsd_head);
1101	nfsd_head_flag &= ~NFSD_CHECKSLP;
1102
1103	nfs_udpsock = (struct nfssvc_sock *)
1104	    malloc(sizeof (struct nfssvc_sock), M_NFSSVC, M_WAITOK);
1105	bzero((caddr_t)nfs_udpsock, sizeof (struct nfssvc_sock));
1106	TAILQ_INIT(&nfs_udpsock->ns_uidlruhead);
1107	TAILQ_INSERT_HEAD(&nfssvc_sockhead, nfs_udpsock, ns_chain);
1108
1109	nfs_cltpsock = (struct nfssvc_sock *)
1110	    malloc(sizeof (struct nfssvc_sock), M_NFSSVC, M_WAITOK);
1111	bzero((caddr_t)nfs_cltpsock, sizeof (struct nfssvc_sock));
1112	TAILQ_INIT(&nfs_cltpsock->ns_uidlruhead);
1113	TAILQ_INSERT_TAIL(&nfssvc_sockhead, nfs_cltpsock, ns_chain);
1114}
1115
1116/*
1117 * Add entries to the server monitor log.
1118 */
1119static void
1120nfsd_rt(sotype, nd, cacherep)
1121	int sotype;
1122	register struct nfsrv_descript *nd;
1123	int cacherep;
1124{
1125	register struct drt *rt;
1126
1127	rt = &nfsdrt.drt[nfsdrt.pos];
1128	if (cacherep == RC_DOIT)
1129		rt->flag = 0;
1130	else if (cacherep == RC_REPLY)
1131		rt->flag = DRT_CACHEREPLY;
1132	else
1133		rt->flag = DRT_CACHEDROP;
1134	if (sotype == SOCK_STREAM)
1135		rt->flag |= DRT_TCP;
1136	if (nd->nd_flag & ND_NQNFS)
1137		rt->flag |= DRT_NQNFS;
1138	else if (nd->nd_flag & ND_NFSV3)
1139		rt->flag |= DRT_NFSV3;
1140	rt->proc = nd->nd_procnum;
1141	if (mtod(nd->nd_nam, struct sockaddr *)->sa_family == AF_INET)
1142	    rt->ipadr = mtod(nd->nd_nam, struct sockaddr_in *)->sin_addr.s_addr;
1143	else
1144	    rt->ipadr = INADDR_ANY;
1145	rt->resptime = ((time.tv_sec - nd->nd_starttime.tv_sec) * 1000000) +
1146		(time.tv_usec - nd->nd_starttime.tv_usec);
1147	rt->tstamp = time;
1148	nfsdrt.pos = (nfsdrt.pos + 1) % NFSRTTLOGSIZ;
1149}
1150#endif /* NFS_NOSERVER */
1151