nfs_nfsiod.c revision 25664
1/*
2 * Copyright (c) 1989, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Rick Macklem at The University of Guelph.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 *    must display the following acknowledgement:
18 *	This product includes software developed by the University of
19 *	California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 *    may be used to endorse or promote products derived from this software
22 *    without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 *	@(#)nfs_syscalls.c	8.5 (Berkeley) 3/30/95
37 * $Id: nfs_syscalls.c,v 1.22 1997/04/30 09:51:37 dfr Exp $
38 */
39
40#include <sys/param.h>
41#include <sys/systm.h>
42#include <sys/sysproto.h>
43#include <sys/kernel.h>
44#include <sys/sysctl.h>
45#include <sys/file.h>
46#include <sys/filedesc.h>
47#include <sys/stat.h>
48#include <sys/vnode.h>
49#include <sys/mount.h>
50#include <sys/proc.h>
51#include <sys/uio.h>
52#include <sys/malloc.h>
53#include <sys/buf.h>
54#include <sys/mbuf.h>
55#include <sys/socket.h>
56#include <sys/socketvar.h>
57#include <sys/domain.h>
58#include <sys/protosw.h>
59#include <sys/namei.h>
60#include <sys/syslog.h>
61
62#include <netinet/in.h>
63#include <netinet/tcp.h>
64#ifdef ISO
65#include <netiso/iso.h>
66#endif
67#include <nfs/xdr_subs.h>
68#include <nfs/rpcv2.h>
69#include <nfs/nfsproto.h>
70#include <nfs/nfs.h>
71#include <nfs/nfsm_subs.h>
72#include <nfs/nfsrvcache.h>
73#include <nfs/nfsmount.h>
74#include <nfs/nfsnode.h>
75#include <nfs/nqnfs.h>
76#include <nfs/nfsrtt.h>
77
78/* Global defs. */
/*
 * Server procedure dispatch table; nfssvc_nfsd() indexes it with
 * nd->nd_procnum to run the handler for a request.
 */
79extern int (*nfsrv3_procs[NFS_NPROCS]) __P((struct nfsrv_descript *nd,
80					    struct nfssvc_sock *slp,
81					    struct proc *procp,
82					    struct mbuf **mreqp));
/* Number of nfsiods currently running; maintained by nfssvc_iod(). */
83extern int nfs_numasync;
84extern time_t nqnfsstarttime;
85extern int nqsrv_writeslack;
/* When non-zero, nfssvc_nfsd() logs each request through nfsd_rt(). */
86extern int nfsrtton;
87extern struct nfsstats nfsstats;
/* Write gathering is only attempted when the relevant value is > 0. */
88extern int nfsrvw_procrastinate;
89extern int nfsrvw_procrastinate_v3;
/*
 * Pre-allocated socket entries for the datagram transports; they are
 * (re)created by nfsrv_init() and claimed by nfssvc_addsock().
 */
90struct nfssvc_sock *nfs_udpsock, *nfs_cltpsock;
/* Upper bound on cached nfsuid entries per server socket / client mount. */
91static int nuidhash_max = NFS_MAXUIDHASH;
92
93static void	nfsrv_zapsock __P((struct nfssvc_sock *slp));
94static int	nfssvc_iod __P((struct proc *));
95
96#define	TRUE	1
97#define	FALSE	0
98
/* Slot table for nfsiods: non-zero means the slot is taken. */
99static int nfs_asyncdaemon[NFS_MAXASYNCDAEMON];
100
101#ifndef NFS_NOSERVER
/* Number of nfsds currently asleep in nfssvc_nfsd() waiting for work. */
102int nfsd_waiting = 0;
/* Circular log of server response times, filled in by nfsd_rt(). */
103static struct nfsdrt nfsdrt;
/* Number of registered nfsds; when it drops to 0 the server is reinitialized. */
104static int nfs_numnfsd = 0;
/*
 * NQNFS start-up state used by nfssvc_nfsd(): while notstarted is set,
 * non-NQNFS requests are dropped and NQNFS requests other than writes get
 * a "try later" reply.  modify_flag records that a write was seen during
 * start-up, which extends the start-up period by nqsrv_writeslack.
 */
105static int notstarted = 1;
106static int modify_flag = 0;
107static void	nfsd_rt __P((int sotype, struct nfsrv_descript *nd,
108			     int cacherep));
109static int	nfssvc_addsock __P((struct file *, struct mbuf *,
110				    struct proc *));
111static int	nfssvc_nfsd __P((struct nfsd_srvargs *,caddr_t,struct proc *));
112
/*
 * When non-zero, requests (other than the NULL procedure) arriving from a
 * source port >= IPPORT_RESERVED are rejected with AUTH_TOOWEAK.
 */
113static int nfs_privport = 0;
114SYSCTL_INT(_vfs_nfs, NFS_NFSPRIVPORT, nfs_privport, CTLFLAG_RW, &nfs_privport, 0, "");
/* Run-time tunables for the write-gathering delays declared above. */
115SYSCTL_INT(_vfs_nfs, OID_AUTO, gatherdelay, CTLFLAG_RW, &nfsrvw_procrastinate, 0, "");
116SYSCTL_INT(_vfs_nfs, OID_AUTO, gatherdelay_v3, CTLFLAG_RW, &nfsrvw_procrastinate_v3, 0, "");
117
118/*
119 * NFS server system calls
120 * getfh() lives here too, but maybe should move to kern/vfs_syscalls.c
121 */
122
123/*
124 * Get file handle system call
125 */
126#ifndef _SYS_SYSPROTO_H_
127struct getfh_args {
128	char	*fname;
129	fhandle_t *fhp;
130};
131#endif
132int
133getfh(p, uap, retval)
134	struct proc *p;
135	register struct getfh_args *uap;
136	int *retval;
137{
138	register struct vnode *vp;
139	fhandle_t fh;
140	int error;
141	struct nameidata nd;
142
143	/*
144	 * Must be super user
145	 */
146	error = suser(p->p_ucred, &p->p_acflag);
147	if(error)
148		return (error);
149	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->fname, p);
150	error = namei(&nd);
151	if (error)
152		return (error);
153	vp = nd.ni_vp;
154	bzero((caddr_t)&fh, sizeof(fh));
155	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
156	error = VFS_VPTOFH(vp, &fh.fh_fid);
157	vput(vp);
158	if (error)
159		return (error);
160	error = copyout((caddr_t)&fh, (caddr_t)uap->fhp, sizeof (fh));
161	return (error);
162}
163
164#endif /* NFS_NOSERVER */
165/*
166 * Nfs server pseudo system call for the nfsd's
167 * Based on the flag value it either:
168 * - adds a socket to the selection list
169 * - remains in the kernel as an nfsd
170 * - remains in the kernel as an nfsiod
171 */
172#ifndef _SYS_SYSPROTO_H_
173struct nfssvc_args {
174	int flag;
175	caddr_t argp;
176};
177#endif
/*
 * nfssvc: multiplexed entry point used by the NFS daemons.
 *
 *	p	calling process; must hold super-user credentials
 *	uap	flag selects the operation, argp points at the
 *		operation-specific argument structure in user space
 *	retval	unused
 *
 * Dispatch on uap->flag:
 *	NFSSVC_BIOD	become an nfsiod (nfssvc_iod)
 *	NFSSVC_MNTD	become the NQNFS client daemon for a mount point
 *	NFSSVC_ADDSOCK	hand a socket to the server (nfssvc_addsock)
 *	otherwise	become an nfsd (nfssvc_nfsd), optionally first
 *			recording the result of a user-level authentication
 * When built with NFS_NOSERVER only NFSSVC_BIOD is supported and every
 * other request fails with ENXIO.
 *
 * Returns 0 or an errno; EINTR and ERESTART are reported as 0.
 */
178int
179nfssvc(p, uap, retval)
180	struct proc *p;
181	register struct nfssvc_args *uap;
182	int *retval;
183{
184#ifndef NFS_NOSERVER
185	struct nameidata nd;
186	struct file *fp;
187	struct mbuf *nam;
188	struct nfsd_args nfsdarg;
189	struct nfsd_srvargs nfsd_srvargs, *nsd = &nfsd_srvargs;
190	struct nfsd_cargs ncd;
191	struct nfsd *nfsd;
192	struct nfssvc_sock *slp;
193	struct nfsuid *nuidp;
194	struct nfsmount *nmp;
195#endif /* NFS_NOSERVER */
196	int error;
197
198	/*
199	 * Must be super user
200	 */
201	error = suser(p->p_ucred, &p->p_acflag);
202	if(error)
203		return (error);
	/*
	 * Wait for any nfsrv_init() in progress to finish; it clears
	 * SLP_INIT and wakes sleepers that set SLP_WANTINIT.
	 */
204	while (nfssvc_sockhead_flag & SLP_INIT) {
205		 nfssvc_sockhead_flag |= SLP_WANTINIT;
206		(void) tsleep((caddr_t)&nfssvc_sockhead, PSOCK, "nfsd init", 0);
207	}
208	if (uap->flag & NFSSVC_BIOD)
209		error = nfssvc_iod(p);
210#ifdef NFS_NOSERVER
211	else
212		error = ENXIO;
213#else /* !NFS_NOSERVER */
214	else if (uap->flag & NFSSVC_MNTD) {
		/*
		 * Client daemon for one mount: the path in ncd_dirp must
		 * name the root vnode of a mounted file system.
		 */
215		error = copyin(uap->argp, (caddr_t)&ncd, sizeof (ncd));
216		if (error)
217			return (error);
218		NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
219			ncd.ncd_dirp, p);
220		error = namei(&nd);
221		if (error)
222			return (error);
223		if ((nd.ni_vp->v_flag & VROOT) == 0)
224			error = EINVAL;
		/* Grab the mount before releasing the vnode, then check error. */
225		nmp = VFSTONFS(nd.ni_vp->v_mount);
226		vput(nd.ni_vp);
227		if (error)
228			return (error);
		/*
		 * A daemon is already attached to this mount and this call
		 * is not delivering an authenticator: nothing to do.
		 */
229		if ((nmp->nm_flag & NFSMNT_MNTD) &&
230			(uap->flag & NFSSVC_GOTAUTH) == 0)
231			return (0);
232		nmp->nm_flag |= NFSMNT_MNTD;
233		error = nqnfs_clientd(nmp, p->p_ucred, &ncd, uap->flag,
234			uap->argp, p);
235	} else if (uap->flag & NFSSVC_ADDSOCK) {
236		error = copyin(uap->argp, (caddr_t)&nfsdarg, sizeof(nfsdarg));
237		if (error)
238			return (error);
239		error = getsock(p->p_fd, nfsdarg.sock, &fp);
240		if (error)
241			return (error);
242		/*
243		 * Get the client address for connected sockets.
244		 */
245		if (nfsdarg.name == NULL || nfsdarg.namelen == 0)
246			nam = (struct mbuf *)0;
247		else {
248			error = sockargs(&nam, nfsdarg.name, nfsdarg.namelen,
249				MT_SONAME);
250			if (error)
251				return (error);
252		}
		/* nfssvc_addsock() takes over the address mbuf. */
253		error = nfssvc_addsock(fp, nam, p);
254	} else {
		/*
		 * nfsd: fetch the server arguments.  nsd_nfsd is the kernel
		 * handle handed back to user space by an earlier call (it is
		 * set by nfssvc_nfsd() on first entry).
		 */
255		error = copyin(uap->argp, (caddr_t)nsd, sizeof (*nsd));
256		if (error)
257			return (error);
		/*
		 * User-level authentication succeeded: cache the credential
		 * on the socket so later requests can be mapped directly.
		 */
258		if ((uap->flag & NFSSVC_AUTHIN) && ((nfsd = nsd->nsd_nfsd)) &&
259			(nfsd->nfsd_slp->ns_flag & SLP_VALID)) {
260			slp = nfsd->nfsd_slp;
261
262			/*
263			 * First check to see if another nfsd has already
264			 * added this credential.
265			 */
266			for (nuidp = NUIDHASH(slp,nsd->nsd_cr.cr_uid)->lh_first;
267			    nuidp != 0; nuidp = nuidp->nu_hash.le_next) {
268				if (nuidp->nu_cr.cr_uid == nsd->nsd_cr.cr_uid &&
269				    (!nfsd->nfsd_nd->nd_nam2 ||
270				     netaddr_match(NU_NETFAM(nuidp),
271				     &nuidp->nu_haddr, nfsd->nfsd_nd->nd_nam2)))
272					break;
273			}
274			if (nuidp) {
275			    nfsrv_setcred(&nuidp->nu_cr,&nfsd->nfsd_nd->nd_cr);
276			    nfsd->nfsd_nd->nd_flag |= ND_KERBFULL;
277			} else {
278			    /*
279			     * Nope, so we will.
280			     */
			    /*
			     * Below the cap allocate a fresh entry; otherwise
			     * leave nuidp null and recycle the LRU head below.
			     */
281			    if (slp->ns_numuids < nuidhash_max) {
282				slp->ns_numuids++;
283				nuidp = (struct nfsuid *)
284				   malloc(sizeof (struct nfsuid), M_NFSUID,
285					M_WAITOK);
286			    } else
287				nuidp = (struct nfsuid *)0;
			    /*
			     * Re-check validity after the M_WAITOK allocation;
			     * presumably the socket can be torn down while
			     * malloc sleeps -- TODO confirm.
			     */
288			    if ((slp->ns_flag & SLP_VALID) == 0) {
289				if (nuidp)
290				    free((caddr_t)nuidp, M_NFSUID);
291			    } else {
292				if (nuidp == (struct nfsuid *)0) {
293				    nuidp = slp->ns_uidlruhead.tqh_first;
294				    LIST_REMOVE(nuidp, nu_hash);
295				    TAILQ_REMOVE(&slp->ns_uidlruhead, nuidp,
296					nu_lru);
297				    if (nuidp->nu_flag & NU_NAM)
298					m_freem(nuidp->nu_nam);
299			        }
300				nuidp->nu_flag = 0;
301				nuidp->nu_cr = nsd->nsd_cr;
				/* Clamp the group count supplied by user space. */
302				if (nuidp->nu_cr.cr_ngroups > NGROUPS)
303				    nuidp->nu_cr.cr_ngroups = NGROUPS;
304				nuidp->nu_cr.cr_ref = 1;
305				nuidp->nu_timestamp = nsd->nsd_timestamp;
306				nuidp->nu_expire = time.tv_sec + nsd->nsd_ttl;
307				/*
308				 * and save the session key in nu_key.
309				 */
310				bcopy(nsd->nsd_key, nuidp->nu_key,
311				    sizeof (nsd->nsd_key));
				/*
				 * Remember the client address: inline for
				 * AF_INET, as a copied mbuf otherwise.
				 */
312				if (nfsd->nfsd_nd->nd_nam2) {
313				    struct sockaddr_in *saddr;
314
315				    saddr = mtod(nfsd->nfsd_nd->nd_nam2,
316					 struct sockaddr_in *);
317				    switch (saddr->sin_family) {
318				    case AF_INET:
319					nuidp->nu_flag |= NU_INETADDR;
320					nuidp->nu_inetaddr =
321					     saddr->sin_addr.s_addr;
322					break;
323				    case AF_ISO:
324				    default:
325					nuidp->nu_flag |= NU_NAM;
326					nuidp->nu_nam = m_copym(
327					    nfsd->nfsd_nd->nd_nam2, 0,
328					     M_COPYALL, M_WAIT);
329					break;
330				    };
331				}
332				TAILQ_INSERT_TAIL(&slp->ns_uidlruhead, nuidp,
333					nu_lru);
				/*
				 * NOTE(review): the lookup above hashes on
				 * nsd_cr.cr_uid but this insertion hashes on
				 * nsd_uid; if the two can differ the entry
				 * will not be found again -- confirm.
				 */
334				LIST_INSERT_HEAD(NUIDHASH(slp, nsd->nsd_uid),
335					nuidp, nu_hash);
336				nfsrv_setcred(&nuidp->nu_cr,
337				    &nfsd->nfsd_nd->nd_cr);
338				nfsd->nfsd_nd->nd_flag |= ND_KERBFULL;
339			    }
340			}
341		}
		/* User-level authentication failed: have the nfsd reject the request. */
342		if ((uap->flag & NFSSVC_AUTHINFAIL) && (nfsd = nsd->nsd_nfsd))
343			nfsd->nfsd_flag |= NFSD_AUTHFAIL;
344		error = nfssvc_nfsd(nsd, uap->argp, p);
345	}
346#endif /* NFS_NOSERVER */
	/* A daemon killed by a signal is a normal exit, not an error. */
347	if (error == EINTR || error == ERESTART)
348		error = 0;
349	return (error);
350}
351
352#ifndef NFS_NOSERVER
353/*
354 * Adds a socket to the list for servicing by nfsds.
355 */
356static int
357nfssvc_addsock(fp, mynam, p)
358	struct file *fp;
359	struct mbuf *mynam;
360	struct proc *p;
361{
362	register struct mbuf *m;
363	register int siz;
364	register struct nfssvc_sock *slp;
365	register struct socket *so;
366	struct nfssvc_sock *tslp;
367	int error, s;
368
369	so = (struct socket *)fp->f_data;
370	tslp = (struct nfssvc_sock *)0;
371	/*
372	 * Add it to the list, as required.
373	 */
374	if (so->so_proto->pr_protocol == IPPROTO_UDP) {
375		tslp = nfs_udpsock;
376		if (tslp->ns_flag & SLP_VALID) {
377			m_freem(mynam);
378			return (EPERM);
379		}
380#ifdef ISO
381	} else if (so->so_proto->pr_protocol == ISOPROTO_CLTP) {
382		tslp = nfs_cltpsock;
383		if (tslp->ns_flag & SLP_VALID) {
384			m_freem(mynam);
385			return (EPERM);
386		}
387#endif /* ISO */
388	}
389	if (so->so_type == SOCK_STREAM)
390		siz = NFS_MAXPACKET + sizeof (u_long);
391	else
392		siz = NFS_MAXPACKET;
393	error = soreserve(so, siz, siz);
394	if (error) {
395		m_freem(mynam);
396		return (error);
397	}
398
399	/*
400	 * Set protocol specific options { for now TCP only } and
401	 * reserve some space. For datagram sockets, this can get called
402	 * repeatedly for the same socket, but that isn't harmful.
403	 */
404	if (so->so_type == SOCK_STREAM) {
405		MGET(m, M_WAIT, MT_SOOPTS);
406		*mtod(m, int *) = 1;
407		m->m_len = sizeof(int);
408		sosetopt(so, SOL_SOCKET, SO_KEEPALIVE, m, p);
409	}
410	if (so->so_proto->pr_domain->dom_family == AF_INET &&
411	    so->so_proto->pr_protocol == IPPROTO_TCP) {
412		MGET(m, M_WAIT, MT_SOOPTS);
413		*mtod(m, int *) = 1;
414		m->m_len = sizeof(int);
415		sosetopt(so, IPPROTO_TCP, TCP_NODELAY, m, p);
416	}
417	so->so_rcv.sb_flags &= ~SB_NOINTR;
418	so->so_rcv.sb_timeo = 0;
419	so->so_snd.sb_flags &= ~SB_NOINTR;
420	so->so_snd.sb_timeo = 0;
421	if (tslp)
422		slp = tslp;
423	else {
424		slp = (struct nfssvc_sock *)
425			malloc(sizeof (struct nfssvc_sock), M_NFSSVC, M_WAITOK);
426		bzero((caddr_t)slp, sizeof (struct nfssvc_sock));
427		TAILQ_INIT(&slp->ns_uidlruhead);
428		TAILQ_INSERT_TAIL(&nfssvc_sockhead, slp, ns_chain);
429	}
430	slp->ns_so = so;
431	slp->ns_nam = mynam;
432	fp->f_count++;
433	slp->ns_fp = fp;
434	s = splnet();
435	so->so_upcallarg = (caddr_t)slp;
436	so->so_upcall = nfsrv_rcv;
437	slp->ns_flag = (SLP_VALID | SLP_NEEDQ);
438	nfsrv_wakenfsd(slp);
439	splx(s);
440	return (0);
441}
442
443/*
444 * Called by nfssvc() for nfsds. Just loops around servicing rpc requests
445 * until it is killed by a signal.
446 */
/*
 * Main loop of an nfsd.
 *
 *	nsd	kernel copy of the daemon's nfsd_srvargs; nsd_nfsd is the
 *		per-daemon handle (null on the first call)
 *	argp	user address of the nfsd_srvargs, used to copy the
 *		structure back out when user-level authentication is needed
 *	p	the daemon process
 *
 * Each iteration picks a socket with work, pulls one request off it with
 * nfsrv_dorec(), consults the reply cache, runs the procedure (or the
 * write gatherer) and sends the reply.  Returns ENEEDAUTH when the daemon
 * must authenticate a request in user space; in that case the nfsd entry
 * stays registered.  Any other return is the error that ended the loop,
 * after the nfsd entry has been removed and freed.
 */
447static int
448nfssvc_nfsd(nsd, argp, p)
449	struct nfsd_srvargs *nsd;
450	caddr_t argp;
451	struct proc *p;
452{
453	register struct mbuf *m;
454	register int siz;
455	register struct nfssvc_sock *slp;
456	register struct socket *so;
457	register int *solockp;
458	struct nfsd *nfsd = nsd->nsd_nfsd;
459	struct nfsrv_descript *nd = NULL;
460	struct mbuf *mreq;
461	int error = 0, cacherep, s, sotype, writes_todo;
462	int procrastinate;
463	u_quad_t cur_usec;
464
465#ifndef nolint
466	cacherep = RC_DOIT;
467	writes_todo = 0;
468#endif
469	s = splnet();
	/* First call from this daemon: create and register its nfsd entry. */
470	if (nfsd == (struct nfsd *)0) {
471		nsd->nsd_nfsd = nfsd = (struct nfsd *)
472			malloc(sizeof (struct nfsd), M_NFSD, M_WAITOK);
473		bzero((caddr_t)nfsd, sizeof (struct nfsd));
474		nfsd->nfsd_procp = p;
475		TAILQ_INSERT_TAIL(&nfsd_head, nfsd, nfsd_chain);
476		nfs_numnfsd++;
477	}
478	/*
479	 * Loop getting rpc requests until SIGKILL.
480	 */
481	for (;;) {
482		if ((nfsd->nfsd_flag & NFSD_REQINPROG) == 0) {
			/*
			 * Sleep until a socket is assigned to us or the
			 * global "check the sockets" flag is raised.
			 */
483			while (nfsd->nfsd_slp == (struct nfssvc_sock *)0 &&
484			    (nfsd_head_flag & NFSD_CHECKSLP) == 0) {
485				nfsd->nfsd_flag |= NFSD_WAITING;
486				nfsd_waiting++;
487				error = tsleep((caddr_t)nfsd, PSOCK | PCATCH,
488				    "nfsd", 0);
489				nfsd_waiting--;
490				if (error)
491					goto done;
492			}
			/*
			 * Nothing assigned: scan for a valid socket that has
			 * a record pending, take a reference and claim it.
			 */
493			if (nfsd->nfsd_slp == (struct nfssvc_sock *)0 &&
494			    (nfsd_head_flag & NFSD_CHECKSLP) != 0) {
495				for (slp = nfssvc_sockhead.tqh_first; slp != 0;
496				    slp = slp->ns_chain.tqe_next) {
497				    if ((slp->ns_flag & (SLP_VALID | SLP_DOREC))
498					== (SLP_VALID | SLP_DOREC)) {
499					    slp->ns_flag &= ~SLP_DOREC;
500					    slp->ns_sref++;
501					    nfsd->nfsd_slp = slp;
502					    break;
503				    }
504				}
505				if (slp == 0)
506					nfsd_head_flag &= ~NFSD_CHECKSLP;
507			}
508			if ((slp = nfsd->nfsd_slp) == (struct nfssvc_sock *)0)
509				continue;
510			if (slp->ns_flag & SLP_VALID) {
511				if (slp->ns_flag & SLP_DISCONN)
512					nfsrv_zapsock(slp);
513				else if (slp->ns_flag & SLP_NEEDQ) {
514					slp->ns_flag &= ~SLP_NEEDQ;
515					(void) nfs_sndlock(&slp->ns_solock,
516						(struct nfsreq *)0);
517					nfsrv_rcv(slp->ns_so, (caddr_t)slp,
518						M_WAIT);
519					nfs_sndunlock(&slp->ns_solock);
520				}
521				error = nfsrv_dorec(slp, nfsd, &nd);
522				cur_usec = (u_quad_t)time.tv_sec * 1000000 +
523					(u_quad_t)time.tv_usec;
				/*
				 * No new request, but a gathered write on this
				 * socket is due: service it instead.
				 */
524				if (error && slp->ns_tq.lh_first &&
525				    slp->ns_tq.lh_first->nd_time <= cur_usec) {
526					error = 0;
527					cacherep = RC_DOIT;
528					writes_todo = 1;
529				} else
530					writes_todo = 0;
531				nfsd->nfsd_flag |= NFSD_REQINPROG;
532			}
533		} else {
534			error = 0;
535			slp = nfsd->nfsd_slp;
536		}
		/* Nothing to do on this socket, or it went away: let go of it. */
537		if (error || (slp->ns_flag & SLP_VALID) == 0) {
538			if (nd) {
539				free((caddr_t)nd, M_NFSRVDESC);
540				nd = NULL;
541			}
542			nfsd->nfsd_slp = (struct nfssvc_sock *)0;
543			nfsd->nfsd_flag &= ~NFSD_REQINPROG;
544			nfsrv_slpderef(slp);
545			continue;
546		}
547		splx(s);
548		so = slp->ns_so;
549		sotype = so->so_type;
		/* Only connection-oriented sockets need the send lock. */
550		if (so->so_proto->pr_flags & PR_CONNREQUIRED)
551			solockp = &slp->ns_solock;
552		else
553			solockp = (int *)0;
554		if (nd) {
555		    gettime(&nd->nd_starttime);
		    /* Per-request address if there is one, else the socket's peer. */
556		    if (nd->nd_nam2)
557			nd->nd_nam = nd->nd_nam2;
558		    else
559			nd->nd_nam = slp->ns_nam;
560
561		    /*
562		     * Check to see if authorization is needed.
563		     */
564		    if (nfsd->nfsd_flag & NFSD_NEEDAUTH) {
565			nfsd->nfsd_flag &= ~NFSD_NEEDAUTH;
566			nsd->nsd_haddr = mtod(nd->nd_nam,
567			    struct sockaddr_in *)->sin_addr.s_addr;
568			nsd->nsd_authlen = nfsd->nfsd_authlen;
569			nsd->nsd_verflen = nfsd->nfsd_verflen;
			/*
			 * Hand the authenticator and verifier to the daemon;
			 * if every copyout succeeds return to user space,
			 * otherwise drop the request.
			 */
570			if (!copyout(nfsd->nfsd_authstr,nsd->nsd_authstr,
571				nfsd->nfsd_authlen) &&
572			    !copyout(nfsd->nfsd_verfstr, nsd->nsd_verfstr,
573				nfsd->nfsd_verflen) &&
574			    !copyout((caddr_t)nsd, argp, sizeof (*nsd)))
575			    return (ENEEDAUTH);
576			cacherep = RC_DROPIT;
577		    } else
578			cacherep = nfsrv_getcache(nd, slp, &mreq);
579
580		    /*
581		     * Check for just starting up for NQNFS and send
582		     * fake "try again later" replies to the NQNFS clients.
583		     */
584		    if (notstarted && nqnfsstarttime <= time.tv_sec) {
585			if (modify_flag) {
586				nqnfsstarttime = time.tv_sec + nqsrv_writeslack;
587				modify_flag = 0;
588			} else
589				notstarted = 0;
590		    }
591		    if (notstarted) {
592			if ((nd->nd_flag & ND_NQNFS) == 0)
593				cacherep = RC_DROPIT;
594			else if (nd->nd_procnum != NFSPROC_WRITE) {
595				nd->nd_procnum = NFSPROC_NOOP;
596				nd->nd_repstat = NQNFS_TRYLATER;
597				cacherep = RC_DOIT;
598			} else
599				modify_flag = 1;
600		    } else if (nfsd->nfsd_flag & NFSD_AUTHFAIL) {
601			nfsd->nfsd_flag &= ~NFSD_AUTHFAIL;
602			nd->nd_procnum = NFSPROC_NOOP;
603			nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK);
604			cacherep = RC_DOIT;
605		    } else if (nfs_privport) {
606			/* Check if source port is privileged */
607			u_short port;
608			u_long  addr;
609			struct mbuf *nam = nd->nd_nam;
610			struct sockaddr_in *sin;
611
612			sin = mtod(nam, struct sockaddr_in *);
613			port = ntohs(sin->sin_port);
614			if (port >= IPPORT_RESERVED &&
615			    nd->nd_procnum != NFSPROC_NULL) {
616			    nd->nd_procnum = NFSPROC_NOOP;
617			    nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK);
618			    cacherep = RC_DOIT;
619			    printf("NFS request from unprivileged port (%s:%d)\n",
620				   inet_ntoa(sin->sin_addr), port);
621			}
622		    }
623
624		}
625
626		/*
627		 * Loop to get all the write rpc replies that have been
628		 * gathered together.
629		 */
630		do {
631		    switch (cacherep) {
632		    case RC_DOIT:
633			if (nd && (nd->nd_flag & ND_NFSV3))
634			    procrastinate = nfsrvw_procrastinate_v3;
635			else
636			    procrastinate = nfsrvw_procrastinate;
			/*
			 * nd may be null when only gathered writes are
			 * pending; writes_todo short-circuits the nd access.
			 */
637			if (writes_todo || (nd->nd_procnum == NFSPROC_WRITE &&
638			    procrastinate > 0 && !notstarted))
639			    error = nfsrv_writegather(&nd, slp,
640				nfsd->nfsd_procp, &mreq);
641			else
642			    error = (*(nfsrv3_procs[nd->nd_procnum]))(nd,
643				slp, nfsd->nfsd_procp, &mreq);
644			if (mreq == NULL)
645				break;
646			if (error) {
647				if (nd->nd_procnum != NQNFSPROC_VACATED)
648					nfsstats.srv_errs++;
649				nfsrv_updatecache(nd, FALSE, mreq);
650				if (nd->nd_nam2)
651					m_freem(nd->nd_nam2);
652				break;
653			}
654			nfsstats.srvrpccnt[nd->nd_procnum]++;
655			nfsrv_updatecache(nd, TRUE, mreq);
656			nd->nd_mrep = (struct mbuf *)0;
			/* Falls through to send the reply just built. */
657		    case RC_REPLY:
			/* Total the reply length over the mbuf chain. */
658			m = mreq;
659			siz = 0;
660			while (m) {
661				siz += m->m_len;
662				m = m->m_next;
663			}
664			if (siz <= 0 || siz > NFS_MAXPACKET) {
665				printf("mbuf siz=%d\n",siz);
666				panic("Bad nfs svc reply");
667			}
668			m = mreq;
669			m->m_pkthdr.len = siz;
670			m->m_pkthdr.rcvif = (struct ifnet *)0;
671			/*
672			 * For stream protocols, prepend a Sun RPC
673			 * Record Mark.
674			 */
675			if (sotype == SOCK_STREAM) {
676				M_PREPEND(m, NFSX_UNSIGNED, M_WAIT);
677				*mtod(m, u_long *) = htonl(0x80000000 | siz);
678			}
679			if (solockp)
680				(void) nfs_sndlock(solockp, (struct nfsreq *)0);
681			if (slp->ns_flag & SLP_VALID)
682			    error = nfs_send(so, nd->nd_nam2, m, NULL);
683			else {
684			    error = EPIPE;
685			    m_freem(m);
686			}
687			if (nfsrtton)
688				nfsd_rt(sotype, nd, cacherep);
689			if (nd->nd_nam2)
690				MFREE(nd->nd_nam2, m);
691			if (nd->nd_mrep)
692				m_freem(nd->nd_mrep);
			/* The connection is gone: shut the socket down. */
693			if (error == EPIPE)
694				nfsrv_zapsock(slp);
695			if (solockp)
696				nfs_sndunlock(solockp);
			/*
			 * Interrupted while sending: drop our references and
			 * leave; "done" expects to run at splnet.
			 */
697			if (error == EINTR || error == ERESTART) {
698				free((caddr_t)nd, M_NFSRVDESC);
699				nfsrv_slpderef(slp);
700				s = splnet();
701				goto done;
702			}
703			break;
704		    case RC_DROPIT:
705			if (nfsrtton)
706				nfsd_rt(sotype, nd, cacherep);
707			m_freem(nd->nd_mrep);
708			m_freem(nd->nd_nam2);
709			break;
710		    };
711		    if (nd) {
712			FREE((caddr_t)nd, M_NFSRVDESC);
713			nd = NULL;
714		    }
715
716		    /*
717		     * Check to see if there are outstanding writes that
718		     * need to be serviced.
719		     */
720		    cur_usec = (u_quad_t)time.tv_sec * 1000000 +
721			(u_quad_t)time.tv_usec;
722		    s = splsoftclock();
723		    if (slp->ns_tq.lh_first &&
724			slp->ns_tq.lh_first->nd_time <= cur_usec) {
725			cacherep = RC_DOIT;
726			writes_todo = 1;
727		    } else
728			writes_todo = 0;
729		    splx(s);
730		} while (writes_todo);
731		s = splnet();
		/*
		 * Try for another request on the same socket; if there is
		 * none, release the socket and go back to waiting.
		 */
732		if (nfsrv_dorec(slp, nfsd, &nd)) {
733			nfsd->nfsd_flag &= ~NFSD_REQINPROG;
734			nfsd->nfsd_slp = NULL;
735			nfsrv_slpderef(slp);
736		}
737	}
738done:
	/* Unregister this nfsd; the last one out reinitializes the server. */
739	TAILQ_REMOVE(&nfsd_head, nfsd, nfsd_chain);
740	splx(s);
741	free((caddr_t)nfsd, M_NFSD);
742	nsd->nsd_nfsd = (struct nfsd *)0;
743	if (--nfs_numnfsd == 0)
744		nfsrv_init(TRUE);	/* Reinitialize everything */
745	return (error);
746}
747#endif /* NFS_NOSERVER */
748
/*
 * When non-zero, an nfsiod detaches from its mount after finishing a
 * buffer if more than one iod is serving that mount (see nfssvc_iod()).
 */
749int nfs_defect = 0;
750SYSCTL_INT(_vfs_nfs, OID_AUTO, defect, CTLFLAG_RW, &nfs_defect, 0, "");
751
752/*
753 * Asynchronous I/O daemons for client nfs.
754 * They do read-ahead and write-behind operations on the block I/O cache.
755 * Never returns unless it fails or gets killed.
756 */
757static int
758nfssvc_iod(p)
759	struct proc *p;
760{
761	register struct buf *bp, *nbp;
762	register int i, myiod;
763	struct vnode *vp;
764	struct nfsmount *nmp;
765	int error = 0, s;
766
767	/*
768	 * Assign my position or return error if too many already running
769	 */
770	myiod = -1;
771	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
772		if (nfs_asyncdaemon[i] == 0) {
773			nfs_asyncdaemon[i]++;
774			myiod = i;
775			break;
776		}
777	if (myiod == -1)
778		return (EBUSY);
779	nfs_numasync++;
780	/*
781	 * Just loop around doin our stuff until SIGKILL
782	 */
783	for (;;) {
784	    while (((nmp = nfs_iodmount[myiod]) == NULL
785		    || nmp->nm_bufq.tqh_first == NULL)
786		   && error == 0) {
787		if (nmp)
788		    nmp->nm_bufqiods--;
789		nfs_iodwant[myiod] = p;
790		nfs_iodmount[myiod] = NULL;
791		error = tsleep((caddr_t)&nfs_iodwant[myiod],
792			PWAIT | PCATCH, "nfsidl", 0);
793	    }
794	    if (error) {
795		nfs_asyncdaemon[myiod] = 0;
796		if (nmp) nmp->nm_bufqiods--;
797		nfs_iodmount[myiod] = NULL;
798		nfs_numasync--;
799		return (error);
800	    }
801	    while ((bp = nmp->nm_bufq.tqh_first) != NULL) {
802		/* Take one off the front of the list */
803		TAILQ_REMOVE(&nmp->nm_bufq, bp, b_freelist);
804		nmp->nm_bufqlen--;
805		if (nmp->nm_bufqwant && nmp->nm_bufqlen < 2 * nfs_numasync) {
806		    nmp->nm_bufqwant = FALSE;
807		    wakeup(&nmp->nm_bufq);
808		}
809		if (bp->b_flags & B_READ)
810		    (void) nfs_doio(bp, bp->b_rcred, (struct proc *)0);
811		else
812		    (void) nfs_doio(bp, bp->b_wcred, (struct proc *)0);
813
814		/*
815		 * If there are more than one iod on this mount, then defect
816		 * so that the iods can be shared out fairly between the mounts
817		 */
818		if (nfs_defect && nmp->nm_bufqiods > 1) {
819		    NFS_DPF(ASYNCIO,
820			    ("nfssvc_iod: iod %d defecting from mount %p\n",
821			     myiod, nmp));
822		    nfs_iodmount[myiod] = NULL;
823		    nmp->nm_bufqiods--;
824		    break;
825		}
826	    }
827	}
828}
829
830/*
831 * Shut down a socket associated with an nfssvc_sock structure.
832 * Should be called with the send lock set, if required.
833 * The trick here is to increment the sref at the start, so that the nfsds
834 * will stop using it and clear ns_flag at the end so that it will not be
835 * reassigned during cleanup.
836 */
837static void
838nfsrv_zapsock(slp)
839	register struct nfssvc_sock *slp;
840{
841	register struct nfsuid *nuidp, *nnuidp;
842	register struct nfsrv_descript *nwp, *nnwp;
843	struct socket *so;
844	struct file *fp;
845	struct mbuf *m;
846	int s;
847
848	slp->ns_flag &= ~SLP_ALLFLAGS;
849	fp = slp->ns_fp;
850	if (fp) {
851		slp->ns_fp = (struct file *)0;
852		so = slp->ns_so;
853		so->so_upcall = NULL;
854		soshutdown(so, 2);
855		closef(fp, (struct proc *)0);
856		if (slp->ns_nam)
857			MFREE(slp->ns_nam, m);
858		m_freem(slp->ns_raw);
859		m_freem(slp->ns_rec);
860		for (nuidp = slp->ns_uidlruhead.tqh_first; nuidp != 0;
861		    nuidp = nnuidp) {
862			nnuidp = nuidp->nu_lru.tqe_next;
863			LIST_REMOVE(nuidp, nu_hash);
864			TAILQ_REMOVE(&slp->ns_uidlruhead, nuidp, nu_lru);
865			if (nuidp->nu_flag & NU_NAM)
866				m_freem(nuidp->nu_nam);
867			free((caddr_t)nuidp, M_NFSUID);
868		}
869		s = splsoftclock();
870		for (nwp = slp->ns_tq.lh_first; nwp; nwp = nnwp) {
871			nnwp = nwp->nd_tq.le_next;
872			LIST_REMOVE(nwp, nd_tq);
873			free((caddr_t)nwp, M_NFSRVDESC);
874		}
875		LIST_INIT(&slp->ns_tq);
876		splx(s);
877	}
878}
879
880/*
881 * Get an authorization string for the uid by having the mount_nfs sitting
882 * on this mount point porpoise out of the kernel and do it.
883 */
884int
885nfs_getauth(nmp, rep, cred, auth_str, auth_len, verf_str, verf_len, key)
886	register struct nfsmount *nmp;
887	struct nfsreq *rep;
888	struct ucred *cred;
889	char **auth_str;
890	int *auth_len;
891	char *verf_str;
892	int *verf_len;
893	NFSKERBKEY_T key;		/* return session key */
894{
895	int error = 0;
896
897	while ((nmp->nm_flag & NFSMNT_WAITAUTH) == 0) {
898		nmp->nm_flag |= NFSMNT_WANTAUTH;
899		(void) tsleep((caddr_t)&nmp->nm_authtype, PSOCK,
900			"nfsauth1", 2 * hz);
901		error = nfs_sigintr(nmp, rep, rep->r_procp);
902		if (error) {
903			nmp->nm_flag &= ~NFSMNT_WANTAUTH;
904			return (error);
905		}
906	}
907	nmp->nm_flag &= ~(NFSMNT_WAITAUTH | NFSMNT_WANTAUTH);
908	nmp->nm_authstr = *auth_str = (char *)malloc(RPCAUTH_MAXSIZ, M_TEMP, M_WAITOK);
909	nmp->nm_authlen = RPCAUTH_MAXSIZ;
910	nmp->nm_verfstr = verf_str;
911	nmp->nm_verflen = *verf_len;
912	nmp->nm_authuid = cred->cr_uid;
913	wakeup((caddr_t)&nmp->nm_authstr);
914
915	/*
916	 * And wait for mount_nfs to do its stuff.
917	 */
918	while ((nmp->nm_flag & NFSMNT_HASAUTH) == 0 && error == 0) {
919		(void) tsleep((caddr_t)&nmp->nm_authlen, PSOCK,
920			"nfsauth2", 2 * hz);
921		error = nfs_sigintr(nmp, rep, rep->r_procp);
922	}
923	if (nmp->nm_flag & NFSMNT_AUTHERR) {
924		nmp->nm_flag &= ~NFSMNT_AUTHERR;
925		error = EAUTH;
926	}
927	if (error)
928		free((caddr_t)*auth_str, M_TEMP);
929	else {
930		*auth_len = nmp->nm_authlen;
931		*verf_len = nmp->nm_verflen;
932		bcopy((caddr_t)nmp->nm_key, (caddr_t)key, sizeof (key));
933	}
934	nmp->nm_flag &= ~NFSMNT_HASAUTH;
935	nmp->nm_flag |= NFSMNT_WAITAUTH;
936	if (nmp->nm_flag & NFSMNT_WANTAUTH) {
937		nmp->nm_flag &= ~NFSMNT_WANTAUTH;
938		wakeup((caddr_t)&nmp->nm_authtype);
939	}
940	return (error);
941}
942
943/*
944 * Get a nickname authenticator and verifier.
945 */
946int
947nfs_getnickauth(nmp, cred, auth_str, auth_len, verf_str, verf_len)
948	struct nfsmount *nmp;
949	struct ucred *cred;
950	char **auth_str;
951	int *auth_len;
952	char *verf_str;
953	int verf_len;
954{
955	register struct nfsuid *nuidp;
956	register u_long *nickp, *verfp;
957	struct timeval ktvin, ktvout;
958
959#ifdef DIAGNOSTIC
960	if (verf_len < (4 * NFSX_UNSIGNED))
961		panic("nfs_getnickauth verf too small");
962#endif
963	for (nuidp = NMUIDHASH(nmp, cred->cr_uid)->lh_first;
964	    nuidp != 0; nuidp = nuidp->nu_hash.le_next) {
965		if (nuidp->nu_cr.cr_uid == cred->cr_uid)
966			break;
967	}
968	if (!nuidp || nuidp->nu_expire < time.tv_sec)
969		return (EACCES);
970
971	/*
972	 * Move to the end of the lru list (end of lru == most recently used).
973	 */
974	TAILQ_REMOVE(&nmp->nm_uidlruhead, nuidp, nu_lru);
975	TAILQ_INSERT_TAIL(&nmp->nm_uidlruhead, nuidp, nu_lru);
976
977	nickp = (u_long *)malloc(2 * NFSX_UNSIGNED, M_TEMP, M_WAITOK);
978	*nickp++ = txdr_unsigned(RPCAKN_NICKNAME);
979	*nickp = txdr_unsigned(nuidp->nu_nickname);
980	*auth_str = (char *)nickp;
981	*auth_len = 2 * NFSX_UNSIGNED;
982
983	/*
984	 * Now we must encrypt the verifier and package it up.
985	 */
986	verfp = (u_long *)verf_str;
987	*verfp++ = txdr_unsigned(RPCAKN_NICKNAME);
988	if (time.tv_sec > nuidp->nu_timestamp.tv_sec ||
989	    (time.tv_sec == nuidp->nu_timestamp.tv_sec &&
990	     time.tv_usec > nuidp->nu_timestamp.tv_usec))
991		gettime(&nuidp->nu_timestamp);
992	else
993		nuidp->nu_timestamp.tv_usec++;
994	ktvin.tv_sec = txdr_unsigned(nuidp->nu_timestamp.tv_sec);
995	ktvin.tv_usec = txdr_unsigned(nuidp->nu_timestamp.tv_usec);
996
997	/*
998	 * Now encrypt the timestamp verifier in ecb mode using the session
999	 * key.
1000	 */
1001#ifdef NFSKERB
1002	XXX
1003#endif
1004
1005	*verfp++ = ktvout.tv_sec;
1006	*verfp++ = ktvout.tv_usec;
1007	*verfp = 0;
1008	return (0);
1009}
1010
1011/*
1012 * Save the current nickname in a hash list entry on the mount point.
1013 */
1014int
1015nfs_savenickauth(nmp, cred, len, key, mdp, dposp, mrep)
1016	register struct nfsmount *nmp;
1017	struct ucred *cred;
1018	int len;
1019	NFSKERBKEY_T key;
1020	struct mbuf **mdp;
1021	char **dposp;
1022	struct mbuf *mrep;
1023{
1024	register struct nfsuid *nuidp;
1025	register u_long *tl;
1026	register long t1;
1027	struct mbuf *md = *mdp;
1028	struct timeval ktvin, ktvout;
1029	u_long nick;
1030	char *dpos = *dposp, *cp2;
1031	int deltasec, error = 0;
1032
1033	if (len == (3 * NFSX_UNSIGNED)) {
1034		nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED);
1035		ktvin.tv_sec = *tl++;
1036		ktvin.tv_usec = *tl++;
1037		nick = fxdr_unsigned(u_long, *tl);
1038
1039		/*
1040		 * Decrypt the timestamp in ecb mode.
1041		 */
1042#ifdef NFSKERB
1043		XXX
1044#endif
1045		ktvout.tv_sec = fxdr_unsigned(long, ktvout.tv_sec);
1046		ktvout.tv_usec = fxdr_unsigned(long, ktvout.tv_usec);
1047		deltasec = time.tv_sec - ktvout.tv_sec;
1048		if (deltasec < 0)
1049			deltasec = -deltasec;
1050		/*
1051		 * If ok, add it to the hash list for the mount point.
1052		 */
1053		if (deltasec <= NFS_KERBCLOCKSKEW) {
1054			if (nmp->nm_numuids < nuidhash_max) {
1055				nmp->nm_numuids++;
1056				nuidp = (struct nfsuid *)
1057				   malloc(sizeof (struct nfsuid), M_NFSUID,
1058					M_WAITOK);
1059			} else {
1060				nuidp = nmp->nm_uidlruhead.tqh_first;
1061				LIST_REMOVE(nuidp, nu_hash);
1062				TAILQ_REMOVE(&nmp->nm_uidlruhead, nuidp,
1063					nu_lru);
1064			}
1065			nuidp->nu_flag = 0;
1066			nuidp->nu_cr.cr_uid = cred->cr_uid;
1067			nuidp->nu_expire = time.tv_sec + NFS_KERBTTL;
1068			nuidp->nu_timestamp = ktvout;
1069			nuidp->nu_nickname = nick;
1070			bcopy(key, nuidp->nu_key, sizeof (key));
1071			TAILQ_INSERT_TAIL(&nmp->nm_uidlruhead, nuidp,
1072				nu_lru);
1073			LIST_INSERT_HEAD(NMUIDHASH(nmp, cred->cr_uid),
1074				nuidp, nu_hash);
1075		}
1076	} else
1077		nfsm_adv(nfsm_rndup(len));
1078nfsmout:
1079	*mdp = md;
1080	*dposp = dpos;
1081	return (error);
1082}
1083
1084#ifndef NFS_NOSERVER
1085
1086/*
1087 * Dereference a server socket structure. If it has no more references and
1088 * is no longer valid, you can throw it away.
1089 */
1090void
1091nfsrv_slpderef(slp)
1092	register struct nfssvc_sock *slp;
1093{
1094	if (--(slp->ns_sref) == 0 && (slp->ns_flag & SLP_VALID) == 0) {
1095		TAILQ_REMOVE(&nfssvc_sockhead, slp, ns_chain);
1096		free((caddr_t)slp, M_NFSSVC);
1097	}
1098}
1099
1100/*
1101 * Initialize the data structures for the server.
1102 * Handshake with any new nfsds starting up to avoid any chance of
1103 * corruption.
1104 */
1105void
1106nfsrv_init(terminating)
1107	int terminating;
1108{
1109	register struct nfssvc_sock *slp, *nslp;
1110
1111	if (nfssvc_sockhead_flag & SLP_INIT)
1112		panic("nfsd init");
1113	nfssvc_sockhead_flag |= SLP_INIT;
1114	if (terminating) {
1115		for (slp = nfssvc_sockhead.tqh_first; slp != 0; slp = nslp) {
1116			nslp = slp->ns_chain.tqe_next;
1117			if (slp->ns_flag & SLP_VALID)
1118				nfsrv_zapsock(slp);
1119			TAILQ_REMOVE(&nfssvc_sockhead, slp, ns_chain);
1120			free((caddr_t)slp, M_NFSSVC);
1121		}
1122		nfsrv_cleancache();	/* And clear out server cache */
1123	}
1124
1125	TAILQ_INIT(&nfssvc_sockhead);
1126	nfssvc_sockhead_flag &= ~SLP_INIT;
1127	if (nfssvc_sockhead_flag & SLP_WANTINIT) {
1128		nfssvc_sockhead_flag &= ~SLP_WANTINIT;
1129		wakeup((caddr_t)&nfssvc_sockhead);
1130	}
1131
1132	TAILQ_INIT(&nfsd_head);
1133	nfsd_head_flag &= ~NFSD_CHECKSLP;
1134
1135	nfs_udpsock = (struct nfssvc_sock *)
1136	    malloc(sizeof (struct nfssvc_sock), M_NFSSVC, M_WAITOK);
1137	bzero((caddr_t)nfs_udpsock, sizeof (struct nfssvc_sock));
1138	TAILQ_INIT(&nfs_udpsock->ns_uidlruhead);
1139	TAILQ_INSERT_HEAD(&nfssvc_sockhead, nfs_udpsock, ns_chain);
1140
1141	nfs_cltpsock = (struct nfssvc_sock *)
1142	    malloc(sizeof (struct nfssvc_sock), M_NFSSVC, M_WAITOK);
1143	bzero((caddr_t)nfs_cltpsock, sizeof (struct nfssvc_sock));
1144	TAILQ_INIT(&nfs_cltpsock->ns_uidlruhead);
1145	TAILQ_INSERT_TAIL(&nfssvc_sockhead, nfs_cltpsock, ns_chain);
1146}
1147
1148/*
1149 * Add entries to the server monitor log.
1150 */
1151static void
1152nfsd_rt(sotype, nd, cacherep)
1153	int sotype;
1154	register struct nfsrv_descript *nd;
1155	int cacherep;
1156{
1157	register struct drt *rt;
1158
1159	rt = &nfsdrt.drt[nfsdrt.pos];
1160	if (cacherep == RC_DOIT)
1161		rt->flag = 0;
1162	else if (cacherep == RC_REPLY)
1163		rt->flag = DRT_CACHEREPLY;
1164	else
1165		rt->flag = DRT_CACHEDROP;
1166	if (sotype == SOCK_STREAM)
1167		rt->flag |= DRT_TCP;
1168	if (nd->nd_flag & ND_NQNFS)
1169		rt->flag |= DRT_NQNFS;
1170	else if (nd->nd_flag & ND_NFSV3)
1171		rt->flag |= DRT_NFSV3;
1172	rt->proc = nd->nd_procnum;
1173	if (mtod(nd->nd_nam, struct sockaddr *)->sa_family == AF_INET)
1174	    rt->ipadr = mtod(nd->nd_nam, struct sockaddr_in *)->sin_addr.s_addr;
1175	else
1176	    rt->ipadr = INADDR_ANY;
1177	rt->resptime = ((time.tv_sec - nd->nd_starttime.tv_sec) * 1000000) +
1178		(time.tv_usec - nd->nd_starttime.tv_usec);
1179	gettime(&rt->tstamp);
1180	nfsdrt.pos = (nfsdrt.pos + 1) % NFSRTTLOGSIZ;
1181}
1182#endif /* NFS_NOSERVER */
1183