nfs_srvkrpc.c revision 194073
1214501Srpaulo/*-
2214501Srpaulo * Copyright (c) 1989, 1993
3214501Srpaulo *	The Regents of the University of California.  All rights reserved.
4214501Srpaulo *
5252726Srpaulo * This code is derived from software contributed to Berkeley by
6252726Srpaulo * Rick Macklem at The University of Guelph.
7214501Srpaulo *
8214501Srpaulo * Redistribution and use in source and binary forms, with or without
9214501Srpaulo * modification, are permitted provided that the following conditions
10214501Srpaulo * are met:
11214501Srpaulo * 1. Redistributions of source code must retain the above copyright
12214501Srpaulo *    notice, this list of conditions and the following disclaimer.
13214501Srpaulo * 2. Redistributions in binary form must reproduce the above copyright
14214501Srpaulo *    notice, this list of conditions and the following disclaimer in the
15214501Srpaulo *    documentation and/or other materials provided with the distribution.
16214501Srpaulo * 4. Neither the name of the University nor the names of its contributors
17214501Srpaulo *    may be used to endorse or promote products derived from this software
18214501Srpaulo *    without specific prior written permission.
19214501Srpaulo *
20214501Srpaulo * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21214501Srpaulo * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22214501Srpaulo * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23214501Srpaulo * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24214501Srpaulo * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25214501Srpaulo * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26214501Srpaulo * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27214501Srpaulo * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28214501Srpaulo * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29252726Srpaulo * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30252726Srpaulo * SUCH DAMAGE.
31252726Srpaulo *
32252726Srpaulo *	@(#)nfs_syscalls.c	8.5 (Berkeley) 3/30/95
33252726Srpaulo */
34252726Srpaulo
35252726Srpaulo#include <sys/cdefs.h>
36252726Srpaulo__FBSDID("$FreeBSD: head/sys/nfsserver/nfs_srvkrpc.c 194073 2009-06-12 20:18:08Z rmacklem $");
37214501Srpaulo
38252726Srpaulo#include "opt_inet6.h"
39252726Srpaulo#include "opt_kgssapi.h"
40252726Srpaulo
41214501Srpaulo#include <sys/param.h>
42214501Srpaulo#include <sys/systm.h>
43214501Srpaulo#include <sys/sysproto.h>
44214501Srpaulo#include <sys/kernel.h>
45214501Srpaulo#include <sys/sysctl.h>
46214501Srpaulo#include <sys/file.h>
47214501Srpaulo#include <sys/filedesc.h>
48214501Srpaulo#include <sys/jail.h>
49214501Srpaulo#include <sys/vnode.h>
50214501Srpaulo#include <sys/malloc.h>
51214501Srpaulo#include <sys/mount.h>
52214501Srpaulo#include <sys/priv.h>
53214501Srpaulo#include <sys/proc.h>
54214501Srpaulo#include <sys/bio.h>
55214501Srpaulo#include <sys/buf.h>
56214501Srpaulo#include <sys/mbuf.h>
57214501Srpaulo#include <sys/socket.h>
58214501Srpaulo#include <sys/socketvar.h>
59214501Srpaulo#include <sys/domain.h>
60214501Srpaulo#include <sys/protosw.h>
61214501Srpaulo#include <sys/namei.h>
62214501Srpaulo#include <sys/fcntl.h>
63214501Srpaulo#include <sys/lockf.h>
64214501Srpaulo#include <sys/eventhandler.h>
65214501Srpaulo
66214501Srpaulo#include <netinet/in.h>
67214501Srpaulo#include <netinet/tcp.h>
68214501Srpaulo#ifdef INET6
69214501Srpaulo#include <net/if.h>
70214501Srpaulo#include <netinet6/in6_var.h>
71214501Srpaulo#endif
72214501Srpaulo
73214501Srpaulo#include <rpc/rpc.h>
74214501Srpaulo#include <rpc/rpcsec_gss.h>
75214501Srpaulo#include <rpc/replay.h>
76214501Srpaulo
77214501Srpaulo#include <nfs/xdr_subs.h>
78214501Srpaulo#include <nfs/rpcv2.h>
79214501Srpaulo#include <nfs/nfsproto.h>
80214501Srpaulo#include <nfsserver/nfs.h>
81252726Srpaulo#include <nfsserver/nfsm_subs.h>
82252726Srpaulo#include <nfsserver/nfsrvcache.h>
83252726Srpaulo#include <nfsserver/nfs_fha.h>
84252726Srpaulo
85252726Srpaulo#include <security/mac/mac_framework.h>
86252726Srpaulo
87252726Srpaulo#ifndef NFS_LEGACYRPC
88252726Srpaulo
/*
 * Malloc type definitions.  None of them is referenced in the part of the
 * file visible here.
 */
89252726Srpaulostatic MALLOC_DEFINE(M_NFSSVC, "nfss_srvsock", "Nfs server structure");
90252726Srpaulo
91252726SrpauloMALLOC_DEFINE(M_NFSRVDESC, "nfss_srvdesc", "NFS server socket descriptor");
92252726SrpauloMALLOC_DEFINE(M_NFSD, "nfss_daemon", "Nfs server daemon structure");
93252726Srpaulo
/* TRUE is what nfssvc_nfsd() passes to nfsrv_init() when shutting down. */
94252726Srpaulo#define	TRUE	1
95252726Srpaulo#define	FALSE	0
96252726Srpaulo
97252726SrpauloSYSCTL_DECL(_vfs_nfsrv);
98252726Srpaulo
/*
 * nfsrv_pool:            service pool created by nfsrv_init() and run by
 *                        nfssvc_nfsd() through svc_run().
 * nfsd_waiting:          not used in the code shown here.
 * nfsrv_numnfsd:         raised to 1 by the first nfssvc_nfsd() caller and
 *                        dropped again when svc_run() returns; while it is
 *                        non-zero other callers return immediately.
 * nfs_realign_test:      incremented on every call to nfs_realign().
 * nfs_realign_count:     incremented each time nfs_realign() actually has
 *                        to copy a chain.
 * nfsrv_callout:         not used in the code shown here.
 * nfsrv_nmbclusters_tag: handle of the nmbclusters_change event handler
 *                        registered (and deregistered) by nfsrv_init().
 */
99252726SrpauloSVCPOOL		*nfsrv_pool;
100252726Srpauloint		nfsd_waiting = 0;
101252726Srpauloint		nfsrv_numnfsd = 0;
102252726Srpaulostatic int	nfs_realign_test;
103252726Srpaulostatic int	nfs_realign_count;
104252726Srpaulostruct callout	nfsrv_callout;
105252726Srpaulostatic eventhandler_tag nfsrv_nmbclusters_tag;
106252726Srpaulo
/*
 * When non-zero, nfssvc_program() rejects every request other than the
 * NULL procedure whose source port is >= IPPORT_RESERVED.
 */
107252726Srpaulostatic int	nfs_privport = 0;
108252726SrpauloSYSCTL_INT(_vfs_nfsrv, NFS_NFSPRIVPORT, nfs_privport, CTLFLAG_RW,
109252726Srpaulo    &nfs_privport, 0,
110252726Srpaulo    "Only allow clients using a privileged port");
/*
 * The two write-gathering delay variables are defined outside this file
 * view; only their sysctl knobs are declared here.
 */
111214501SrpauloSYSCTL_INT(_vfs_nfsrv, OID_AUTO, gatherdelay, CTLFLAG_RW,
112214501Srpaulo    &nfsrvw_procrastinate, 0,
113214501Srpaulo    "Delay value for write gathering");
114SYSCTL_INT(_vfs_nfsrv, OID_AUTO, gatherdelay_v3, CTLFLAG_RW,
115    &nfsrvw_procrastinate_v3, 0,
116    "Delay in seconds for NFSv3 write gathering");
/* Export the nfs_realign() counters; both are writable (CTLFLAG_RW). */
117SYSCTL_INT(_vfs_nfsrv, OID_AUTO, realign_test, CTLFLAG_RW,
118	    &nfs_realign_test, 0, "");
119SYSCTL_INT(_vfs_nfsrv, OID_AUTO, realign_count, CTLFLAG_RW,
120	    &nfs_realign_count, 0, "");
121
/* Helpers for nfssvc_nfsserver(), defined later in this file. */
122static int	nfssvc_addsock(struct file *, struct thread *);
123static int	nfssvc_nfsd(struct thread *, struct nfsd_nfsd_args *);
124
/* Used by nfssvc_addsock() as the send and receive reservation size. */
125extern u_long sb_max_adj;
126
/*
 * Procedure dispatch table.  nfssvc_program() indexes it with the request's
 * procedure number: rq_proc itself for non-V2 requests, or rq_proc mapped
 * through nfsrv_nfsv3_procid[] for NFS_VER2 requests.  The position of each
 * entry is therefore significant; do not reorder.
 *
 * Each handler receives the request descriptor, a socket pointer (always
 * NULL when called from nfssvc_program()) and a location in which to
 * return the reply mbuf chain.
 */
127int32_t (*nfsrv3_procs[NFS_NPROCS])(struct nfsrv_descript *nd,
128    struct nfssvc_sock *slp, struct mbuf **mreqp) = {
129	nfsrv_null,
130	nfsrv_getattr,
131	nfsrv_setattr,
132	nfsrv_lookup,
133	nfsrv3_access,
134	nfsrv_readlink,
135	nfsrv_read,
136	nfsrv_write,
137	nfsrv_create,
138	nfsrv_mkdir,
139	nfsrv_symlink,
140	nfsrv_mknod,
141	nfsrv_remove,
142	nfsrv_rmdir,
143	nfsrv_rename,
144	nfsrv_link,
145	nfsrv_readdir,
146	nfsrv_readdirplus,
147	nfsrv_statfs,
148	nfsrv_fsinfo,
149	nfsrv_pathconf,
150	nfsrv_commit,
151	nfsrv_noop
152};
153
154/*
155 * NFS server system calls
156 */
157/*
158 * This is now called from nfssvc() in nfs/nfs_nfssvc.c.
159 */
160
161/*
162 * Nfs server psuedo system call for the nfsd's
163 * Based on the flag value it either:
164 * - adds a socket to the selection list
165 * - remains in the kernel as an nfsd
166 * - remains in the kernel as an nfsiod
167 * For INET6 we suppose that nfsd provides only IN6P_IPV6_V6ONLY sockets
168 * and that mountd provides
169 *  - sockaddr with no IPv4-mapped addresses
170 *  - mask for both INET and INET6 families if there is IPv4-mapped overlap
171 */
172int
173nfssvc_nfsserver(struct thread *td, struct nfssvc_args *uap)
174{
175	struct file *fp;
176	struct nfsd_addsock_args addsockarg;
177	struct nfsd_nfsd_args nfsdarg;
178	int error;
179
180	if (uap->flag & NFSSVC_ADDSOCK) {
181		error = copyin(uap->argp, (caddr_t)&addsockarg,
182		    sizeof(addsockarg));
183		if (error)
184			return (error);
185		if ((error = fget(td, addsockarg.sock, &fp)) != 0)
186			return (error);
187		if (fp->f_type != DTYPE_SOCKET) {
188			fdrop(fp, td);
189			return (error);	/* XXXRW: Should be EINVAL? */
190		}
191		error = nfssvc_addsock(fp, td);
192		fdrop(fp, td);
193	} else if (uap->flag & NFSSVC_OLDNFSD) {
194		error = nfssvc_nfsd(td, NULL);
195	} else if (uap->flag & NFSSVC_NFSD) {
196		if (!uap->argp)
197			return (EINVAL);
198		error = copyin(uap->argp, (caddr_t)&nfsdarg,
199		    sizeof(nfsdarg));
200		if (error)
201			return (error);
202		error = nfssvc_nfsd(td, &nfsdarg);
203	} else {
204		error = ENXIO;
205	}
206	return (error);
207}
208
209/*
210 * Generate the rpc reply header
211 * siz arg. is used to decide if adding a cluster is worthwhile
212 */
213struct mbuf *
214nfs_rephead(int siz, struct nfsrv_descript *nd, int err,
215    struct mbuf **mbp, caddr_t *bposp)
216{
217	u_int32_t *tl;
218	struct mbuf *mreq;
219	caddr_t bpos;
220	struct mbuf *mb;
221
222	if (err == EBADRPC)
223		return (NULL);
224
225	nd->nd_repstat = err;
226	if (err && (nd->nd_flag & ND_NFSV3) == 0)	/* XXX recheck */
227		siz = 0;
228
229	MGET(mreq, M_WAIT, MT_DATA);
230
231	/*
232	 * If this is a big reply, use a cluster
233	 */
234	mreq->m_len = 0;
235	if (siz >= MINCLSIZE) {
236		MCLGET(mreq, M_WAIT);
237	}
238	mb = mreq;
239	bpos = mtod(mb, caddr_t);
240
241	if (err != NFSERR_RETVOID) {
242		tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED);
243		if (err)
244			*tl = txdr_unsigned(nfsrv_errmap(nd, err));
245		else
246			*tl = 0;
247	}
248
249	*mbp = mb;
250	*bposp = bpos;
251	if (err != 0 && err != NFSERR_RETVOID)
252		nfsrvstats.srvrpc_errs++;
253
254	return (mreq);
255}
256
257/*
258 *	nfs_realign:
259 *
260 *	Check for badly aligned mbuf data and realign by copying the unaligned
261 *	portion of the data into a new mbuf chain and freeing the portions
262 *	of the old chain that were replaced.
263 *
264 *	We cannot simply realign the data within the existing mbuf chain
265 *	because the underlying buffers may contain other rpc commands and
266 *	we cannot afford to overwrite them.
267 *
268 *	We would prefer to avoid this situation entirely.  The situation does
269 *	not occur with NFS/UDP and is supposed to only occassionally occur
270 *	with TCP.  Use vfs.nfs.realign_count and realign_test to check this.
271 */
272static void
273nfs_realign(struct mbuf **pm)	/* XXX COMMON */
274{
275	struct mbuf *m;
276	struct mbuf *n = NULL;
277	int off = 0;
278
279	++nfs_realign_test;
280	while ((m = *pm) != NULL) {
281		if ((m->m_len & 0x3) || (mtod(m, intptr_t) & 0x3)) {
282			MGET(n, M_WAIT, MT_DATA);
283			if (m->m_len >= MINCLSIZE) {
284				MCLGET(n, M_WAIT);
285			}
286			n->m_len = 0;
287			break;
288		}
289		pm = &m->m_next;
290	}
291
292	/*
293	 * If n is non-NULL, loop on m copying data, then replace the
294	 * portion of the chain that had to be realigned.
295	 */
296	if (n != NULL) {
297		++nfs_realign_count;
298		while (m) {
299			m_copyback(n, off, m->m_len, mtod(m, caddr_t));
300			off += m->m_len;
301			m = m->m_next;
302		}
303		m_freem(*pm);
304		*pm = n;
305	}
306}
307
308static void
309nfssvc_program(struct svc_req *rqst, SVCXPRT *xprt)
310{
311	rpcproc_t procnum;
312	int32_t (*proc)(struct nfsrv_descript *nd, struct nfssvc_sock *slp,
313	    struct mbuf **mreqp);
314	int flag;
315	struct nfsrv_descript nd;
316	struct mbuf *mreq, *mrep;
317	int error;
318
319	if (rqst->rq_vers == NFS_VER2) {
320		if (rqst->rq_proc > NFSV2PROC_STATFS) {
321			svcerr_noproc(rqst);
322			svc_freereq(rqst);
323			return;
324		}
325		procnum = nfsrv_nfsv3_procid[rqst->rq_proc];
326		flag = 0;
327	} else {
328		if (rqst->rq_proc >= NFS_NPROCS) {
329			svcerr_noproc(rqst);
330			svc_freereq(rqst);
331			return;
332		}
333		procnum = rqst->rq_proc;
334		flag = ND_NFSV3;
335	}
336	proc = nfsrv3_procs[procnum];
337
338	mreq = mrep = NULL;
339	mreq = rqst->rq_args;
340	rqst->rq_args = NULL;
341	nfs_realign(&mreq);
342
343	/*
344	 * Note: we want rq_addr, not svc_getrpccaller for nd_nam2 -
345	 * NFS_SRVMAXDATA uses a NULL value for nd_nam2 to detect TCP
346	 * mounts.
347	 */
348	memset(&nd, 0, sizeof(nd));
349	nd.nd_md = nd.nd_mrep = mreq;
350	nd.nd_dpos = mtod(mreq, caddr_t);
351	nd.nd_nam = svc_getrpccaller(rqst);
352	nd.nd_nam2 = rqst->rq_addr;
353	nd.nd_procnum = procnum;
354	nd.nd_cr = NULL;
355	nd.nd_flag = flag;
356
357	if (nfs_privport) {
358		/* Check if source port is privileged */
359		u_short port;
360		struct sockaddr *nam = nd.nd_nam;
361		struct sockaddr_in *sin;
362
363		sin = (struct sockaddr_in *)nam;
364		/*
365		 * INET/INET6 - same code:
366		 *    sin_port and sin6_port are at same offset
367		 */
368		port = ntohs(sin->sin_port);
369		if (port >= IPPORT_RESERVED &&
370		    nd.nd_procnum != NFSPROC_NULL) {
371#ifdef INET6
372			char b6[INET6_ADDRSTRLEN];
373#if defined(KLD_MODULE)
374			/* Do not use ip6_sprintf: the nfs module should work without INET6. */
375#define ip6_sprintf(buf, a)						\
376			(sprintf((buf), "%x:%x:%x:%x:%x:%x:%x:%x",	\
377			    (a)->s6_addr16[0], (a)->s6_addr16[1],	\
378			    (a)->s6_addr16[2], (a)->s6_addr16[3],	\
379			    (a)->s6_addr16[4], (a)->s6_addr16[5],	\
380			    (a)->s6_addr16[6], (a)->s6_addr16[7]),	\
381			    (buf))
382#endif
383#endif
384			printf("NFS request from unprivileged port (%s:%d)\n",
385#ifdef INET6
386			    sin->sin_family == AF_INET6 ?
387			    ip6_sprintf(b6, &satosin6(sin)->sin6_addr) :
388#if defined(KLD_MODULE)
389#undef ip6_sprintf
390#endif
391#endif
392			    inet_ntoa(sin->sin_addr), port);
393			m_freem(mreq);
394			svcerr_weakauth(rqst);
395			svc_freereq(rqst);
396			return;
397		}
398	}
399
400	if (proc != nfsrv_null) {
401		if (!svc_getcred(rqst, &nd.nd_cr, &nd.nd_credflavor)) {
402			m_freem(mreq);
403			svcerr_weakauth(rqst);
404			svc_freereq(rqst);
405			return;
406		}
407#ifdef MAC
408		mac_cred_associate_nfsd(nd.nd_cr);
409#endif
410	}
411	nfsrvstats.srvrpccnt[nd.nd_procnum]++;
412
413	error = proc(&nd, NULL, &mrep);
414
415	if (nd.nd_cr)
416		crfree(nd.nd_cr);
417
418	if (mrep == NULL) {
419		svcerr_decode(rqst);
420		svc_freereq(rqst);
421		return;
422	}
423	if (error && error != NFSERR_RETVOID) {
424		svcerr_systemerr(rqst);
425		svc_freereq(rqst);
426		return;
427	}
428	if (nd.nd_repstat & NFSERR_AUTHERR) {
429		svcerr_auth(rqst, nd.nd_repstat & ~NFSERR_AUTHERR);
430		m_freem(mrep);
431	} else {
432		if (!svc_sendreply_mbuf(rqst, mrep))
433			svcerr_systemerr(rqst);
434	}
435	svc_freereq(rqst);
436}
437
438/*
439 * Adds a socket to the list for servicing by nfsds.
440 */
441static int
442nfssvc_addsock(struct file *fp, struct thread *td)
443{
444	int siz;
445	struct socket *so;
446	int error;
447	SVCXPRT *xprt;
448
449	so = fp->f_data;
450
451	siz = sb_max_adj;
452	error = soreserve(so, siz, siz);
453	if (error) {
454		return (error);
455	}
456
457	/*
458	 * Steal the socket from userland so that it doesn't close
459	 * unexpectedly.
460	 */
461	if (so->so_type == SOCK_DGRAM)
462		xprt = svc_dg_create(nfsrv_pool, so, 0, 0);
463	else
464		xprt = svc_vc_create(nfsrv_pool, so, 0, 0);
465	if (xprt) {
466		fp->f_ops = &badfileops;
467		fp->f_data = NULL;
468		svc_reg(xprt, NFS_PROG, NFS_VER2, nfssvc_program, NULL);
469		svc_reg(xprt, NFS_PROG, NFS_VER3, nfssvc_program, NULL);
470	}
471
472	return (0);
473}
474
475/*
476 * Called by nfssvc() for nfsds. Just loops around servicing rpc requests
477 * until it is killed by a signal.
478 */
479static int
480nfssvc_nfsd(struct thread *td, struct nfsd_nfsd_args *args)
481{
482#ifdef KGSSAPI
483	char principal[128];
484	int error;
485#endif
486
487#ifdef KGSSAPI
488	if (args) {
489		error = copyinstr(args->principal, principal,
490		    sizeof(principal), NULL);
491		if (error)
492			return (error);
493	} else {
494		memcpy(principal, "nfs@", 4);
495		getcredhostname(td->td_ucred, principal + 4,
496		    sizeof(principal) - 4);
497	}
498#endif
499
500	/*
501	 * Only the first nfsd actually does any work. The RPC code
502	 * adds threads to it as needed. Any extra processes offered
503	 * by nfsd just exit. If nfsd is new enough, it will call us
504	 * once with a structure that specifies how many threads to
505	 * use.
506	 */
507	NFSD_LOCK();
508	if (nfsrv_numnfsd == 0) {
509		nfsrv_numnfsd++;
510
511		NFSD_UNLOCK();
512
513#ifdef KGSSAPI
514		rpc_gss_set_svc_name(principal, "kerberosv5",
515		    GSS_C_INDEFINITE, NFS_PROG, NFS_VER2);
516		rpc_gss_set_svc_name(principal, "kerberosv5",
517		    GSS_C_INDEFINITE, NFS_PROG, NFS_VER3);
518#endif
519
520		if (args) {
521			nfsrv_pool->sp_minthreads = args->minthreads;
522			nfsrv_pool->sp_maxthreads = args->maxthreads;
523		} else {
524			nfsrv_pool->sp_minthreads = 4;
525			nfsrv_pool->sp_maxthreads = 4;
526		}
527
528		svc_run(nfsrv_pool);
529
530#ifdef KGSSAPI
531		rpc_gss_clear_svc_name(NFS_PROG, NFS_VER2);
532		rpc_gss_clear_svc_name(NFS_PROG, NFS_VER3);
533#endif
534
535		NFSD_LOCK();
536		nfsrv_numnfsd--;
537		nfsrv_init(TRUE);
538	}
539	NFSD_UNLOCK();
540
541	return (0);
542}
543
544/*
545 * Size the NFS server's duplicate request cache at 1/2 the
546 * nmbclusters, floating within a (64, 2048) range. This is to
547 * prevent all mbuf clusters being tied up in the NFS dupreq
548 * cache for small values of nmbclusters.
549 */
550static size_t
551nfsrv_replay_size(void)
552{
553	size_t replaysiz;
554
555	replaysiz = nmbclusters / 2;
556	if (replaysiz > NFSRVCACHE_MAX_SIZE)
557		replaysiz = NFSRVCACHE_MAX_SIZE;
558	if (replaysiz < NFSRVCACHE_MIN_SIZE)
559		replaysiz = NFSRVCACHE_MIN_SIZE;
560	replaysiz *= MCLBYTES;
561
562	return (replaysiz);
563}
564
565/*
566 * Called when nmbclusters changes - we resize the replay cache
567 * accordingly.
568 */
569static void
570nfsrv_nmbclusters_change(void *tag)
571{
572
573	if (nfsrv_pool)
574		replay_setsize(nfsrv_pool->sp_rcache, nfsrv_replay_size());
575}
576
577/*
 * Initialize the data structures for the server.
 * Handshake with any new nfsds starting up to avoid any chance of
 * corruption.
 *
 * terminating - non-zero when called after the server has stopped (see
 *               nfssvc_nfsd()): the existing pool is torn down first.
 *               Zero: nfs_pub.np_valid is cleared instead.
 *
 * Must be called with the NFSD lock held and returns with it held, but
 * the lock is dropped while the pool is destroyed and/or created.
 * In both cases a fresh service pool is created before returning.
 */
582void
583nfsrv_init(int terminating)
584{
585
586	NFSD_LOCK_ASSERT();
587
588	if (terminating) {
		/*
		 * Tear-down runs unlocked.  The event handler is removed
		 * before the pool it refers to is destroyed.
		 */
589		NFSD_UNLOCK();
590		EVENTHANDLER_DEREGISTER(nmbclusters_change,
591		    nfsrv_nmbclusters_tag);
592		svcpool_destroy(nfsrv_pool);
593		nfsrv_pool = NULL;
594		NFSD_LOCK();
595	} else
596		nfs_pub.np_valid = 0;
597
	/* Pool creation below also runs unlocked. */
598	NFSD_UNLOCK();
599
	/*
	 * Build the new pool: replay cache sized by nfsrv_replay_size(),
	 * fha_assign / fha_nd_complete as the assign and done hooks, and a
	 * handler that resizes the cache when nmbclusters changes.
	 */
600	nfsrv_pool = svcpool_create("nfsd", SYSCTL_STATIC_CHILDREN(_vfs_nfsrv));
601	nfsrv_pool->sp_rcache = replay_newcache(nfsrv_replay_size());
602	nfsrv_pool->sp_assign = fha_assign;
603	nfsrv_pool->sp_done = fha_nd_complete;
604	nfsrv_nmbclusters_tag = EVENTHANDLER_REGISTER(nmbclusters_change,
605	    nfsrv_nmbclusters_change, NULL, EVENTHANDLER_PRI_FIRST);
606
	/* Return to the caller with the lock held again. */
607	NFSD_LOCK();
608}
609
610#endif /* !NFS_LEGACYRPC */
611