nfs_srvkrpc.c revision 194073
/*-
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24214501Srpaulo * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25214501Srpaulo * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26214501Srpaulo * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27214501Srpaulo * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28214501Srpaulo * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29252726Srpaulo * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30252726Srpaulo * SUCH DAMAGE. 31252726Srpaulo * 32252726Srpaulo * @(#)nfs_syscalls.c 8.5 (Berkeley) 3/30/95 33252726Srpaulo */ 34252726Srpaulo 35252726Srpaulo#include <sys/cdefs.h> 36252726Srpaulo__FBSDID("$FreeBSD: head/sys/nfsserver/nfs_srvkrpc.c 194073 2009-06-12 20:18:08Z rmacklem $"); 37214501Srpaulo 38252726Srpaulo#include "opt_inet6.h" 39252726Srpaulo#include "opt_kgssapi.h" 40252726Srpaulo 41214501Srpaulo#include <sys/param.h> 42214501Srpaulo#include <sys/systm.h> 43214501Srpaulo#include <sys/sysproto.h> 44214501Srpaulo#include <sys/kernel.h> 45214501Srpaulo#include <sys/sysctl.h> 46214501Srpaulo#include <sys/file.h> 47214501Srpaulo#include <sys/filedesc.h> 48214501Srpaulo#include <sys/jail.h> 49214501Srpaulo#include <sys/vnode.h> 50214501Srpaulo#include <sys/malloc.h> 51214501Srpaulo#include <sys/mount.h> 52214501Srpaulo#include <sys/priv.h> 53214501Srpaulo#include <sys/proc.h> 54214501Srpaulo#include <sys/bio.h> 55214501Srpaulo#include <sys/buf.h> 56214501Srpaulo#include <sys/mbuf.h> 57214501Srpaulo#include <sys/socket.h> 58214501Srpaulo#include <sys/socketvar.h> 59214501Srpaulo#include <sys/domain.h> 60214501Srpaulo#include <sys/protosw.h> 61214501Srpaulo#include <sys/namei.h> 62214501Srpaulo#include <sys/fcntl.h> 63214501Srpaulo#include <sys/lockf.h> 64214501Srpaulo#include <sys/eventhandler.h> 65214501Srpaulo 66214501Srpaulo#include <netinet/in.h> 
67214501Srpaulo#include <netinet/tcp.h> 68214501Srpaulo#ifdef INET6 69214501Srpaulo#include <net/if.h> 70214501Srpaulo#include <netinet6/in6_var.h> 71214501Srpaulo#endif 72214501Srpaulo 73214501Srpaulo#include <rpc/rpc.h> 74214501Srpaulo#include <rpc/rpcsec_gss.h> 75214501Srpaulo#include <rpc/replay.h> 76214501Srpaulo 77214501Srpaulo#include <nfs/xdr_subs.h> 78214501Srpaulo#include <nfs/rpcv2.h> 79214501Srpaulo#include <nfs/nfsproto.h> 80214501Srpaulo#include <nfsserver/nfs.h> 81252726Srpaulo#include <nfsserver/nfsm_subs.h> 82252726Srpaulo#include <nfsserver/nfsrvcache.h> 83252726Srpaulo#include <nfsserver/nfs_fha.h> 84252726Srpaulo 85252726Srpaulo#include <security/mac/mac_framework.h> 86252726Srpaulo 87252726Srpaulo#ifndef NFS_LEGACYRPC 88252726Srpaulo 89252726Srpaulostatic MALLOC_DEFINE(M_NFSSVC, "nfss_srvsock", "Nfs server structure"); 90252726Srpaulo 91252726SrpauloMALLOC_DEFINE(M_NFSRVDESC, "nfss_srvdesc", "NFS server socket descriptor"); 92252726SrpauloMALLOC_DEFINE(M_NFSD, "nfss_daemon", "Nfs server daemon structure"); 93252726Srpaulo 94252726Srpaulo#define TRUE 1 95252726Srpaulo#define FALSE 0 96252726Srpaulo 97252726SrpauloSYSCTL_DECL(_vfs_nfsrv); 98252726Srpaulo 99252726SrpauloSVCPOOL *nfsrv_pool; 100252726Srpauloint nfsd_waiting = 0; 101252726Srpauloint nfsrv_numnfsd = 0; 102252726Srpaulostatic int nfs_realign_test; 103252726Srpaulostatic int nfs_realign_count; 104252726Srpaulostruct callout nfsrv_callout; 105252726Srpaulostatic eventhandler_tag nfsrv_nmbclusters_tag; 106252726Srpaulo 107252726Srpaulostatic int nfs_privport = 0; 108252726SrpauloSYSCTL_INT(_vfs_nfsrv, NFS_NFSPRIVPORT, nfs_privport, CTLFLAG_RW, 109252726Srpaulo &nfs_privport, 0, 110252726Srpaulo "Only allow clients using a privileged port"); 111214501SrpauloSYSCTL_INT(_vfs_nfsrv, OID_AUTO, gatherdelay, CTLFLAG_RW, 112214501Srpaulo &nfsrvw_procrastinate, 0, 113214501Srpaulo "Delay value for write gathering"); 114SYSCTL_INT(_vfs_nfsrv, OID_AUTO, gatherdelay_v3, CTLFLAG_RW, 115 
&nfsrvw_procrastinate_v3, 0, 116 "Delay in seconds for NFSv3 write gathering"); 117SYSCTL_INT(_vfs_nfsrv, OID_AUTO, realign_test, CTLFLAG_RW, 118 &nfs_realign_test, 0, ""); 119SYSCTL_INT(_vfs_nfsrv, OID_AUTO, realign_count, CTLFLAG_RW, 120 &nfs_realign_count, 0, ""); 121 122static int nfssvc_addsock(struct file *, struct thread *); 123static int nfssvc_nfsd(struct thread *, struct nfsd_nfsd_args *); 124 125extern u_long sb_max_adj; 126 127int32_t (*nfsrv3_procs[NFS_NPROCS])(struct nfsrv_descript *nd, 128 struct nfssvc_sock *slp, struct mbuf **mreqp) = { 129 nfsrv_null, 130 nfsrv_getattr, 131 nfsrv_setattr, 132 nfsrv_lookup, 133 nfsrv3_access, 134 nfsrv_readlink, 135 nfsrv_read, 136 nfsrv_write, 137 nfsrv_create, 138 nfsrv_mkdir, 139 nfsrv_symlink, 140 nfsrv_mknod, 141 nfsrv_remove, 142 nfsrv_rmdir, 143 nfsrv_rename, 144 nfsrv_link, 145 nfsrv_readdir, 146 nfsrv_readdirplus, 147 nfsrv_statfs, 148 nfsrv_fsinfo, 149 nfsrv_pathconf, 150 nfsrv_commit, 151 nfsrv_noop 152}; 153 154/* 155 * NFS server system calls 156 */ 157/* 158 * This is now called from nfssvc() in nfs/nfs_nfssvc.c. 
159 */ 160 161/* 162 * Nfs server psuedo system call for the nfsd's 163 * Based on the flag value it either: 164 * - adds a socket to the selection list 165 * - remains in the kernel as an nfsd 166 * - remains in the kernel as an nfsiod 167 * For INET6 we suppose that nfsd provides only IN6P_IPV6_V6ONLY sockets 168 * and that mountd provides 169 * - sockaddr with no IPv4-mapped addresses 170 * - mask for both INET and INET6 families if there is IPv4-mapped overlap 171 */ 172int 173nfssvc_nfsserver(struct thread *td, struct nfssvc_args *uap) 174{ 175 struct file *fp; 176 struct nfsd_addsock_args addsockarg; 177 struct nfsd_nfsd_args nfsdarg; 178 int error; 179 180 if (uap->flag & NFSSVC_ADDSOCK) { 181 error = copyin(uap->argp, (caddr_t)&addsockarg, 182 sizeof(addsockarg)); 183 if (error) 184 return (error); 185 if ((error = fget(td, addsockarg.sock, &fp)) != 0) 186 return (error); 187 if (fp->f_type != DTYPE_SOCKET) { 188 fdrop(fp, td); 189 return (error); /* XXXRW: Should be EINVAL? */ 190 } 191 error = nfssvc_addsock(fp, td); 192 fdrop(fp, td); 193 } else if (uap->flag & NFSSVC_OLDNFSD) { 194 error = nfssvc_nfsd(td, NULL); 195 } else if (uap->flag & NFSSVC_NFSD) { 196 if (!uap->argp) 197 return (EINVAL); 198 error = copyin(uap->argp, (caddr_t)&nfsdarg, 199 sizeof(nfsdarg)); 200 if (error) 201 return (error); 202 error = nfssvc_nfsd(td, &nfsdarg); 203 } else { 204 error = ENXIO; 205 } 206 return (error); 207} 208 209/* 210 * Generate the rpc reply header 211 * siz arg. 
is used to decide if adding a cluster is worthwhile 212 */ 213struct mbuf * 214nfs_rephead(int siz, struct nfsrv_descript *nd, int err, 215 struct mbuf **mbp, caddr_t *bposp) 216{ 217 u_int32_t *tl; 218 struct mbuf *mreq; 219 caddr_t bpos; 220 struct mbuf *mb; 221 222 if (err == EBADRPC) 223 return (NULL); 224 225 nd->nd_repstat = err; 226 if (err && (nd->nd_flag & ND_NFSV3) == 0) /* XXX recheck */ 227 siz = 0; 228 229 MGET(mreq, M_WAIT, MT_DATA); 230 231 /* 232 * If this is a big reply, use a cluster 233 */ 234 mreq->m_len = 0; 235 if (siz >= MINCLSIZE) { 236 MCLGET(mreq, M_WAIT); 237 } 238 mb = mreq; 239 bpos = mtod(mb, caddr_t); 240 241 if (err != NFSERR_RETVOID) { 242 tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED); 243 if (err) 244 *tl = txdr_unsigned(nfsrv_errmap(nd, err)); 245 else 246 *tl = 0; 247 } 248 249 *mbp = mb; 250 *bposp = bpos; 251 if (err != 0 && err != NFSERR_RETVOID) 252 nfsrvstats.srvrpc_errs++; 253 254 return (mreq); 255} 256 257/* 258 * nfs_realign: 259 * 260 * Check for badly aligned mbuf data and realign by copying the unaligned 261 * portion of the data into a new mbuf chain and freeing the portions 262 * of the old chain that were replaced. 263 * 264 * We cannot simply realign the data within the existing mbuf chain 265 * because the underlying buffers may contain other rpc commands and 266 * we cannot afford to overwrite them. 267 * 268 * We would prefer to avoid this situation entirely. The situation does 269 * not occur with NFS/UDP and is supposed to only occassionally occur 270 * with TCP. Use vfs.nfs.realign_count and realign_test to check this. 
271 */ 272static void 273nfs_realign(struct mbuf **pm) /* XXX COMMON */ 274{ 275 struct mbuf *m; 276 struct mbuf *n = NULL; 277 int off = 0; 278 279 ++nfs_realign_test; 280 while ((m = *pm) != NULL) { 281 if ((m->m_len & 0x3) || (mtod(m, intptr_t) & 0x3)) { 282 MGET(n, M_WAIT, MT_DATA); 283 if (m->m_len >= MINCLSIZE) { 284 MCLGET(n, M_WAIT); 285 } 286 n->m_len = 0; 287 break; 288 } 289 pm = &m->m_next; 290 } 291 292 /* 293 * If n is non-NULL, loop on m copying data, then replace the 294 * portion of the chain that had to be realigned. 295 */ 296 if (n != NULL) { 297 ++nfs_realign_count; 298 while (m) { 299 m_copyback(n, off, m->m_len, mtod(m, caddr_t)); 300 off += m->m_len; 301 m = m->m_next; 302 } 303 m_freem(*pm); 304 *pm = n; 305 } 306} 307 308static void 309nfssvc_program(struct svc_req *rqst, SVCXPRT *xprt) 310{ 311 rpcproc_t procnum; 312 int32_t (*proc)(struct nfsrv_descript *nd, struct nfssvc_sock *slp, 313 struct mbuf **mreqp); 314 int flag; 315 struct nfsrv_descript nd; 316 struct mbuf *mreq, *mrep; 317 int error; 318 319 if (rqst->rq_vers == NFS_VER2) { 320 if (rqst->rq_proc > NFSV2PROC_STATFS) { 321 svcerr_noproc(rqst); 322 svc_freereq(rqst); 323 return; 324 } 325 procnum = nfsrv_nfsv3_procid[rqst->rq_proc]; 326 flag = 0; 327 } else { 328 if (rqst->rq_proc >= NFS_NPROCS) { 329 svcerr_noproc(rqst); 330 svc_freereq(rqst); 331 return; 332 } 333 procnum = rqst->rq_proc; 334 flag = ND_NFSV3; 335 } 336 proc = nfsrv3_procs[procnum]; 337 338 mreq = mrep = NULL; 339 mreq = rqst->rq_args; 340 rqst->rq_args = NULL; 341 nfs_realign(&mreq); 342 343 /* 344 * Note: we want rq_addr, not svc_getrpccaller for nd_nam2 - 345 * NFS_SRVMAXDATA uses a NULL value for nd_nam2 to detect TCP 346 * mounts. 
347 */ 348 memset(&nd, 0, sizeof(nd)); 349 nd.nd_md = nd.nd_mrep = mreq; 350 nd.nd_dpos = mtod(mreq, caddr_t); 351 nd.nd_nam = svc_getrpccaller(rqst); 352 nd.nd_nam2 = rqst->rq_addr; 353 nd.nd_procnum = procnum; 354 nd.nd_cr = NULL; 355 nd.nd_flag = flag; 356 357 if (nfs_privport) { 358 /* Check if source port is privileged */ 359 u_short port; 360 struct sockaddr *nam = nd.nd_nam; 361 struct sockaddr_in *sin; 362 363 sin = (struct sockaddr_in *)nam; 364 /* 365 * INET/INET6 - same code: 366 * sin_port and sin6_port are at same offset 367 */ 368 port = ntohs(sin->sin_port); 369 if (port >= IPPORT_RESERVED && 370 nd.nd_procnum != NFSPROC_NULL) { 371#ifdef INET6 372 char b6[INET6_ADDRSTRLEN]; 373#if defined(KLD_MODULE) 374 /* Do not use ip6_sprintf: the nfs module should work without INET6. */ 375#define ip6_sprintf(buf, a) \ 376 (sprintf((buf), "%x:%x:%x:%x:%x:%x:%x:%x", \ 377 (a)->s6_addr16[0], (a)->s6_addr16[1], \ 378 (a)->s6_addr16[2], (a)->s6_addr16[3], \ 379 (a)->s6_addr16[4], (a)->s6_addr16[5], \ 380 (a)->s6_addr16[6], (a)->s6_addr16[7]), \ 381 (buf)) 382#endif 383#endif 384 printf("NFS request from unprivileged port (%s:%d)\n", 385#ifdef INET6 386 sin->sin_family == AF_INET6 ? 
387 ip6_sprintf(b6, &satosin6(sin)->sin6_addr) : 388#if defined(KLD_MODULE) 389#undef ip6_sprintf 390#endif 391#endif 392 inet_ntoa(sin->sin_addr), port); 393 m_freem(mreq); 394 svcerr_weakauth(rqst); 395 svc_freereq(rqst); 396 return; 397 } 398 } 399 400 if (proc != nfsrv_null) { 401 if (!svc_getcred(rqst, &nd.nd_cr, &nd.nd_credflavor)) { 402 m_freem(mreq); 403 svcerr_weakauth(rqst); 404 svc_freereq(rqst); 405 return; 406 } 407#ifdef MAC 408 mac_cred_associate_nfsd(nd.nd_cr); 409#endif 410 } 411 nfsrvstats.srvrpccnt[nd.nd_procnum]++; 412 413 error = proc(&nd, NULL, &mrep); 414 415 if (nd.nd_cr) 416 crfree(nd.nd_cr); 417 418 if (mrep == NULL) { 419 svcerr_decode(rqst); 420 svc_freereq(rqst); 421 return; 422 } 423 if (error && error != NFSERR_RETVOID) { 424 svcerr_systemerr(rqst); 425 svc_freereq(rqst); 426 return; 427 } 428 if (nd.nd_repstat & NFSERR_AUTHERR) { 429 svcerr_auth(rqst, nd.nd_repstat & ~NFSERR_AUTHERR); 430 m_freem(mrep); 431 } else { 432 if (!svc_sendreply_mbuf(rqst, mrep)) 433 svcerr_systemerr(rqst); 434 } 435 svc_freereq(rqst); 436} 437 438/* 439 * Adds a socket to the list for servicing by nfsds. 440 */ 441static int 442nfssvc_addsock(struct file *fp, struct thread *td) 443{ 444 int siz; 445 struct socket *so; 446 int error; 447 SVCXPRT *xprt; 448 449 so = fp->f_data; 450 451 siz = sb_max_adj; 452 error = soreserve(so, siz, siz); 453 if (error) { 454 return (error); 455 } 456 457 /* 458 * Steal the socket from userland so that it doesn't close 459 * unexpectedly. 460 */ 461 if (so->so_type == SOCK_DGRAM) 462 xprt = svc_dg_create(nfsrv_pool, so, 0, 0); 463 else 464 xprt = svc_vc_create(nfsrv_pool, so, 0, 0); 465 if (xprt) { 466 fp->f_ops = &badfileops; 467 fp->f_data = NULL; 468 svc_reg(xprt, NFS_PROG, NFS_VER2, nfssvc_program, NULL); 469 svc_reg(xprt, NFS_PROG, NFS_VER3, nfssvc_program, NULL); 470 } 471 472 return (0); 473} 474 475/* 476 * Called by nfssvc() for nfsds. Just loops around servicing rpc requests 477 * until it is killed by a signal. 
478 */ 479static int 480nfssvc_nfsd(struct thread *td, struct nfsd_nfsd_args *args) 481{ 482#ifdef KGSSAPI 483 char principal[128]; 484 int error; 485#endif 486 487#ifdef KGSSAPI 488 if (args) { 489 error = copyinstr(args->principal, principal, 490 sizeof(principal), NULL); 491 if (error) 492 return (error); 493 } else { 494 memcpy(principal, "nfs@", 4); 495 getcredhostname(td->td_ucred, principal + 4, 496 sizeof(principal) - 4); 497 } 498#endif 499 500 /* 501 * Only the first nfsd actually does any work. The RPC code 502 * adds threads to it as needed. Any extra processes offered 503 * by nfsd just exit. If nfsd is new enough, it will call us 504 * once with a structure that specifies how many threads to 505 * use. 506 */ 507 NFSD_LOCK(); 508 if (nfsrv_numnfsd == 0) { 509 nfsrv_numnfsd++; 510 511 NFSD_UNLOCK(); 512 513#ifdef KGSSAPI 514 rpc_gss_set_svc_name(principal, "kerberosv5", 515 GSS_C_INDEFINITE, NFS_PROG, NFS_VER2); 516 rpc_gss_set_svc_name(principal, "kerberosv5", 517 GSS_C_INDEFINITE, NFS_PROG, NFS_VER3); 518#endif 519 520 if (args) { 521 nfsrv_pool->sp_minthreads = args->minthreads; 522 nfsrv_pool->sp_maxthreads = args->maxthreads; 523 } else { 524 nfsrv_pool->sp_minthreads = 4; 525 nfsrv_pool->sp_maxthreads = 4; 526 } 527 528 svc_run(nfsrv_pool); 529 530#ifdef KGSSAPI 531 rpc_gss_clear_svc_name(NFS_PROG, NFS_VER2); 532 rpc_gss_clear_svc_name(NFS_PROG, NFS_VER3); 533#endif 534 535 NFSD_LOCK(); 536 nfsrv_numnfsd--; 537 nfsrv_init(TRUE); 538 } 539 NFSD_UNLOCK(); 540 541 return (0); 542} 543 544/* 545 * Size the NFS server's duplicate request cache at 1/2 the 546 * nmbclusters, floating within a (64, 2048) range. This is to 547 * prevent all mbuf clusters being tied up in the NFS dupreq 548 * cache for small values of nmbclusters. 
549 */ 550static size_t 551nfsrv_replay_size(void) 552{ 553 size_t replaysiz; 554 555 replaysiz = nmbclusters / 2; 556 if (replaysiz > NFSRVCACHE_MAX_SIZE) 557 replaysiz = NFSRVCACHE_MAX_SIZE; 558 if (replaysiz < NFSRVCACHE_MIN_SIZE) 559 replaysiz = NFSRVCACHE_MIN_SIZE; 560 replaysiz *= MCLBYTES; 561 562 return (replaysiz); 563} 564 565/* 566 * Called when nmbclusters changes - we resize the replay cache 567 * accordingly. 568 */ 569static void 570nfsrv_nmbclusters_change(void *tag) 571{ 572 573 if (nfsrv_pool) 574 replay_setsize(nfsrv_pool->sp_rcache, nfsrv_replay_size()); 575} 576 577/* 578 * Initialize the data structures for the server. 579 * Handshake with any new nfsds starting up to avoid any chance of 580 * corruption. 581 */ 582void 583nfsrv_init(int terminating) 584{ 585 586 NFSD_LOCK_ASSERT(); 587 588 if (terminating) { 589 NFSD_UNLOCK(); 590 EVENTHANDLER_DEREGISTER(nmbclusters_change, 591 nfsrv_nmbclusters_tag); 592 svcpool_destroy(nfsrv_pool); 593 nfsrv_pool = NULL; 594 NFSD_LOCK(); 595 } else 596 nfs_pub.np_valid = 0; 597 598 NFSD_UNLOCK(); 599 600 nfsrv_pool = svcpool_create("nfsd", SYSCTL_STATIC_CHILDREN(_vfs_nfsrv)); 601 nfsrv_pool->sp_rcache = replay_newcache(nfsrv_replay_size()); 602 nfsrv_pool->sp_assign = fha_assign; 603 nfsrv_pool->sp_done = fha_nd_complete; 604 nfsrv_nmbclusters_tag = EVENTHANDLER_REGISTER(nmbclusters_change, 605 nfsrv_nmbclusters_change, NULL, EVENTHANDLER_PRI_FIRST); 606 607 NFSD_LOCK(); 608} 609 610#endif /* !NFS_LEGACYRPC */ 611