1/* 2 * linux/net/sunrpc/svcsock.c 3 * 4 * These are the RPC server socket internals. 5 * 6 * The server scheduling algorithm does not always distribute the load 7 * evenly when servicing a single client. May need to modify the 8 * svc_xprt_enqueue procedure... 9 * 10 * TCP support is largely untested and may be a little slow. The problem 11 * is that we currently do two separate recvfrom's, one for the 4-byte 12 * record length, and the second for the actual record. This could possibly 13 * be improved by always reading a minimum size of around 100 bytes and 14 * tucking any superfluous bytes away in a temporary store. Still, that 15 * leaves write requests out in the rain. An alternative may be to peek at 16 * the first skb in the queue, and if it matches the next TCP sequence 17 * number, to extract the record marker. Yuck. 18 * 19 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> 20 */ 21 22#include <linux/kernel.h> 23#include <linux/sched.h> 24#include <linux/errno.h> 25#include <linux/fcntl.h> 26#include <linux/net.h> 27#include <linux/in.h> 28#include <linux/inet.h> 29#include <linux/udp.h> 30#include <linux/tcp.h> 31#include <linux/unistd.h> 32#include <linux/slab.h> 33#include <linux/netdevice.h> 34#include <linux/skbuff.h> 35#include <linux/file.h> 36#include <linux/freezer.h> 37#include <net/sock.h> 38#include <net/checksum.h> 39#include <net/ip.h> 40#include <net/ipv6.h> 41#include <net/tcp.h> 42#include <net/tcp_states.h> 43#include <asm/uaccess.h> 44#include <asm/ioctls.h> 45 46#include <linux/sunrpc/types.h> 47#include <linux/sunrpc/clnt.h> 48#include <linux/sunrpc/xdr.h> 49#include <linux/sunrpc/msg_prot.h> 50#include <linux/sunrpc/svcsock.h> 51#include <linux/sunrpc/stats.h> 52#include <linux/sunrpc/xprt.h> 53 54#define RPCDBG_FACILITY RPCDBG_SVCXPRT 55 56 57static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *, 58 int *errp, int flags); 59static void svc_udp_data_ready(struct sock *, int); 60static int svc_udp_recvfrom(struct svc_rqst *); 61static int svc_udp_sendto(struct svc_rqst *); 62static void svc_sock_detach(struct svc_xprt *); 63static void svc_tcp_sock_detach(struct svc_xprt *); 64static void svc_sock_free(struct svc_xprt *); 65 66static struct svc_xprt *svc_create_socket(struct svc_serv *, int, 67 struct sockaddr *, int, int); 68#ifdef CONFIG_DEBUG_LOCK_ALLOC 69static struct lock_class_key svc_key[2]; 70static struct lock_class_key svc_slock_key[2]; 71 72static void svc_reclassify_socket(struct socket *sock) 73{ 74 struct sock *sk = sock->sk; 75 BUG_ON(sock_owned_by_user(sk)); 76 switch (sk->sk_family) { 77 case AF_INET: 78 sock_lock_init_class_and_name(sk, "slock-AF_INET-NFSD", 79 &svc_slock_key[0], 80 "sk_xprt.xpt_lock-AF_INET-NFSD", 81 &svc_key[0]); 82 break; 83 84 case AF_INET6: 85 sock_lock_init_class_and_name(sk, "slock-AF_INET6-NFSD", 86 &svc_slock_key[1], 87 "sk_xprt.xpt_lock-AF_INET6-NFSD", 88 &svc_key[1]); 89 break; 90 91 default: 92 BUG(); 93 } 94} 95#else 96static void svc_reclassify_socket(struct socket *sock) 97{ 98} 99#endif 100 101/* 102 * Release an skbuff after use 103 */ 104static void svc_release_skb(struct svc_rqst *rqstp) 105{ 106 struct sk_buff *skb = rqstp->rq_xprt_ctxt; 107 108 if (skb) { 109 struct svc_sock *svsk = 110 container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt); 111 rqstp->rq_xprt_ctxt = NULL; 112 113 dprintk("svc: service %p, releasing skb %p\n", rqstp, skb); 114 skb_free_datagram_locked(svsk->sk_sk, skb); 115 } 116} 117 118union svc_pktinfo_u { 119 struct in_pktinfo pkti; 120 struct in6_pktinfo pkti6; 121}; 122#define SVC_PKTINFO_SPACE \ 123 CMSG_SPACE(sizeof(union svc_pktinfo_u)) 124 125static void svc_set_cmsg_data(struct svc_rqst *rqstp, struct cmsghdr *cmh) 126{ 127 struct svc_sock *svsk = 128 container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt); 129 switch (svsk->sk_sk->sk_family) { 130 case AF_INET: { 131 struct in_pktinfo *pki = CMSG_DATA(cmh); 132 133 cmh->cmsg_level = SOL_IP; 134 cmh->cmsg_type = IP_PKTINFO; 135 pki->ipi_ifindex = 0; 136 pki->ipi_spec_dst.s_addr = rqstp->rq_daddr.addr.s_addr; 137 cmh->cmsg_len = CMSG_LEN(sizeof(*pki)); 138 } 139 break; 140 141 case AF_INET6: { 142 struct in6_pktinfo *pki = CMSG_DATA(cmh); 143 144 cmh->cmsg_level = SOL_IPV6; 145 cmh->cmsg_type = IPV6_PKTINFO; 146 pki->ipi6_ifindex = 0; 147 ipv6_addr_copy(&pki->ipi6_addr, 148 &rqstp->rq_daddr.addr6); 149 cmh->cmsg_len = CMSG_LEN(sizeof(*pki)); 150 } 151 break; 152 } 153} 154 155/* 156 * send routine intended to be shared by the fore- and back-channel 157 */ 158int svc_send_common(struct socket *sock, struct xdr_buf *xdr, 159 struct page *headpage, unsigned long headoffset, 160 struct page *tailpage, unsigned long tailoffset) 161{ 162 int result; 163 int size; 164 struct page **ppage = xdr->pages; 165 size_t base = xdr->page_base; 166 unsigned int pglen = xdr->page_len; 167 unsigned int flags = MSG_MORE; 168 int slen; 169 int len = 0; 170 171 slen = xdr->len; 172 173 /* send head */ 174 if (slen == xdr->head[0].iov_len) 175 flags = 0; 176 len = kernel_sendpage(sock, headpage, headoffset, 177 xdr->head[0].iov_len, flags); 178 if (len != xdr->head[0].iov_len) 179 goto out; 180 slen -= xdr->head[0].iov_len; 181 if (slen == 0) 182 goto out; 183 184 /* send page data */ 185 size = PAGE_SIZE - base < pglen ? PAGE_SIZE - base : pglen; 186 while (pglen > 0) { 187 if (slen == size) 188 flags = 0; 189 result = kernel_sendpage(sock, *ppage, base, size, flags); 190 if (result > 0) 191 len += result; 192 if (result != size) 193 goto out; 194 slen -= size; 195 pglen -= size; 196 size = PAGE_SIZE < pglen ? PAGE_SIZE : pglen; 197 base = 0; 198 ppage++; 199 } 200 201 /* send tail */ 202 if (xdr->tail[0].iov_len) { 203 result = kernel_sendpage(sock, tailpage, tailoffset, 204 xdr->tail[0].iov_len, 0); 205 if (result > 0) 206 len += result; 207 } 208 209out: 210 return len; 211} 212 213 214/* 215 * Generic sendto routine 216 */ 217static int svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr) 218{ 219 struct svc_sock *svsk = 220 container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt); 221 struct socket *sock = svsk->sk_sock; 222 union { 223 struct cmsghdr hdr; 224 long all[SVC_PKTINFO_SPACE / sizeof(long)]; 225 } buffer; 226 struct cmsghdr *cmh = &buffer.hdr; 227 int len = 0; 228 unsigned long tailoff; 229 unsigned long headoff; 230 RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]); 231 232 if (rqstp->rq_prot == IPPROTO_UDP) { 233 struct msghdr msg = { 234 .msg_name = &rqstp->rq_addr, 235 .msg_namelen = rqstp->rq_addrlen, 236 .msg_control = cmh, 237 .msg_controllen = sizeof(buffer), 238 .msg_flags = MSG_MORE, 239 }; 240 241 svc_set_cmsg_data(rqstp, cmh); 242 243 if (sock_sendmsg(sock, &msg, 0) < 0) 244 goto out; 245 } 246 247 tailoff = ((unsigned long)xdr->tail[0].iov_base) & (PAGE_SIZE-1); 248 headoff = 0; 249 len = svc_send_common(sock, xdr, rqstp->rq_respages[0], headoff, 250 rqstp->rq_respages[0], tailoff); 251 252out: 253 dprintk("svc: socket %p sendto([%p %Zu... ], %d) = %d (addr %s)\n", 254 svsk, xdr->head[0].iov_base, xdr->head[0].iov_len, 255 xdr->len, len, svc_print_addr(rqstp, buf, sizeof(buf))); 256 257 return len; 258} 259 260/* 261 * Report socket names for nfsdfs 262 */ 263static int svc_one_sock_name(struct svc_sock *svsk, char *buf, int remaining) 264{ 265 const struct sock *sk = svsk->sk_sk; 266 const char *proto_name = sk->sk_protocol == IPPROTO_UDP ? 267 "udp" : "tcp"; 268 int len; 269 270 switch (sk->sk_family) { 271 case PF_INET: 272 len = snprintf(buf, remaining, "ipv4 %s %pI4 %d\n", 273 proto_name, 274 &inet_sk(sk)->inet_rcv_saddr, 275 inet_sk(sk)->inet_num); 276 break; 277 case PF_INET6: 278 len = snprintf(buf, remaining, "ipv6 %s %pI6 %d\n", 279 proto_name, 280 &inet6_sk(sk)->rcv_saddr, 281 inet_sk(sk)->inet_num); 282 break; 283 default: 284 len = snprintf(buf, remaining, "*unknown-%d*\n", 285 sk->sk_family); 286 } 287 288 if (len >= remaining) { 289 *buf = '\0'; 290 return -ENAMETOOLONG; 291 } 292 return len; 293} 294 295/** 296 * svc_sock_names - construct a list of listener names in a string 297 * @serv: pointer to RPC service 298 * @buf: pointer to a buffer to fill in with socket names 299 * @buflen: size of the buffer to be filled 300 * @toclose: pointer to '\0'-terminated C string containing the name 301 * of a listener to be closed 302 * 303 * Fills in @buf with a '\n'-separated list of names of listener 304 * sockets. If @toclose is not NULL, the socket named by @toclose 305 * is closed, and is not included in the output list. 306 * 307 * Returns positive length of the socket name string, or a negative 308 * errno value on error. 309 */ 310int svc_sock_names(struct svc_serv *serv, char *buf, const size_t buflen, 311 const char *toclose) 312{ 313 struct svc_sock *svsk, *closesk = NULL; 314 int len = 0; 315 316 if (!serv) 317 return 0; 318 319 spin_lock_bh(&serv->sv_lock); 320 list_for_each_entry(svsk, &serv->sv_permsocks, sk_xprt.xpt_list) { 321 int onelen = svc_one_sock_name(svsk, buf + len, buflen - len); 322 if (onelen < 0) { 323 len = onelen; 324 break; 325 } 326 if (toclose && strcmp(toclose, buf + len) == 0) 327 closesk = svsk; 328 else 329 len += onelen; 330 } 331 spin_unlock_bh(&serv->sv_lock); 332 333 if (closesk) 334 /* Should unregister with portmap, but you cannot 335 * unregister just one protocol... 336 */ 337 svc_close_xprt(&closesk->sk_xprt); 338 else if (toclose) 339 return -ENOENT; 340 return len; 341} 342EXPORT_SYMBOL_GPL(svc_sock_names); 343 344/* 345 * Check input queue length 346 */ 347static int svc_recv_available(struct svc_sock *svsk) 348{ 349 struct socket *sock = svsk->sk_sock; 350 int avail, err; 351 352 err = kernel_sock_ioctl(sock, TIOCINQ, (unsigned long) &avail); 353 354 return (err >= 0)? avail : err; 355} 356 357/* 358 * Generic recvfrom routine. 359 */ 360static int svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr, 361 int buflen) 362{ 363 struct svc_sock *svsk = 364 container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt); 365 struct msghdr msg = { 366 .msg_flags = MSG_DONTWAIT, 367 }; 368 int len; 369 370 rqstp->rq_xprt_hlen = 0; 371 372 len = kernel_recvmsg(svsk->sk_sock, &msg, iov, nr, buflen, 373 msg.msg_flags); 374 375 dprintk("svc: socket %p recvfrom(%p, %Zu) = %d\n", 376 svsk, iov[0].iov_base, iov[0].iov_len, len); 377 return len; 378} 379 380/* 381 * Set socket snd and rcv buffer lengths 382 */ 383static void svc_sock_setbufsize(struct socket *sock, unsigned int snd, 384 unsigned int rcv) 385{ 386 /* sock_setsockopt limits use to sysctl_?mem_max, 387 * which isn't acceptable. Until that is made conditional 388 * on not having CAP_SYS_RESOURCE or similar, we go direct... 389 * DaveM said I could! 390 */ 391 lock_sock(sock->sk); 392 sock->sk->sk_sndbuf = snd * 2; 393 sock->sk->sk_rcvbuf = rcv * 2; 394 sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK|SOCK_RCVBUF_LOCK; 395 sock->sk->sk_write_space(sock->sk); 396 release_sock(sock->sk); 397} 398/* 399 * INET callback when data has been received on the socket. 400 */ 401static void svc_udp_data_ready(struct sock *sk, int count) 402{ 403 struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; 404 405 if (svsk) { 406 dprintk("svc: socket %p(inet %p), count=%d, busy=%d\n", 407 svsk, sk, count, 408 test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags)); 409 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); 410 svc_xprt_enqueue(&svsk->sk_xprt); 411 } 412 if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) 413 wake_up_interruptible(sk_sleep(sk)); 414} 415 416/* 417 * INET callback when space is newly available on the socket. 418 */ 419static void svc_write_space(struct sock *sk) 420{ 421 struct svc_sock *svsk = (struct svc_sock *)(sk->sk_user_data); 422 423 if (svsk) { 424 dprintk("svc: socket %p(inet %p), write_space busy=%d\n", 425 svsk, sk, test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags)); 426 svc_xprt_enqueue(&svsk->sk_xprt); 427 } 428 429 if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) { 430 dprintk("RPC svc_write_space: someone sleeping on %p\n", 431 svsk); 432 wake_up_interruptible(sk_sleep(sk)); 433 } 434} 435 436static void svc_tcp_write_space(struct sock *sk) 437{ 438 struct socket *sock = sk->sk_socket; 439 440 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk) && sock) 441 clear_bit(SOCK_NOSPACE, &sock->flags); 442 svc_write_space(sk); 443} 444 445/* 446 * See net/ipv6/ip_sockglue.c : ip_cmsg_recv_pktinfo 447 */ 448static int svc_udp_get_dest_address4(struct svc_rqst *rqstp, 449 struct cmsghdr *cmh) 450{ 451 struct in_pktinfo *pki = CMSG_DATA(cmh); 452 if (cmh->cmsg_type != IP_PKTINFO) 453 return 0; 454 rqstp->rq_daddr.addr.s_addr = pki->ipi_spec_dst.s_addr; 455 return 1; 456} 457 458/* 459 * See net/ipv6/datagram.c : datagram_recv_ctl 460 */ 461static int svc_udp_get_dest_address6(struct svc_rqst *rqstp, 462 struct cmsghdr *cmh) 463{ 464 struct in6_pktinfo *pki = CMSG_DATA(cmh); 465 if (cmh->cmsg_type != IPV6_PKTINFO) 466 return 0; 467 ipv6_addr_copy(&rqstp->rq_daddr.addr6, &pki->ipi6_addr); 468 return 1; 469} 470 471/* 472 * Copy the UDP datagram's destination address to the rqstp structure. 473 * The 'destination' address in this case is the address to which the 474 * peer sent the datagram, i.e. our local address. For multihomed 475 * hosts, this can change from msg to msg. Note that only the IP 476 * address changes, the port number should remain the same. 477 */ 478static int svc_udp_get_dest_address(struct svc_rqst *rqstp, 479 struct cmsghdr *cmh) 480{ 481 switch (cmh->cmsg_level) { 482 case SOL_IP: 483 return svc_udp_get_dest_address4(rqstp, cmh); 484 case SOL_IPV6: 485 return svc_udp_get_dest_address6(rqstp, cmh); 486 } 487 488 return 0; 489} 490 491/* 492 * Receive a datagram from a UDP socket. 493 */ 494static int svc_udp_recvfrom(struct svc_rqst *rqstp) 495{ 496 struct svc_sock *svsk = 497 container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt); 498 struct svc_serv *serv = svsk->sk_xprt.xpt_server; 499 struct sk_buff *skb; 500 union { 501 struct cmsghdr hdr; 502 long all[SVC_PKTINFO_SPACE / sizeof(long)]; 503 } buffer; 504 struct cmsghdr *cmh = &buffer.hdr; 505 struct msghdr msg = { 506 .msg_name = svc_addr(rqstp), 507 .msg_control = cmh, 508 .msg_controllen = sizeof(buffer), 509 .msg_flags = MSG_DONTWAIT, 510 }; 511 size_t len; 512 int err; 513 514 if (test_and_clear_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags)) 515 /* udp sockets need large rcvbuf as all pending 516 * requests are still in that buffer. sndbuf must 517 * also be large enough that there is enough space 518 * for one reply per thread. We count all threads 519 * rather than threads in a particular pool, which 520 * provides an upper bound on the number of threads 521 * which will access the socket. 522 */ 523 svc_sock_setbufsize(svsk->sk_sock, 524 (serv->sv_nrthreads+3) * serv->sv_max_mesg, 525 (serv->sv_nrthreads+3) * serv->sv_max_mesg); 526 527 clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); 528 skb = NULL; 529 err = kernel_recvmsg(svsk->sk_sock, &msg, NULL, 530 0, 0, MSG_PEEK | MSG_DONTWAIT); 531 if (err >= 0) 532 skb = skb_recv_datagram(svsk->sk_sk, 0, 1, &err); 533 534 if (skb == NULL) { 535 if (err != -EAGAIN) { 536 /* possibly an icmp error */ 537 dprintk("svc: recvfrom returned error %d\n", -err); 538 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); 539 } 540 return -EAGAIN; 541 } 542 len = svc_addr_len(svc_addr(rqstp)); 543 if (len == 0) 544 return -EAFNOSUPPORT; 545 rqstp->rq_addrlen = len; 546 if (skb->tstamp.tv64 == 0) { 547 skb->tstamp = ktime_get_real(); 548 /* Don't enable netstamp, sunrpc doesn't 549 need that much accuracy */ 550 } 551 svsk->sk_sk->sk_stamp = skb->tstamp; 552 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); /* there may be more data... */ 553 554 len = skb->len - sizeof(struct udphdr); 555 rqstp->rq_arg.len = len; 556 557 rqstp->rq_prot = IPPROTO_UDP; 558 559 if (!svc_udp_get_dest_address(rqstp, cmh)) { 560 if (net_ratelimit()) 561 printk(KERN_WARNING 562 "svc: received unknown control message %d/%d; " 563 "dropping RPC reply datagram\n", 564 cmh->cmsg_level, cmh->cmsg_type); 565 skb_free_datagram_locked(svsk->sk_sk, skb); 566 return 0; 567 } 568 569 if (skb_is_nonlinear(skb)) { 570 /* we have to copy */ 571 local_bh_disable(); 572 if (csum_partial_copy_to_xdr(&rqstp->rq_arg, skb)) { 573 local_bh_enable(); 574 /* checksum error */ 575 skb_free_datagram_locked(svsk->sk_sk, skb); 576 return 0; 577 } 578 local_bh_enable(); 579 skb_free_datagram_locked(svsk->sk_sk, skb); 580 } else { 581 /* we can use it in-place */ 582 rqstp->rq_arg.head[0].iov_base = skb->data + 583 sizeof(struct udphdr); 584 rqstp->rq_arg.head[0].iov_len = len; 585 if (skb_checksum_complete(skb)) { 586 skb_free_datagram_locked(svsk->sk_sk, skb); 587 return 0; 588 } 589 rqstp->rq_xprt_ctxt = skb; 590 } 591 592 rqstp->rq_arg.page_base = 0; 593 if (len <= rqstp->rq_arg.head[0].iov_len) { 594 rqstp->rq_arg.head[0].iov_len = len; 595 rqstp->rq_arg.page_len = 0; 596 rqstp->rq_respages = rqstp->rq_pages+1; 597 } else { 598 rqstp->rq_arg.page_len = len - rqstp->rq_arg.head[0].iov_len; 599 rqstp->rq_respages = rqstp->rq_pages + 1 + 600 DIV_ROUND_UP(rqstp->rq_arg.page_len, PAGE_SIZE); 601 } 602 603 if (serv->sv_stats) 604 serv->sv_stats->netudpcnt++; 605 606 return len; 607} 608 609static int 610svc_udp_sendto(struct svc_rqst *rqstp) 611{ 612 int error; 613 614 error = svc_sendto(rqstp, &rqstp->rq_res); 615 if (error == -ECONNREFUSED) 616 /* ICMP error on earlier request. */ 617 error = svc_sendto(rqstp, &rqstp->rq_res); 618 619 return error; 620} 621 622static void svc_udp_prep_reply_hdr(struct svc_rqst *rqstp) 623{ 624} 625 626static int svc_udp_has_wspace(struct svc_xprt *xprt) 627{ 628 struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); 629 struct svc_serv *serv = xprt->xpt_server; 630 unsigned long required; 631 632 /* 633 * Set the SOCK_NOSPACE flag before checking the available 634 * sock space. 635 */ 636 set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); 637 required = atomic_read(&svsk->sk_xprt.xpt_reserved) + serv->sv_max_mesg; 638 if (required*2 > sock_wspace(svsk->sk_sk)) 639 return 0; 640 clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); 641 return 1; 642} 643 644static struct svc_xprt *svc_udp_accept(struct svc_xprt *xprt) 645{ 646 BUG(); 647 return NULL; 648} 649 650static struct svc_xprt *svc_udp_create(struct svc_serv *serv, 651 struct sockaddr *sa, int salen, 652 int flags) 653{ 654 return svc_create_socket(serv, IPPROTO_UDP, sa, salen, flags); 655} 656 657static struct svc_xprt_ops svc_udp_ops = { 658 .xpo_create = svc_udp_create, 659 .xpo_recvfrom = svc_udp_recvfrom, 660 .xpo_sendto = svc_udp_sendto, 661 .xpo_release_rqst = svc_release_skb, 662 .xpo_detach = svc_sock_detach, 663 .xpo_free = svc_sock_free, 664 .xpo_prep_reply_hdr = svc_udp_prep_reply_hdr, 665 .xpo_has_wspace = svc_udp_has_wspace, 666 .xpo_accept = svc_udp_accept, 667}; 668 669static struct svc_xprt_class svc_udp_class = { 670 .xcl_name = "udp", 671 .xcl_owner = THIS_MODULE, 672 .xcl_ops = &svc_udp_ops, 673 .xcl_max_payload = RPCSVC_MAXPAYLOAD_UDP, 674}; 675 676static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv) 677{ 678 int err, level, optname, one = 1; 679 680 svc_xprt_init(&svc_udp_class, &svsk->sk_xprt, serv); 681 clear_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags); 682 svsk->sk_sk->sk_data_ready = svc_udp_data_ready; 683 svsk->sk_sk->sk_write_space = svc_write_space; 684 685 /* initialise setting must have enough space to 686 * receive and respond to one request. 687 * svc_udp_recvfrom will re-adjust if necessary 688 */ 689 svc_sock_setbufsize(svsk->sk_sock, 690 3 * svsk->sk_xprt.xpt_server->sv_max_mesg, 691 3 * svsk->sk_xprt.xpt_server->sv_max_mesg); 692 693 /* data might have come in before data_ready set up */ 694 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); 695 set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags); 696 697 /* make sure we get destination address info */ 698 switch (svsk->sk_sk->sk_family) { 699 case AF_INET: 700 level = SOL_IP; 701 optname = IP_PKTINFO; 702 break; 703 case AF_INET6: 704 level = SOL_IPV6; 705 optname = IPV6_RECVPKTINFO; 706 break; 707 default: 708 BUG(); 709 } 710 err = kernel_setsockopt(svsk->sk_sock, level, optname, 711 (char *)&one, sizeof(one)); 712 dprintk("svc: kernel_setsockopt returned %d\n", err); 713} 714 715/* 716 * A data_ready event on a listening socket means there's a connection 717 * pending. Do not use state_change as a substitute for it. 718 */ 719static void svc_tcp_listen_data_ready(struct sock *sk, int count_unused) 720{ 721 struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; 722 723 dprintk("svc: socket %p TCP (listen) state change %d\n", 724 sk, sk->sk_state); 725 726 /* 727 * This callback may called twice when a new connection 728 * is established as a child socket inherits everything 729 * from a parent LISTEN socket. 730 * 1) data_ready method of the parent socket will be called 731 * when one of child sockets become ESTABLISHED. 732 * 2) data_ready method of the child socket may be called 733 * when it receives data before the socket is accepted. 734 * In case of 2, we should ignore it silently. 735 */ 736 if (sk->sk_state == TCP_LISTEN) { 737 if (svsk) { 738 set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags); 739 svc_xprt_enqueue(&svsk->sk_xprt); 740 } else 741 printk("svc: socket %p: no user data\n", sk); 742 } 743 744 if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) 745 wake_up_interruptible_all(sk_sleep(sk)); 746} 747 748/* 749 * A state change on a connected socket means it's dying or dead. 750 */ 751static void svc_tcp_state_change(struct sock *sk) 752{ 753 struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; 754 755 dprintk("svc: socket %p TCP (connected) state change %d (svsk %p)\n", 756 sk, sk->sk_state, sk->sk_user_data); 757 758 if (!svsk) 759 printk("svc: socket %p: no user data\n", sk); 760 else { 761 set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); 762 svc_xprt_enqueue(&svsk->sk_xprt); 763 } 764 if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) 765 wake_up_interruptible_all(sk_sleep(sk)); 766} 767 768static void svc_tcp_data_ready(struct sock *sk, int count) 769{ 770 struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; 771 772 dprintk("svc: socket %p TCP data ready (svsk %p)\n", 773 sk, sk->sk_user_data); 774 if (svsk) { 775 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); 776 svc_xprt_enqueue(&svsk->sk_xprt); 777 } 778 if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) 779 wake_up_interruptible(sk_sleep(sk)); 780} 781 782/* 783 * Accept a TCP connection 784 */ 785static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt) 786{ 787 struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); 788 struct sockaddr_storage addr; 789 struct sockaddr *sin = (struct sockaddr *) &addr; 790 struct svc_serv *serv = svsk->sk_xprt.xpt_server; 791 struct socket *sock = svsk->sk_sock; 792 struct socket *newsock; 793 struct svc_sock *newsvsk; 794 int err, slen; 795 RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]); 796 797 dprintk("svc: tcp_accept %p sock %p\n", svsk, sock); 798 if (!sock) 799 return NULL; 800 801 clear_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags); 802 err = kernel_accept(sock, &newsock, O_NONBLOCK); 803 if (err < 0) { 804 if (err == -ENOMEM) 805 printk(KERN_WARNING "%s: no more sockets!\n", 806 serv->sv_name); 807 else if (err != -EAGAIN && net_ratelimit()) 808 printk(KERN_WARNING "%s: accept failed (err %d)!\n", 809 serv->sv_name, -err); 810 return NULL; 811 } 812 set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags); 813 814 err = kernel_getpeername(newsock, sin, &slen); 815 if (err < 0) { 816 if (net_ratelimit()) 817 printk(KERN_WARNING "%s: peername failed (err %d)!\n", 818 serv->sv_name, -err); 819 goto failed; /* aborted connection or whatever */ 820 } 821 822 /* Ideally, we would want to reject connections from unauthorized 823 * hosts here, but when we get encryption, the IP of the host won't 824 * tell us anything. For now just warn about unpriv connections. 825 */ 826 if (!svc_port_is_privileged(sin)) { 827 dprintk(KERN_WARNING 828 "%s: connect from unprivileged port: %s\n", 829 serv->sv_name, 830 __svc_print_addr(sin, buf, sizeof(buf))); 831 } 832 dprintk("%s: connect from %s\n", serv->sv_name, 833 __svc_print_addr(sin, buf, sizeof(buf))); 834 835 /* make sure that a write doesn't block forever when 836 * low on memory 837 */ 838 newsock->sk->sk_sndtimeo = HZ*30; 839 840 if (!(newsvsk = svc_setup_socket(serv, newsock, &err, 841 (SVC_SOCK_ANONYMOUS | SVC_SOCK_TEMPORARY)))) 842 goto failed; 843 svc_xprt_set_remote(&newsvsk->sk_xprt, sin, slen); 844 err = kernel_getsockname(newsock, sin, &slen); 845 if (unlikely(err < 0)) { 846 dprintk("svc_tcp_accept: kernel_getsockname error %d\n", -err); 847 slen = offsetof(struct sockaddr, sa_data); 848 } 849 svc_xprt_set_local(&newsvsk->sk_xprt, sin, slen); 850 851 if (serv->sv_stats) 852 serv->sv_stats->nettcpconn++; 853 854 return &newsvsk->sk_xprt; 855 856failed: 857 sock_release(newsock); 858 return NULL; 859} 860 861/* 862 * Receive data. 863 * If we haven't gotten the record length yet, get the next four bytes. 864 * Otherwise try to gobble up as much as possible up to the complete 865 * record length. 866 */ 867static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp) 868{ 869 struct svc_serv *serv = svsk->sk_xprt.xpt_server; 870 int len; 871 872 if (test_and_clear_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags)) 873 /* sndbuf needs to have room for one request 874 * per thread, otherwise we can stall even when the 875 * network isn't a bottleneck. 876 * 877 * We count all threads rather than threads in a 878 * particular pool, which provides an upper bound 879 * on the number of threads which will access the socket. 880 * 881 * rcvbuf just needs to be able to hold a few requests. 882 * Normally they will be removed from the queue 883 * as soon a a complete request arrives. 884 */ 885 svc_sock_setbufsize(svsk->sk_sock, 886 (serv->sv_nrthreads+3) * serv->sv_max_mesg, 887 3 * serv->sv_max_mesg); 888 889 clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); 890 891 if (svsk->sk_tcplen < sizeof(rpc_fraghdr)) { 892 int want = sizeof(rpc_fraghdr) - svsk->sk_tcplen; 893 struct kvec iov; 894 895 iov.iov_base = ((char *) &svsk->sk_reclen) + svsk->sk_tcplen; 896 iov.iov_len = want; 897 if ((len = svc_recvfrom(rqstp, &iov, 1, want)) < 0) 898 goto error; 899 svsk->sk_tcplen += len; 900 901 if (len < want) { 902 dprintk("svc: short recvfrom while reading record " 903 "length (%d of %d)\n", len, want); 904 goto err_again; /* record header not complete */ 905 } 906 907 svsk->sk_reclen = ntohl(svsk->sk_reclen); 908 if (!(svsk->sk_reclen & RPC_LAST_STREAM_FRAGMENT)) { 909 if (net_ratelimit()) 910 printk(KERN_NOTICE "RPC: multiple fragments " 911 "per record not supported\n"); 912 goto err_delete; 913 } 914 915 svsk->sk_reclen &= RPC_FRAGMENT_SIZE_MASK; 916 dprintk("svc: TCP record, %d bytes\n", svsk->sk_reclen); 917 if (svsk->sk_reclen > serv->sv_max_mesg) { 918 if (net_ratelimit()) 919 printk(KERN_NOTICE "RPC: " 920 "fragment too large: 0x%08lx\n", 921 (unsigned long)svsk->sk_reclen); 922 goto err_delete; 923 } 924 } 925 926 /* Check whether enough data is available */ 927 len = svc_recv_available(svsk); 928 if (len < 0) 929 goto error; 930 931 if (len < svsk->sk_reclen) { 932 dprintk("svc: incomplete TCP record (%d of %d)\n", 933 len, svsk->sk_reclen); 934 goto err_again; /* record not complete */ 935 } 936 len = svsk->sk_reclen; 937 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); 938 939 return len; 940 error: 941 if (len == -EAGAIN) 942 dprintk("RPC: TCP recv_record got EAGAIN\n"); 943 return len; 944 err_delete: 945 set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); 946 err_again: 947 return -EAGAIN; 948} 949 950static int svc_process_calldir(struct svc_sock *svsk, struct svc_rqst *rqstp, 951 struct rpc_rqst **reqpp, struct kvec *vec) 952{ 953 struct rpc_rqst *req = NULL; 954 u32 *p; 955 u32 xid; 956 u32 calldir; 957 int len; 958 959 len = svc_recvfrom(rqstp, vec, 1, 8); 960 if (len < 0) 961 goto error; 962 963 p = (u32 *)rqstp->rq_arg.head[0].iov_base; 964 xid = *p++; 965 calldir = *p; 966 967 if (calldir == 0) { 968 /* REQUEST is the most common case */ 969 vec[0] = rqstp->rq_arg.head[0]; 970 } else { 971 /* REPLY */ 972 if (svsk->sk_bc_xprt) 973 req = xprt_lookup_rqst(svsk->sk_bc_xprt, xid); 974 975 if (!req) { 976 printk(KERN_NOTICE 977 "%s: Got unrecognized reply: " 978 "calldir 0x%x sk_bc_xprt %p xid %08x\n", 979 __func__, ntohl(calldir), 980 svsk->sk_bc_xprt, xid); 981 vec[0] = rqstp->rq_arg.head[0]; 982 goto out; 983 } 984 985 memcpy(&req->rq_private_buf, &req->rq_rcv_buf, 986 sizeof(struct xdr_buf)); 987 /* copy the xid and call direction */ 988 memcpy(req->rq_private_buf.head[0].iov_base, 989 rqstp->rq_arg.head[0].iov_base, 8); 990 vec[0] = req->rq_private_buf.head[0]; 991 } 992 out: 993 vec[0].iov_base += 8; 994 vec[0].iov_len -= 8; 995 len = svsk->sk_reclen - 8; 996 error: 997 *reqpp = req; 998 return len; 999} 1000 1001/* 1002 * Receive data from a TCP socket. 1003 */ 1004static int svc_tcp_recvfrom(struct svc_rqst *rqstp) 1005{ 1006 struct svc_sock *svsk = 1007 container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt); 1008 struct svc_serv *serv = svsk->sk_xprt.xpt_server; 1009 int len; 1010 struct kvec *vec; 1011 int pnum, vlen; 1012 struct rpc_rqst *req = NULL; 1013 1014 dprintk("svc: tcp_recv %p data %d conn %d close %d\n", 1015 svsk, test_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags), 1016 test_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags), 1017 test_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags)); 1018 1019 len = svc_tcp_recv_record(svsk, rqstp); 1020 if (len < 0) 1021 goto error; 1022 1023 vec = rqstp->rq_vec; 1024 vec[0] = rqstp->rq_arg.head[0]; 1025 vlen = PAGE_SIZE; 1026 1027 /* 1028 * We have enough data for the whole tcp record. Let's try and read the 1029 * first 8 bytes to get the xid and the call direction. We can use this 1030 * to figure out if this is a call or a reply to a callback. If 1031 * sk_reclen is < 8 (xid and calldir), then this is a malformed packet. 1032 * In that case, don't bother with the calldir and just read the data. 1033 * It will be rejected in svc_process. 1034 */ 1035 if (len >= 8) { 1036 len = svc_process_calldir(svsk, rqstp, &req, vec); 1037 if (len < 0) 1038 goto err_again; 1039 vlen -= 8; 1040 } 1041 1042 pnum = 1; 1043 while (vlen < len) { 1044 vec[pnum].iov_base = (req) ? 1045 page_address(req->rq_private_buf.pages[pnum - 1]) : 1046 page_address(rqstp->rq_pages[pnum]); 1047 vec[pnum].iov_len = PAGE_SIZE; 1048 pnum++; 1049 vlen += PAGE_SIZE; 1050 } 1051 rqstp->rq_respages = &rqstp->rq_pages[pnum]; 1052 1053 /* Now receive data */ 1054 len = svc_recvfrom(rqstp, vec, pnum, len); 1055 if (len < 0) 1056 goto err_again; 1057 1058 /* 1059 * Account for the 8 bytes we read earlier 1060 */ 1061 len += 8; 1062 1063 if (req) { 1064 xprt_complete_rqst(req->rq_task, len); 1065 len = 0; 1066 goto out; 1067 } 1068 dprintk("svc: TCP complete record (%d bytes)\n", len); 1069 rqstp->rq_arg.len = len; 1070 rqstp->rq_arg.page_base = 0; 1071 if (len <= rqstp->rq_arg.head[0].iov_len) { 1072 rqstp->rq_arg.head[0].iov_len = len; 1073 rqstp->rq_arg.page_len = 0; 1074 } else { 1075 rqstp->rq_arg.page_len = len - rqstp->rq_arg.head[0].iov_len; 1076 } 1077 1078 rqstp->rq_xprt_ctxt = NULL; 1079 rqstp->rq_prot = IPPROTO_TCP; 1080 1081out: 1082 /* Reset TCP read info */ 1083 svsk->sk_reclen = 0; 1084 svsk->sk_tcplen = 0; 1085 1086 svc_xprt_copy_addrs(rqstp, &svsk->sk_xprt); 1087 if (serv->sv_stats) 1088 serv->sv_stats->nettcpcnt++; 1089 1090 return len; 1091 1092err_again: 1093 if (len == -EAGAIN) { 1094 dprintk("RPC: TCP recvfrom got EAGAIN\n"); 1095 return len; 1096 } 1097error: 1098 if (len != -EAGAIN) { 1099 printk(KERN_NOTICE "%s: recvfrom returned errno %d\n", 1100 svsk->sk_xprt.xpt_server->sv_name, -len); 1101 set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); 1102 } 1103 return -EAGAIN; 1104} 1105 1106/* 1107 * Send out data on TCP socket. 1108 */ 1109static int svc_tcp_sendto(struct svc_rqst *rqstp) 1110{ 1111 struct xdr_buf *xbufp = &rqstp->rq_res; 1112 int sent; 1113 __be32 reclen; 1114 1115 /* Set up the first element of the reply kvec. 1116 * Any other kvecs that may be in use have been taken 1117 * care of by the server implementation itself. 1118 */ 1119 reclen = htonl(0x80000000|((xbufp->len ) - 4)); 1120 memcpy(xbufp->head[0].iov_base, &reclen, 4); 1121 1122 if (test_bit(XPT_DEAD, &rqstp->rq_xprt->xpt_flags)) 1123 return -ENOTCONN; 1124 1125 sent = svc_sendto(rqstp, &rqstp->rq_res); 1126 if (sent != xbufp->len) { 1127 printk(KERN_NOTICE 1128 "rpc-srv/tcp: %s: %s %d when sending %d bytes " 1129 "- shutting down socket\n", 1130 rqstp->rq_xprt->xpt_server->sv_name, 1131 (sent<0)?"got error":"sent only", 1132 sent, xbufp->len); 1133 set_bit(XPT_CLOSE, &rqstp->rq_xprt->xpt_flags); 1134 svc_xprt_enqueue(rqstp->rq_xprt); 1135 sent = -EAGAIN; 1136 } 1137 return sent; 1138} 1139 1140/* 1141 * Setup response header. TCP has a 4B record length field. 1142 */ 1143static void svc_tcp_prep_reply_hdr(struct svc_rqst *rqstp) 1144{ 1145 struct kvec *resv = &rqstp->rq_res.head[0]; 1146 1147 /* tcp needs a space for the record length... */ 1148 svc_putnl(resv, 0); 1149} 1150 1151static int svc_tcp_has_wspace(struct svc_xprt *xprt) 1152{ 1153 struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); 1154 struct svc_serv *serv = svsk->sk_xprt.xpt_server; 1155 int required; 1156 1157 if (test_bit(XPT_LISTENER, &xprt->xpt_flags)) 1158 return 1; 1159 required = atomic_read(&xprt->xpt_reserved) + serv->sv_max_mesg; 1160 if (sk_stream_wspace(svsk->sk_sk) >= required) 1161 return 1; 1162 set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); 1163 return 0; 1164} 1165 1166static struct svc_xprt *svc_tcp_create(struct svc_serv *serv, 1167 struct sockaddr *sa, int salen, 1168 int flags) 1169{ 1170 return svc_create_socket(serv, IPPROTO_TCP, sa, salen, flags); 1171} 1172 1173static struct svc_xprt_ops svc_tcp_ops = { 1174 .xpo_create = svc_tcp_create, 1175 .xpo_recvfrom = svc_tcp_recvfrom, 1176 .xpo_sendto = svc_tcp_sendto, 1177 .xpo_release_rqst = svc_release_skb, 1178 .xpo_detach = svc_tcp_sock_detach, 1179 .xpo_free = svc_sock_free, 1180 .xpo_prep_reply_hdr = svc_tcp_prep_reply_hdr, 1181 .xpo_has_wspace = svc_tcp_has_wspace, 1182 .xpo_accept = svc_tcp_accept, 1183}; 1184 1185static struct svc_xprt_class svc_tcp_class = { 1186 .xcl_name = "tcp", 1187 .xcl_owner = THIS_MODULE, 1188 .xcl_ops = &svc_tcp_ops, 1189 .xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP, 1190}; 1191 1192void svc_init_xprt_sock(void) 1193{ 1194 svc_reg_xprt_class(&svc_tcp_class); 1195 svc_reg_xprt_class(&svc_udp_class); 1196} 1197 1198void svc_cleanup_xprt_sock(void) 1199{ 1200 svc_unreg_xprt_class(&svc_tcp_class); 1201 svc_unreg_xprt_class(&svc_udp_class); 1202} 1203 1204static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv) 1205{ 1206 struct sock *sk = svsk->sk_sk; 1207 1208 svc_xprt_init(&svc_tcp_class, &svsk->sk_xprt, serv); 1209 set_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags); 1210 if (sk->sk_state == TCP_LISTEN) { 1211 dprintk("setting up TCP socket for listening\n"); 1212 set_bit(XPT_LISTENER, &svsk->sk_xprt.xpt_flags); 1213 sk->sk_data_ready = svc_tcp_listen_data_ready; 1214 set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags); 1215 } else { 1216 dprintk("setting up TCP socket for reading\n"); 1217 sk->sk_state_change = svc_tcp_state_change; 1218 sk->sk_data_ready = svc_tcp_data_ready; 1219 sk->sk_write_space = svc_tcp_write_space; 1220 1221 svsk->sk_reclen = 0; 1222 svsk->sk_tcplen = 0; 1223 1224 tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF; 1225 1226 /* initialise setting must have enough space to 1227 * receive and respond to one request. 1228 * svc_tcp_recvfrom will re-adjust if necessary 1229 */ 1230 svc_sock_setbufsize(svsk->sk_sock, 1231 3 * svsk->sk_xprt.xpt_server->sv_max_mesg, 1232 3 * svsk->sk_xprt.xpt_server->sv_max_mesg); 1233 1234 set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags); 1235 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); 1236 if (sk->sk_state != TCP_ESTABLISHED) 1237 set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); 1238 } 1239} 1240 1241void svc_sock_update_bufs(struct svc_serv *serv) 1242{ 1243 /* 1244 * The number of server threads has changed. Update 1245 * rcvbuf and sndbuf accordingly on all sockets 1246 */ 1247 struct list_head *le; 1248 1249 spin_lock_bh(&serv->sv_lock); 1250 list_for_each(le, &serv->sv_permsocks) { 1251 struct svc_sock *svsk = 1252 list_entry(le, struct svc_sock, sk_xprt.xpt_list); 1253 set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags); 1254 } 1255 list_for_each(le, &serv->sv_tempsocks) { 1256 struct svc_sock *svsk = 1257 list_entry(le, struct svc_sock, sk_xprt.xpt_list); 1258 set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags); 1259 } 1260 spin_unlock_bh(&serv->sv_lock); 1261} 1262EXPORT_SYMBOL_GPL(svc_sock_update_bufs); 1263 1264static struct svc_sock *svc_setup_socket(struct svc_serv *serv, 1265 struct socket *sock, 1266 int *errp, int flags) 1267{ 1268 struct svc_sock *svsk; 1269 struct sock *inet; 1270 int pmap_register = !(flags & SVC_SOCK_ANONYMOUS); 1271 1272 dprintk("svc: svc_setup_socket %p\n", sock); 1273 if (!(svsk = kzalloc(sizeof(*svsk), GFP_KERNEL))) { 1274 *errp = -ENOMEM; 1275 return NULL; 1276 } 1277 1278 inet = sock->sk; 1279 1280 /* Register socket with portmapper */ 1281 if (*errp >= 0 && pmap_register) 1282 *errp = svc_register(serv, inet->sk_family, inet->sk_protocol, 1283 ntohs(inet_sk(inet)->inet_sport)); 1284 1285 if (*errp < 0) { 1286 kfree(svsk); 1287 return NULL; 1288 } 1289 1290 inet->sk_user_data = svsk; 1291 svsk->sk_sock = sock; 1292 svsk->sk_sk = inet; 1293 svsk->sk_ostate = inet->sk_state_change; 1294 svsk->sk_odata = inet->sk_data_ready; 1295 svsk->sk_owspace = inet->sk_write_space; 1296 1297 /* Initialize the socket */ 1298 if (sock->type == SOCK_DGRAM) 1299 svc_udp_init(svsk, serv); 1300 else 1301 svc_tcp_init(svsk, serv); 1302 1303 dprintk("svc: svc_setup_socket created %p (inet %p)\n", 1304 svsk, svsk->sk_sk); 1305 1306 return svsk; 1307} 1308 1309/** 1310 * svc_addsock - add a listener socket to an RPC service 1311 * @serv: pointer to RPC service to which to add a new listener 1312 * @fd: file descriptor of the new listener 1313 * @name_return: pointer to buffer to fill in with name of listener 1314 * @len: size of the buffer 1315 * 1316 * Fills in socket name and returns positive length of name if successful. 1317 * Name is terminated with '\n'. On error, returns a negative errno 1318 * value. 1319 */ 1320int svc_addsock(struct svc_serv *serv, const int fd, char *name_return, 1321 const size_t len) 1322{ 1323 int err = 0; 1324 struct socket *so = sockfd_lookup(fd, &err); 1325 struct svc_sock *svsk = NULL; 1326 1327 if (!so) 1328 return err; 1329 if ((so->sk->sk_family != PF_INET) && (so->sk->sk_family != PF_INET6)) 1330 err = -EAFNOSUPPORT; 1331 else if (so->sk->sk_protocol != IPPROTO_TCP && 1332 so->sk->sk_protocol != IPPROTO_UDP) 1333 err = -EPROTONOSUPPORT; 1334 else if (so->state > SS_UNCONNECTED) 1335 err = -EISCONN; 1336 else { 1337 if (!try_module_get(THIS_MODULE)) 1338 err = -ENOENT; 1339 else 1340 svsk = svc_setup_socket(serv, so, &err, 1341 SVC_SOCK_DEFAULTS); 1342 if (svsk) { 1343 struct sockaddr_storage addr; 1344 struct sockaddr *sin = (struct sockaddr *)&addr; 1345 int salen; 1346 if (kernel_getsockname(svsk->sk_sock, sin, &salen) == 0) 1347 svc_xprt_set_local(&svsk->sk_xprt, sin, salen); 1348 clear_bit(XPT_TEMP, &svsk->sk_xprt.xpt_flags); 1349 spin_lock_bh(&serv->sv_lock); 1350 list_add(&svsk->sk_xprt.xpt_list, &serv->sv_permsocks); 1351 spin_unlock_bh(&serv->sv_lock); 1352 svc_xprt_received(&svsk->sk_xprt); 1353 err = 0; 1354 } else 1355 module_put(THIS_MODULE); 1356 } 1357 if (err) { 1358 sockfd_put(so); 1359 return err; 1360 } 1361 return svc_one_sock_name(svsk, name_return, len); 1362} 1363EXPORT_SYMBOL_GPL(svc_addsock); 1364 1365/* 1366 * Create socket for RPC service. 1367 */ 1368static struct svc_xprt *svc_create_socket(struct svc_serv *serv, 1369 int protocol, 1370 struct sockaddr *sin, int len, 1371 int flags) 1372{ 1373 struct svc_sock *svsk; 1374 struct socket *sock; 1375 int error; 1376 int type; 1377 struct sockaddr_storage addr; 1378 struct sockaddr *newsin = (struct sockaddr *)&addr; 1379 int newlen; 1380 int family; 1381 int val; 1382 RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]); 1383 1384 dprintk("svc: svc_create_socket(%s, %d, %s)\n", 1385 serv->sv_program->pg_name, protocol, 1386 __svc_print_addr(sin, buf, sizeof(buf))); 1387 1388 if (protocol != IPPROTO_UDP && protocol != IPPROTO_TCP) { 1389 printk(KERN_WARNING "svc: only UDP and TCP " 1390 "sockets supported\n"); 1391 return ERR_PTR(-EINVAL); 1392 } 1393 1394 type = (protocol == IPPROTO_UDP)? SOCK_DGRAM : SOCK_STREAM; 1395 switch (sin->sa_family) { 1396 case AF_INET6: 1397 family = PF_INET6; 1398 break; 1399 case AF_INET: 1400 family = PF_INET; 1401 break; 1402 default: 1403 return ERR_PTR(-EINVAL); 1404 } 1405 1406 error = sock_create_kern(family, type, protocol, &sock); 1407 if (error < 0) 1408 return ERR_PTR(error); 1409 1410 svc_reclassify_socket(sock); 1411 1412 /* 1413 * If this is an PF_INET6 listener, we want to avoid 1414 * getting requests from IPv4 remotes. Those should 1415 * be shunted to a PF_INET listener via rpcbind. 1416 */ 1417 val = 1; 1418 if (family == PF_INET6) 1419 kernel_setsockopt(sock, SOL_IPV6, IPV6_V6ONLY, 1420 (char *)&val, sizeof(val)); 1421 1422 if (type == SOCK_STREAM) 1423 sock->sk->sk_reuse = 1; /* allow address reuse */ 1424 error = kernel_bind(sock, sin, len); 1425 if (error < 0) 1426 goto bummer; 1427 1428 newlen = len; 1429 error = kernel_getsockname(sock, newsin, &newlen); 1430 if (error < 0) 1431 goto bummer; 1432 1433 if (protocol == IPPROTO_TCP) { 1434 if ((error = kernel_listen(sock, 64)) < 0) 1435 goto bummer; 1436 } 1437 1438 if ((svsk = svc_setup_socket(serv, sock, &error, flags)) != NULL) { 1439 svc_xprt_set_local(&svsk->sk_xprt, newsin, newlen); 1440 return (struct svc_xprt *)svsk; 1441 } 1442 1443bummer: 1444 dprintk("svc: svc_create_socket error = %d\n", -error); 1445 sock_release(sock); 1446 return ERR_PTR(error); 1447} 1448 1449/* 1450 * Detach the svc_sock from the socket so that no 1451 * more callbacks occur. 1452 */ 1453static void svc_sock_detach(struct svc_xprt *xprt) 1454{ 1455 struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); 1456 struct sock *sk = svsk->sk_sk; 1457 1458 dprintk("svc: svc_sock_detach(%p)\n", svsk); 1459 1460 /* put back the old socket callbacks */ 1461 sk->sk_state_change = svsk->sk_ostate; 1462 sk->sk_data_ready = svsk->sk_odata; 1463 sk->sk_write_space = svsk->sk_owspace; 1464 1465 if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) 1466 wake_up_interruptible(sk_sleep(sk)); 1467} 1468 1469/* 1470 * Disconnect the socket, and reset the callbacks 1471 */ 1472static void svc_tcp_sock_detach(struct svc_xprt *xprt) 1473{ 1474 struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); 1475 1476 dprintk("svc: svc_tcp_sock_detach(%p)\n", svsk); 1477 1478 svc_sock_detach(xprt); 1479 1480 if (!test_bit(XPT_LISTENER, &xprt->xpt_flags)) 1481 kernel_sock_shutdown(svsk->sk_sock, SHUT_RDWR); 1482} 1483 1484/* 1485 * Free the svc_sock's socket resources and the svc_sock itself. 1486 */ 1487static void svc_sock_free(struct svc_xprt *xprt) 1488{ 1489 struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); 1490 dprintk("svc: svc_sock_free(%p)\n", svsk); 1491 1492 if (svsk->sk_sock->file) 1493 sockfd_put(svsk->sk_sock); 1494 else 1495 sock_release(svsk->sk_sock); 1496 kfree(svsk); 1497} 1498 1499/* 1500 * Create a svc_xprt. 1501 * 1502 * For internal use only (e.g. nfsv4.1 backchannel). 1503 * Callers should typically use the xpo_create() method. 1504 */ 1505struct svc_xprt *svc_sock_create(struct svc_serv *serv, int prot) 1506{ 1507 struct svc_sock *svsk; 1508 struct svc_xprt *xprt = NULL; 1509 1510 dprintk("svc: %s\n", __func__); 1511 svsk = kzalloc(sizeof(*svsk), GFP_KERNEL); 1512 if (!svsk) 1513 goto out; 1514 1515 xprt = &svsk->sk_xprt; 1516 if (prot == IPPROTO_TCP) 1517 svc_xprt_init(&svc_tcp_class, xprt, serv); 1518 else if (prot == IPPROTO_UDP) 1519 svc_xprt_init(&svc_udp_class, xprt, serv); 1520 else 1521 BUG(); 1522out: 1523 dprintk("svc: %s return %p\n", __func__, xprt); 1524 return xprt; 1525} 1526EXPORT_SYMBOL_GPL(svc_sock_create); 1527 1528/* 1529 * Destroy a svc_sock. 1530 */ 1531void svc_sock_destroy(struct svc_xprt *xprt) 1532{ 1533 if (xprt) 1534 kfree(container_of(xprt, struct svc_sock, sk_xprt)); 1535} 1536EXPORT_SYMBOL_GPL(svc_sock_destroy); 1537