1/* 2 * net/dccp/proto.c 3 * 4 * An implementation of the DCCP protocol 5 * Arnaldo Carvalho de Melo <acme@conectiva.com.br> 6 * 7 * This program is free software; you can redistribute it and/or modify it 8 * under the terms of the GNU General Public License version 2 as 9 * published by the Free Software Foundation. 10 */ 11 12#include <linux/dccp.h> 13#include <linux/module.h> 14#include <linux/types.h> 15#include <linux/sched.h> 16#include <linux/kernel.h> 17#include <linux/skbuff.h> 18#include <linux/netdevice.h> 19#include <linux/in.h> 20#include <linux/if_arp.h> 21#include <linux/init.h> 22#include <linux/random.h> 23#include <net/checksum.h> 24 25#include <net/inet_sock.h> 26#include <net/sock.h> 27#include <net/xfrm.h> 28 29#include <asm/semaphore.h> 30#include <linux/spinlock.h> 31#include <linux/timer.h> 32#include <linux/delay.h> 33#include <linux/poll.h> 34 35#include "ccid.h" 36#include "dccp.h" 37#include "feat.h" 38 39DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly; 40 41EXPORT_SYMBOL_GPL(dccp_statistics); 42 43atomic_t dccp_orphan_count = ATOMIC_INIT(0); 44 45EXPORT_SYMBOL_GPL(dccp_orphan_count); 46 47struct inet_hashinfo __cacheline_aligned dccp_hashinfo = { 48 .lhash_lock = RW_LOCK_UNLOCKED, 49 .lhash_users = ATOMIC_INIT(0), 50 .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait), 51}; 52 53EXPORT_SYMBOL_GPL(dccp_hashinfo); 54 55/* the maximum queue length for tx in packets. 0 is no limit */ 56int sysctl_dccp_tx_qlen __read_mostly = 5; 57 58void dccp_set_state(struct sock *sk, const int state) 59{ 60 const int oldstate = sk->sk_state; 61 62 dccp_pr_debug("%s(%p) %-10.10s -> %s\n", 63 dccp_role(sk), sk, 64 dccp_state_name(oldstate), dccp_state_name(state)); 65 WARN_ON(state == oldstate); 66 67 switch (state) { 68 case DCCP_OPEN: 69 if (oldstate != DCCP_OPEN) 70 DCCP_INC_STATS(DCCP_MIB_CURRESTAB); 71 break; 72 73 case DCCP_CLOSED: 74 if (oldstate == DCCP_CLOSING || oldstate == DCCP_OPEN) 75 DCCP_INC_STATS(DCCP_MIB_ESTABRESETS); 76 77 sk->sk_prot->unhash(sk); 78 if (inet_csk(sk)->icsk_bind_hash != NULL && 79 !(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) 80 inet_put_port(&dccp_hashinfo, sk); 81 /* fall through */ 82 default: 83 if (oldstate == DCCP_OPEN) 84 DCCP_DEC_STATS(DCCP_MIB_CURRESTAB); 85 } 86 87 /* Change state AFTER socket is unhashed to avoid closed 88 * socket sitting in hash tables. 89 */ 90 sk->sk_state = state; 91} 92 93EXPORT_SYMBOL_GPL(dccp_set_state); 94 95void dccp_done(struct sock *sk) 96{ 97 dccp_set_state(sk, DCCP_CLOSED); 98 dccp_clear_xmit_timers(sk); 99 100 sk->sk_shutdown = SHUTDOWN_MASK; 101 102 if (!sock_flag(sk, SOCK_DEAD)) 103 sk->sk_state_change(sk); 104 else 105 inet_csk_destroy_sock(sk); 106} 107 108EXPORT_SYMBOL_GPL(dccp_done); 109 110const char *dccp_packet_name(const int type) 111{ 112 static const char *dccp_packet_names[] = { 113 [DCCP_PKT_REQUEST] = "REQUEST", 114 [DCCP_PKT_RESPONSE] = "RESPONSE", 115 [DCCP_PKT_DATA] = "DATA", 116 [DCCP_PKT_ACK] = "ACK", 117 [DCCP_PKT_DATAACK] = "DATAACK", 118 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ", 119 [DCCP_PKT_CLOSE] = "CLOSE", 120 [DCCP_PKT_RESET] = "RESET", 121 [DCCP_PKT_SYNC] = "SYNC", 122 [DCCP_PKT_SYNCACK] = "SYNCACK", 123 }; 124 125 if (type >= DCCP_NR_PKT_TYPES) 126 return "INVALID"; 127 else 128 return dccp_packet_names[type]; 129} 130 131EXPORT_SYMBOL_GPL(dccp_packet_name); 132 133const char *dccp_state_name(const int state) 134{ 135 static char *dccp_state_names[] = { 136 [DCCP_OPEN] = "OPEN", 137 [DCCP_REQUESTING] = "REQUESTING", 138 [DCCP_PARTOPEN] = "PARTOPEN", 139 [DCCP_LISTEN] = "LISTEN", 140 [DCCP_RESPOND] = "RESPOND", 141 [DCCP_CLOSING] = "CLOSING", 142 [DCCP_TIME_WAIT] = "TIME_WAIT", 143 [DCCP_CLOSED] = "CLOSED", 144 }; 145 146 if (state >= DCCP_MAX_STATES) 147 return "INVALID STATE!"; 148 else 149 return dccp_state_names[state]; 150} 151 152EXPORT_SYMBOL_GPL(dccp_state_name); 153 154void dccp_hash(struct sock *sk) 155{ 156 inet_hash(&dccp_hashinfo, sk); 157} 158 159EXPORT_SYMBOL_GPL(dccp_hash); 160 161void dccp_unhash(struct sock *sk) 162{ 163 inet_unhash(&dccp_hashinfo, sk); 164} 165 166EXPORT_SYMBOL_GPL(dccp_unhash); 167 168int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized) 169{ 170 struct dccp_sock *dp = dccp_sk(sk); 171 struct dccp_minisock *dmsk = dccp_msk(sk); 172 struct inet_connection_sock *icsk = inet_csk(sk); 173 174 dccp_minisock_init(&dp->dccps_minisock); 175 do_gettimeofday(&dp->dccps_epoch); 176 177 if (likely(ctl_sock_initialized)) { 178 int rc = dccp_feat_init(dmsk); 179 180 if (rc) 181 return rc; 182 183 if (dmsk->dccpms_send_ack_vector) { 184 dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL); 185 if (dp->dccps_hc_rx_ackvec == NULL) 186 return -ENOMEM; 187 } 188 dp->dccps_hc_rx_ccid = ccid_hc_rx_new(dmsk->dccpms_rx_ccid, 189 sk, GFP_KERNEL); 190 dp->dccps_hc_tx_ccid = ccid_hc_tx_new(dmsk->dccpms_tx_ccid, 191 sk, GFP_KERNEL); 192 if (unlikely(dp->dccps_hc_rx_ccid == NULL || 193 dp->dccps_hc_tx_ccid == NULL)) { 194 ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk); 195 ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk); 196 if (dmsk->dccpms_send_ack_vector) { 197 dccp_ackvec_free(dp->dccps_hc_rx_ackvec); 198 dp->dccps_hc_rx_ackvec = NULL; 199 } 200 dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL; 201 return -ENOMEM; 202 } 203 } else { 204 /* control socket doesn't need feat nego */ 205 INIT_LIST_HEAD(&dmsk->dccpms_pending); 206 INIT_LIST_HEAD(&dmsk->dccpms_conf); 207 } 208 209 dccp_init_xmit_timers(sk); 210 icsk->icsk_rto = DCCP_TIMEOUT_INIT; 211 icsk->icsk_syn_retries = sysctl_dccp_request_retries; 212 sk->sk_state = DCCP_CLOSED; 213 sk->sk_write_space = dccp_write_space; 214 icsk->icsk_sync_mss = dccp_sync_mss; 215 dp->dccps_mss_cache = 536; 216 dp->dccps_role = DCCP_ROLE_UNDEFINED; 217 dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT; 218 dp->dccps_l_ack_ratio = dp->dccps_r_ack_ratio = 1; 219 220 return 0; 221} 222 223EXPORT_SYMBOL_GPL(dccp_init_sock); 224 225int dccp_destroy_sock(struct sock *sk) 226{ 227 struct dccp_sock *dp = dccp_sk(sk); 228 struct dccp_minisock *dmsk = dccp_msk(sk); 229 230 /* 231 * DCCP doesn't use sk_write_queue, just sk_send_head 232 * for retransmissions 233 */ 234 if (sk->sk_send_head != NULL) { 235 kfree_skb(sk->sk_send_head); 236 sk->sk_send_head = NULL; 237 } 238 239 /* Clean up a referenced DCCP bind bucket. */ 240 if (inet_csk(sk)->icsk_bind_hash != NULL) 241 inet_put_port(&dccp_hashinfo, sk); 242 243 kfree(dp->dccps_service_list); 244 dp->dccps_service_list = NULL; 245 246 if (dmsk->dccpms_send_ack_vector) { 247 dccp_ackvec_free(dp->dccps_hc_rx_ackvec); 248 dp->dccps_hc_rx_ackvec = NULL; 249 } 250 ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk); 251 ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk); 252 dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL; 253 254 /* clean up feature negotiation state */ 255 dccp_feat_clean(dmsk); 256 257 return 0; 258} 259 260EXPORT_SYMBOL_GPL(dccp_destroy_sock); 261 262static inline int dccp_listen_start(struct sock *sk, int backlog) 263{ 264 struct dccp_sock *dp = dccp_sk(sk); 265 266 dp->dccps_role = DCCP_ROLE_LISTEN; 267 return inet_csk_listen_start(sk, backlog); 268} 269 270int dccp_disconnect(struct sock *sk, int flags) 271{ 272 struct inet_connection_sock *icsk = inet_csk(sk); 273 struct inet_sock *inet = inet_sk(sk); 274 int err = 0; 275 const int old_state = sk->sk_state; 276 277 if (old_state != DCCP_CLOSED) 278 dccp_set_state(sk, DCCP_CLOSED); 279 280 /* ABORT function of RFC793 */ 281 if (old_state == DCCP_LISTEN) { 282 inet_csk_listen_stop(sk); 283 } else if (old_state == DCCP_REQUESTING) 284 sk->sk_err = ECONNRESET; 285 286 dccp_clear_xmit_timers(sk); 287 __skb_queue_purge(&sk->sk_receive_queue); 288 if (sk->sk_send_head != NULL) { 289 __kfree_skb(sk->sk_send_head); 290 sk->sk_send_head = NULL; 291 } 292 293 inet->dport = 0; 294 295 if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) 296 inet_reset_saddr(sk); 297 298 sk->sk_shutdown = 0; 299 sock_reset_flag(sk, SOCK_DONE); 300 301 icsk->icsk_backoff = 0; 302 inet_csk_delack_init(sk); 303 __sk_dst_reset(sk); 304 305 BUG_TRAP(!inet->num || icsk->icsk_bind_hash); 306 307 sk->sk_error_report(sk); 308 return err; 309} 310 311EXPORT_SYMBOL_GPL(dccp_disconnect); 312 313/* 314 * Wait for a DCCP event. 315 * 316 * Note that we don't need to lock the socket, as the upper poll layers 317 * take care of normal races (between the test and the event) and we don't 318 * go look at any of the socket buffers directly. 319 */ 320unsigned int dccp_poll(struct file *file, struct socket *sock, 321 poll_table *wait) 322{ 323 unsigned int mask; 324 struct sock *sk = sock->sk; 325 326 poll_wait(file, sk->sk_sleep, wait); 327 if (sk->sk_state == DCCP_LISTEN) 328 return inet_csk_listen_poll(sk); 329 330 /* Socket is not locked. We are protected from async events 331 by poll logic and correct handling of state changes 332 made by another threads is impossible in any case. 333 */ 334 335 mask = 0; 336 if (sk->sk_err) 337 mask = POLLERR; 338 339 if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED) 340 mask |= POLLHUP; 341 if (sk->sk_shutdown & RCV_SHUTDOWN) 342 mask |= POLLIN | POLLRDNORM | POLLRDHUP; 343 344 /* Connected? */ 345 if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) { 346 if (atomic_read(&sk->sk_rmem_alloc) > 0) 347 mask |= POLLIN | POLLRDNORM; 348 349 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) { 350 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) { 351 mask |= POLLOUT | POLLWRNORM; 352 } else { /* send SIGIO later */ 353 set_bit(SOCK_ASYNC_NOSPACE, 354 &sk->sk_socket->flags); 355 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); 356 357 /* Race breaker. If space is freed after 358 * wspace test but before the flags are set, 359 * IO signal will be lost. 360 */ 361 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) 362 mask |= POLLOUT | POLLWRNORM; 363 } 364 } 365 } 366 return mask; 367} 368 369EXPORT_SYMBOL_GPL(dccp_poll); 370 371int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg) 372{ 373 dccp_pr_debug("entry\n"); 374 return -ENOIOCTLCMD; 375} 376 377EXPORT_SYMBOL_GPL(dccp_ioctl); 378 379static int dccp_setsockopt_service(struct sock *sk, const __be32 service, 380 char __user *optval, int optlen) 381{ 382 struct dccp_sock *dp = dccp_sk(sk); 383 struct dccp_service_list *sl = NULL; 384 385 if (service == DCCP_SERVICE_INVALID_VALUE || 386 optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32)) 387 return -EINVAL; 388 389 if (optlen > sizeof(service)) { 390 sl = kmalloc(optlen, GFP_KERNEL); 391 if (sl == NULL) 392 return -ENOMEM; 393 394 sl->dccpsl_nr = optlen / sizeof(u32) - 1; 395 if (copy_from_user(sl->dccpsl_list, 396 optval + sizeof(service), 397 optlen - sizeof(service)) || 398 dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) { 399 kfree(sl); 400 return -EFAULT; 401 } 402 } 403 404 lock_sock(sk); 405 dp->dccps_service = service; 406 407 kfree(dp->dccps_service_list); 408 409 dp->dccps_service_list = sl; 410 release_sock(sk); 411 return 0; 412} 413 414/* byte 1 is feature. the rest is the preference list */ 415static int dccp_setsockopt_change(struct sock *sk, int type, 416 struct dccp_so_feat __user *optval) 417{ 418 struct dccp_so_feat opt; 419 u8 *val; 420 int rc; 421 422 if (copy_from_user(&opt, optval, sizeof(opt))) 423 return -EFAULT; 424 425 val = kmalloc(opt.dccpsf_len, GFP_KERNEL); 426 if (!val) 427 return -ENOMEM; 428 429 if (copy_from_user(val, opt.dccpsf_val, opt.dccpsf_len)) { 430 rc = -EFAULT; 431 goto out_free_val; 432 } 433 434 rc = dccp_feat_change(dccp_msk(sk), type, opt.dccpsf_feat, 435 val, opt.dccpsf_len, GFP_KERNEL); 436 if (rc) 437 goto out_free_val; 438 439out: 440 return rc; 441 442out_free_val: 443 kfree(val); 444 goto out; 445} 446 447static int do_dccp_setsockopt(struct sock *sk, int level, int optname, 448 char __user *optval, int optlen) 449{ 450 struct dccp_sock *dp = dccp_sk(sk); 451 int val, err = 0; 452 453 if (optlen < sizeof(int)) 454 return -EINVAL; 455 456 if (get_user(val, (int __user *)optval)) 457 return -EFAULT; 458 459 if (optname == DCCP_SOCKOPT_SERVICE) 460 return dccp_setsockopt_service(sk, val, optval, optlen); 461 462 lock_sock(sk); 463 switch (optname) { 464 case DCCP_SOCKOPT_PACKET_SIZE: 465 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n"); 466 err = 0; 467 break; 468 case DCCP_SOCKOPT_CHANGE_L: 469 if (optlen != sizeof(struct dccp_so_feat)) 470 err = -EINVAL; 471 else 472 err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L, 473 (struct dccp_so_feat __user *) 474 optval); 475 break; 476 case DCCP_SOCKOPT_CHANGE_R: 477 if (optlen != sizeof(struct dccp_so_feat)) 478 err = -EINVAL; 479 else 480 err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R, 481 (struct dccp_so_feat __user *) 482 optval); 483 break; 484 case DCCP_SOCKOPT_SEND_CSCOV: /* sender side, RFC 4340, sec. 9.2 */ 485 if (val < 0 || val > 15) 486 err = -EINVAL; 487 else 488 dp->dccps_pcslen = val; 489 break; 490 case DCCP_SOCKOPT_RECV_CSCOV: /* receiver side, RFC 4340 sec. 9.2.1 */ 491 if (val < 0 || val > 15) 492 err = -EINVAL; 493 else { 494 dp->dccps_pcrlen = val; 495 } 496 break; 497 default: 498 err = -ENOPROTOOPT; 499 break; 500 } 501 502 release_sock(sk); 503 return err; 504} 505 506int dccp_setsockopt(struct sock *sk, int level, int optname, 507 char __user *optval, int optlen) 508{ 509 if (level != SOL_DCCP) 510 return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level, 511 optname, optval, 512 optlen); 513 return do_dccp_setsockopt(sk, level, optname, optval, optlen); 514} 515 516EXPORT_SYMBOL_GPL(dccp_setsockopt); 517 518#ifdef CONFIG_COMPAT 519int compat_dccp_setsockopt(struct sock *sk, int level, int optname, 520 char __user *optval, int optlen) 521{ 522 if (level != SOL_DCCP) 523 return inet_csk_compat_setsockopt(sk, level, optname, 524 optval, optlen); 525 return do_dccp_setsockopt(sk, level, optname, optval, optlen); 526} 527 528EXPORT_SYMBOL_GPL(compat_dccp_setsockopt); 529#endif 530 531static int dccp_getsockopt_service(struct sock *sk, int len, 532 __be32 __user *optval, 533 int __user *optlen) 534{ 535 const struct dccp_sock *dp = dccp_sk(sk); 536 const struct dccp_service_list *sl; 537 int err = -ENOENT, slen = 0, total_len = sizeof(u32); 538 539 lock_sock(sk); 540 if ((sl = dp->dccps_service_list) != NULL) { 541 slen = sl->dccpsl_nr * sizeof(u32); 542 total_len += slen; 543 } 544 545 err = -EINVAL; 546 if (total_len > len) 547 goto out; 548 549 err = 0; 550 if (put_user(total_len, optlen) || 551 put_user(dp->dccps_service, optval) || 552 (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen))) 553 err = -EFAULT; 554out: 555 release_sock(sk); 556 return err; 557} 558 559static int do_dccp_getsockopt(struct sock *sk, int level, int optname, 560 char __user *optval, int __user *optlen) 561{ 562 struct dccp_sock *dp; 563 int val, len; 564 565 if (get_user(len, optlen)) 566 return -EFAULT; 567 568 if (len < (int)sizeof(int)) 569 return -EINVAL; 570 571 dp = dccp_sk(sk); 572 573 switch (optname) { 574 case DCCP_SOCKOPT_PACKET_SIZE: 575 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n"); 576 return 0; 577 case DCCP_SOCKOPT_SERVICE: 578 return dccp_getsockopt_service(sk, len, 579 (__be32 __user *)optval, optlen); 580 case DCCP_SOCKOPT_SEND_CSCOV: 581 val = dp->dccps_pcslen; 582 len = sizeof(val); 583 break; 584 case DCCP_SOCKOPT_RECV_CSCOV: 585 val = dp->dccps_pcrlen; 586 len = sizeof(val); 587 break; 588 case 128 ... 191: 589 return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname, 590 len, (u32 __user *)optval, optlen); 591 case 192 ... 255: 592 return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname, 593 len, (u32 __user *)optval, optlen); 594 default: 595 return -ENOPROTOOPT; 596 } 597 598 if (put_user(len, optlen) || copy_to_user(optval, &val, len)) 599 return -EFAULT; 600 601 return 0; 602} 603 604int dccp_getsockopt(struct sock *sk, int level, int optname, 605 char __user *optval, int __user *optlen) 606{ 607 if (level != SOL_DCCP) 608 return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level, 609 optname, optval, 610 optlen); 611 return do_dccp_getsockopt(sk, level, optname, optval, optlen); 612} 613 614EXPORT_SYMBOL_GPL(dccp_getsockopt); 615 616#ifdef CONFIG_COMPAT 617int compat_dccp_getsockopt(struct sock *sk, int level, int optname, 618 char __user *optval, int __user *optlen) 619{ 620 if (level != SOL_DCCP) 621 return inet_csk_compat_getsockopt(sk, level, optname, 622 optval, optlen); 623 return do_dccp_getsockopt(sk, level, optname, optval, optlen); 624} 625 626EXPORT_SYMBOL_GPL(compat_dccp_getsockopt); 627#endif 628 629int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, 630 size_t len) 631{ 632 const struct dccp_sock *dp = dccp_sk(sk); 633 const int flags = msg->msg_flags; 634 const int noblock = flags & MSG_DONTWAIT; 635 struct sk_buff *skb; 636 int rc, size; 637 long timeo; 638 639 if (len > dp->dccps_mss_cache) 640 return -EMSGSIZE; 641 642 lock_sock(sk); 643 644 if (sysctl_dccp_tx_qlen && 645 (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) { 646 rc = -EAGAIN; 647 goto out_release; 648 } 649 650 timeo = sock_sndtimeo(sk, noblock); 651 652 /* 653 * We have to use sk_stream_wait_connect here to set sk_write_pending, 654 * so that the trick in dccp_rcv_request_sent_state_process. 655 */ 656 /* Wait for a connection to finish. */ 657 if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN | DCCPF_CLOSING)) 658 if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0) 659 goto out_release; 660 661 size = sk->sk_prot->max_header + len; 662 release_sock(sk); 663 skb = sock_alloc_send_skb(sk, size, noblock, &rc); 664 lock_sock(sk); 665 if (skb == NULL) 666 goto out_release; 667 668 skb_reserve(skb, sk->sk_prot->max_header); 669 rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len); 670 if (rc != 0) 671 goto out_discard; 672 673 skb_queue_tail(&sk->sk_write_queue, skb); 674 dccp_write_xmit(sk,0); 675out_release: 676 release_sock(sk); 677 return rc ? : len; 678out_discard: 679 kfree_skb(skb); 680 goto out_release; 681} 682 683EXPORT_SYMBOL_GPL(dccp_sendmsg); 684 685int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, 686 size_t len, int nonblock, int flags, int *addr_len) 687{ 688 const struct dccp_hdr *dh; 689 long timeo; 690 691 lock_sock(sk); 692 693 if (sk->sk_state == DCCP_LISTEN) { 694 len = -ENOTCONN; 695 goto out; 696 } 697 698 timeo = sock_rcvtimeo(sk, nonblock); 699 700 do { 701 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue); 702 703 if (skb == NULL) 704 goto verify_sock_status; 705 706 dh = dccp_hdr(skb); 707 708 if (dh->dccph_type == DCCP_PKT_DATA || 709 dh->dccph_type == DCCP_PKT_DATAACK) 710 goto found_ok_skb; 711 712 if (dh->dccph_type == DCCP_PKT_RESET || 713 dh->dccph_type == DCCP_PKT_CLOSE) { 714 dccp_pr_debug("found fin ok!\n"); 715 len = 0; 716 goto found_fin_ok; 717 } 718 dccp_pr_debug("packet_type=%s\n", 719 dccp_packet_name(dh->dccph_type)); 720 sk_eat_skb(sk, skb, 0); 721verify_sock_status: 722 if (sock_flag(sk, SOCK_DONE)) { 723 len = 0; 724 break; 725 } 726 727 if (sk->sk_err) { 728 len = sock_error(sk); 729 break; 730 } 731 732 if (sk->sk_shutdown & RCV_SHUTDOWN) { 733 len = 0; 734 break; 735 } 736 737 if (sk->sk_state == DCCP_CLOSED) { 738 if (!sock_flag(sk, SOCK_DONE)) { 739 /* This occurs when user tries to read 740 * from never connected socket. 741 */ 742 len = -ENOTCONN; 743 break; 744 } 745 len = 0; 746 break; 747 } 748 749 if (!timeo) { 750 len = -EAGAIN; 751 break; 752 } 753 754 if (signal_pending(current)) { 755 len = sock_intr_errno(timeo); 756 break; 757 } 758 759 sk_wait_data(sk, &timeo); 760 continue; 761 found_ok_skb: 762 if (len > skb->len) 763 len = skb->len; 764 else if (len < skb->len) 765 msg->msg_flags |= MSG_TRUNC; 766 767 if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) { 768 /* Exception. Bailout! */ 769 len = -EFAULT; 770 break; 771 } 772 found_fin_ok: 773 if (!(flags & MSG_PEEK)) 774 sk_eat_skb(sk, skb, 0); 775 break; 776 } while (1); 777out: 778 release_sock(sk); 779 return len; 780} 781 782EXPORT_SYMBOL_GPL(dccp_recvmsg); 783 784int inet_dccp_listen(struct socket *sock, int backlog) 785{ 786 struct sock *sk = sock->sk; 787 unsigned char old_state; 788 int err; 789 790 lock_sock(sk); 791 792 err = -EINVAL; 793 if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP) 794 goto out; 795 796 old_state = sk->sk_state; 797 if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN))) 798 goto out; 799 800 /* Really, if the socket is already in listen state 801 * we can only allow the backlog to be adjusted. 802 */ 803 if (old_state != DCCP_LISTEN) { 804 err = dccp_listen_start(sk, backlog); 805 if (err) 806 goto out; 807 } 808 sk->sk_max_ack_backlog = backlog; 809 err = 0; 810 811out: 812 release_sock(sk); 813 return err; 814} 815 816EXPORT_SYMBOL_GPL(inet_dccp_listen); 817 818static const unsigned char dccp_new_state[] = { 819 /* current state: new state: action: */ 820 [0] = DCCP_CLOSED, 821 [DCCP_OPEN] = DCCP_CLOSING | DCCP_ACTION_FIN, 822 [DCCP_REQUESTING] = DCCP_CLOSED, 823 [DCCP_PARTOPEN] = DCCP_CLOSING | DCCP_ACTION_FIN, 824 [DCCP_LISTEN] = DCCP_CLOSED, 825 [DCCP_RESPOND] = DCCP_CLOSED, 826 [DCCP_CLOSING] = DCCP_CLOSED, 827 [DCCP_TIME_WAIT] = DCCP_CLOSED, 828 [DCCP_CLOSED] = DCCP_CLOSED, 829}; 830 831static int dccp_close_state(struct sock *sk) 832{ 833 const int next = dccp_new_state[sk->sk_state]; 834 const int ns = next & DCCP_STATE_MASK; 835 836 if (ns != sk->sk_state) 837 dccp_set_state(sk, ns); 838 839 return next & DCCP_ACTION_FIN; 840} 841 842void dccp_close(struct sock *sk, long timeout) 843{ 844 struct dccp_sock *dp = dccp_sk(sk); 845 struct sk_buff *skb; 846 int state; 847 848 lock_sock(sk); 849 850 sk->sk_shutdown = SHUTDOWN_MASK; 851 852 if (sk->sk_state == DCCP_LISTEN) { 853 dccp_set_state(sk, DCCP_CLOSED); 854 855 /* Special case. */ 856 inet_csk_listen_stop(sk); 857 858 goto adjudge_to_death; 859 } 860 861 sk_stop_timer(sk, &dp->dccps_xmit_timer); 862 863 /* 864 * We need to flush the recv. buffs. We do this only on the 865 * descriptor close, not protocol-sourced closes, because the 866 *reader process may not have drained the data yet! 867 */ 868 while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) { 869 __kfree_skb(skb); 870 } 871 872 if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) { 873 /* Check zero linger _after_ checking for unread data. */ 874 sk->sk_prot->disconnect(sk, 0); 875 } else if (dccp_close_state(sk)) { 876 dccp_send_close(sk, 1); 877 } 878 879 sk_stream_wait_close(sk, timeout); 880 881adjudge_to_death: 882 state = sk->sk_state; 883 sock_hold(sk); 884 sock_orphan(sk); 885 atomic_inc(sk->sk_prot->orphan_count); 886 887 /* 888 * It is the last release_sock in its life. It will remove backlog. 889 */ 890 release_sock(sk); 891 /* 892 * Now socket is owned by kernel and we acquire BH lock 893 * to finish close. No need to check for user refs. 894 */ 895 local_bh_disable(); 896 bh_lock_sock(sk); 897 BUG_TRAP(!sock_owned_by_user(sk)); 898 899 /* Have we already been destroyed by a softirq or backlog? */ 900 if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED) 901 goto out; 902 903 /* 904 * The last release_sock may have processed the CLOSE or RESET 905 * packet moving sock to CLOSED state, if not we have to fire 906 * the CLOSE/CLOSEREQ retransmission timer, see "8.3. Termination" 907 * in draft-ietf-dccp-spec-11. -acme 908 */ 909 if (sk->sk_state == DCCP_CLOSING) { 910 /* Timer for repeating the CLOSE/CLOSEREQ until an answer. */ 911 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, 912 inet_csk(sk)->icsk_rto, 913 DCCP_RTO_MAX); 914 } 915 916 if (sk->sk_state == DCCP_CLOSED) 917 inet_csk_destroy_sock(sk); 918 919 /* Otherwise, socket is reprieved until protocol close. */ 920 921out: 922 bh_unlock_sock(sk); 923 local_bh_enable(); 924 sock_put(sk); 925} 926 927EXPORT_SYMBOL_GPL(dccp_close); 928 929void dccp_shutdown(struct sock *sk, int how) 930{ 931 dccp_pr_debug("entry\n"); 932} 933 934EXPORT_SYMBOL_GPL(dccp_shutdown); 935 936static int __init dccp_mib_init(void) 937{ 938 int rc = -ENOMEM; 939 940 dccp_statistics[0] = alloc_percpu(struct dccp_mib); 941 if (dccp_statistics[0] == NULL) 942 goto out; 943 944 dccp_statistics[1] = alloc_percpu(struct dccp_mib); 945 if (dccp_statistics[1] == NULL) 946 goto out_free_one; 947 948 rc = 0; 949out: 950 return rc; 951out_free_one: 952 free_percpu(dccp_statistics[0]); 953 dccp_statistics[0] = NULL; 954 goto out; 955 956} 957 958static void dccp_mib_exit(void) 959{ 960 free_percpu(dccp_statistics[0]); 961 free_percpu(dccp_statistics[1]); 962 dccp_statistics[0] = dccp_statistics[1] = NULL; 963} 964 965static int thash_entries; 966module_param(thash_entries, int, 0444); 967MODULE_PARM_DESC(thash_entries, "Number of ehash buckets"); 968 969#ifdef CONFIG_IP_DCCP_DEBUG 970int dccp_debug; 971module_param(dccp_debug, int, 0444); 972MODULE_PARM_DESC(dccp_debug, "Enable debug messages"); 973 974EXPORT_SYMBOL_GPL(dccp_debug); 975#endif 976 977static int __init dccp_init(void) 978{ 979 unsigned long goal; 980 int ehash_order, bhash_order, i; 981 int rc = -ENOBUFS; 982 983 dccp_hashinfo.bind_bucket_cachep = 984 kmem_cache_create("dccp_bind_bucket", 985 sizeof(struct inet_bind_bucket), 0, 986 SLAB_HWCACHE_ALIGN, NULL, NULL); 987 if (!dccp_hashinfo.bind_bucket_cachep) 988 goto out; 989 990 /* 991 * Size and allocate the main established and bind bucket 992 * hash tables. 993 * 994 * The methodology is similar to that of the buffer cache. 995 */ 996 if (num_physpages >= (128 * 1024)) 997 goal = num_physpages >> (21 - PAGE_SHIFT); 998 else 999 goal = num_physpages >> (23 - PAGE_SHIFT); 1000 1001 if (thash_entries) 1002 goal = (thash_entries * 1003 sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT; 1004 for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++) 1005 ; 1006 do { 1007 dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE / 1008 sizeof(struct inet_ehash_bucket); 1009 while (dccp_hashinfo.ehash_size & 1010 (dccp_hashinfo.ehash_size - 1)) 1011 dccp_hashinfo.ehash_size--; 1012 dccp_hashinfo.ehash = (struct inet_ehash_bucket *) 1013 __get_free_pages(GFP_ATOMIC, ehash_order); 1014 } while (!dccp_hashinfo.ehash && --ehash_order > 0); 1015 1016 if (!dccp_hashinfo.ehash) { 1017 DCCP_CRIT("Failed to allocate DCCP established hash table"); 1018 goto out_free_bind_bucket_cachep; 1019 } 1020 1021 for (i = 0; i < dccp_hashinfo.ehash_size; i++) { 1022 rwlock_init(&dccp_hashinfo.ehash[i].lock); 1023 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain); 1024 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain); 1025 } 1026 1027 bhash_order = ehash_order; 1028 1029 do { 1030 dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE / 1031 sizeof(struct inet_bind_hashbucket); 1032 if ((dccp_hashinfo.bhash_size > (64 * 1024)) && 1033 bhash_order > 0) 1034 continue; 1035 dccp_hashinfo.bhash = (struct inet_bind_hashbucket *) 1036 __get_free_pages(GFP_ATOMIC, bhash_order); 1037 } while (!dccp_hashinfo.bhash && --bhash_order >= 0); 1038 1039 if (!dccp_hashinfo.bhash) { 1040 DCCP_CRIT("Failed to allocate DCCP bind hash table"); 1041 goto out_free_dccp_ehash; 1042 } 1043 1044 for (i = 0; i < dccp_hashinfo.bhash_size; i++) { 1045 spin_lock_init(&dccp_hashinfo.bhash[i].lock); 1046 INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain); 1047 } 1048 1049 rc = dccp_mib_init(); 1050 if (rc) 1051 goto out_free_dccp_bhash; 1052 1053 rc = dccp_ackvec_init(); 1054 if (rc) 1055 goto out_free_dccp_mib; 1056 1057 rc = dccp_sysctl_init(); 1058 if (rc) 1059 goto out_ackvec_exit; 1060out: 1061 return rc; 1062out_ackvec_exit: 1063 dccp_ackvec_exit(); 1064out_free_dccp_mib: 1065 dccp_mib_exit(); 1066out_free_dccp_bhash: 1067 free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order); 1068 dccp_hashinfo.bhash = NULL; 1069out_free_dccp_ehash: 1070 free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order); 1071 dccp_hashinfo.ehash = NULL; 1072out_free_bind_bucket_cachep: 1073 kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep); 1074 dccp_hashinfo.bind_bucket_cachep = NULL; 1075 goto out; 1076} 1077 1078static void __exit dccp_fini(void) 1079{ 1080 dccp_mib_exit(); 1081 free_pages((unsigned long)dccp_hashinfo.bhash, 1082 get_order(dccp_hashinfo.bhash_size * 1083 sizeof(struct inet_bind_hashbucket))); 1084 free_pages((unsigned long)dccp_hashinfo.ehash, 1085 get_order(dccp_hashinfo.ehash_size * 1086 sizeof(struct inet_ehash_bucket))); 1087 kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep); 1088 dccp_ackvec_exit(); 1089 dccp_sysctl_exit(); 1090} 1091 1092module_init(dccp_init); 1093module_exit(dccp_fini); 1094 1095MODULE_LICENSE("GPL"); 1096MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>"); 1097MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol"); 1098