1/* 2 * Copyright (c) 2004, 2005 Intel Corporation. All rights reserved. 3 * Copyright (c) 2004 Topspin Corporation. All rights reserved. 4 * Copyright (c) 2004, 2005 Voltaire Corporation. All rights reserved. 5 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. 6 * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. 7 * Copyright (c) 2005 Network Appliance, Inc. All rights reserved. 8 * Copyright (c) 2016 Chelsio Communications. All rights reserved. 9 * 10 * This software is available to you under a choice of one of two 11 * licenses. You may choose to be licensed under the terms of the GNU 12 * General Public License (GPL) Version 2, available from the file 13 * COPYING in the main directory of this source tree, or the 14 * OpenIB.org BSD license below: 15 * 16 * Redistribution and use in source and binary forms, with or 17 * without modification, are permitted provided that the following 18 * conditions are met: 19 * 20 * - Redistributions of source code must retain the above 21 * copyright notice, this list of conditions and the following 22 * disclaimer. 23 * 24 * - Redistributions in binary form must reproduce the above 25 * copyright notice, this list of conditions and the following 26 * disclaimer in the documentation and/or other materials 27 * provided with the distribution. 28 * 29 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 30 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 31 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 32 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 33 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 34 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 35 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 36 * SOFTWARE. 37 * 38 */ 39#include "opt_inet.h" 40 41#include <linux/dma-mapping.h> 42#include <linux/err.h> 43#include <linux/idr.h> 44#include <linux/interrupt.h> 45#include <linux/rbtree.h> 46#include <linux/spinlock.h> 47#include <linux/workqueue.h> 48#include <linux/completion.h> 49#include <linux/string.h> 50#include <netinet/tcp.h> 51#include <sys/mutex.h> 52 53#include <rdma/rdma_cm.h> 54#include <rdma/iw_cm.h> 55#include <rdma/ib_addr.h> 56 57#include "iwcm.h" 58 59MODULE_AUTHOR("Tom Tucker"); 60MODULE_DESCRIPTION("iWARP CM"); 61MODULE_LICENSE("Dual BSD/GPL"); 62 63static struct workqueue_struct *iwcm_wq; 64struct iwcm_work { 65 struct work_struct work; 66 struct iwcm_id_private *cm_id; 67 struct list_head list; 68 struct iw_cm_event event; 69 struct list_head free_list; 70}; 71struct iwcm_listen_work { 72 struct work_struct work; 73 struct iw_cm_id *cm_id; 74}; 75 76static LIST_HEAD(listen_port_list); 77 78static DEFINE_MUTEX(listen_port_mutex); 79 80struct listen_port_info { 81 struct list_head list; 82 uint16_t port_num; 83 uint32_t refcnt; 84}; 85 86static int32_t 87add_port_to_listenlist(uint16_t port) 88{ 89 struct listen_port_info *port_info; 90 int err = 0; 91 92 mutex_lock(&listen_port_mutex); 93 94 list_for_each_entry(port_info, &listen_port_list, list) 95 if (port_info->port_num == port) 96 goto found_port; 97 98 port_info = kmalloc(sizeof(*port_info), GFP_KERNEL); 99 if (!port_info) { 100 err = -ENOMEM; 101 mutex_unlock(&listen_port_mutex); 102 goto out; 103 } 104 105 port_info->port_num = port; 106 port_info->refcnt = 0; 107 108 list_add(&port_info->list, &listen_port_list); 109 110found_port: 111 ++(port_info->refcnt); 112 mutex_unlock(&listen_port_mutex); 113 return port_info->refcnt; 114out: 115 return err; 116} 117 118static int32_t 119rem_port_from_listenlist(uint16_t port) 120{ 121 struct listen_port_info *port_info; 122 int ret, found_port = 0; 123 124 mutex_lock(&listen_port_mutex); 125 126 list_for_each_entry(port_info, &listen_port_list, list) 127 if (port_info->port_num == port) { 128 found_port = 1; 129 break; 130 } 131 132 if (found_port) { 133 --(port_info->refcnt); 134 ret = port_info->refcnt; 135 if (port_info->refcnt == 0) { 136 /* Remove this entry from the list as there are no 137 * more listeners for this port_num. 138 */ 139 list_del(&port_info->list); 140 kfree(port_info); 141 } 142 } else { 143 ret = -EINVAL; 144 } 145 mutex_unlock(&listen_port_mutex); 146 return ret; 147 148} 149 150/* 151 * The following services provide a mechanism for pre-allocating iwcm_work 152 * elements. The design pre-allocates them based on the cm_id type: 153 * LISTENING IDS: Get enough elements preallocated to handle the 154 * listen backlog. 155 * ACTIVE IDS: 4: CONNECT_REPLY, ESTABLISHED, DISCONNECT, CLOSE 156 * PASSIVE IDS: 3: ESTABLISHED, DISCONNECT, CLOSE 157 * 158 * Allocating them in connect and listen avoids having to deal 159 * with allocation failures on the event upcall from the provider (which 160 * is called in the interrupt context). 161 * 162 * One exception is when creating the cm_id for incoming connection requests. 163 * There are two cases: 164 * 1) in the event upcall, cm_event_handler(), for a listening cm_id. If 165 * the backlog is exceeded, then no more connection request events will 166 * be processed. cm_event_handler() returns -ENOMEM in this case. Its up 167 * to the provider to reject the connection request. 168 * 2) in the connection request workqueue handler, cm_conn_req_handler(). 169 * If work elements cannot be allocated for the new connect request cm_id, 170 * then IWCM will call the provider reject method. This is ok since 171 * cm_conn_req_handler() runs in the workqueue thread context. 172 */ 173 174static struct iwcm_work *get_work(struct iwcm_id_private *cm_id_priv) 175{ 176 struct iwcm_work *work; 177 178 if (list_empty(&cm_id_priv->work_free_list)) 179 return NULL; 180 work = list_entry(cm_id_priv->work_free_list.next, struct iwcm_work, 181 free_list); 182 list_del_init(&work->free_list); 183 return work; 184} 185 186static void put_work(struct iwcm_work *work) 187{ 188 list_add(&work->free_list, &work->cm_id->work_free_list); 189} 190 191static void dealloc_work_entries(struct iwcm_id_private *cm_id_priv) 192{ 193 struct list_head *e, *tmp; 194 195 list_for_each_safe(e, tmp, &cm_id_priv->work_free_list) 196 kfree(list_entry(e, struct iwcm_work, free_list)); 197} 198 199static int alloc_work_entries(struct iwcm_id_private *cm_id_priv, int count) 200{ 201 struct iwcm_work *work; 202 203 BUG_ON(!list_empty(&cm_id_priv->work_free_list)); 204 while (count--) { 205 work = kmalloc(sizeof(struct iwcm_work), GFP_KERNEL); 206 if (!work) { 207 dealloc_work_entries(cm_id_priv); 208 return -ENOMEM; 209 } 210 work->cm_id = cm_id_priv; 211 INIT_LIST_HEAD(&work->list); 212 put_work(work); 213 } 214 return 0; 215} 216 217/* 218 * Save private data from incoming connection requests to 219 * iw_cm_event, so the low level driver doesn't have to. Adjust 220 * the event ptr to point to the local copy. 221 */ 222static int copy_private_data(struct iw_cm_event *event) 223{ 224 void *p; 225 226 p = kmemdup(event->private_data, event->private_data_len, GFP_ATOMIC); 227 if (!p) 228 return -ENOMEM; 229 event->private_data = p; 230 return 0; 231} 232 233static void free_cm_id(struct iwcm_id_private *cm_id_priv) 234{ 235 dealloc_work_entries(cm_id_priv); 236 kfree(cm_id_priv); 237} 238 239/* 240 * Release a reference on cm_id. If the last reference is being 241 * released, enable the waiting thread (in iw_destroy_cm_id) to 242 * get woken up, and return 1 if a thread is already waiting. 243 */ 244static int iwcm_deref_id(struct iwcm_id_private *cm_id_priv) 245{ 246 BUG_ON(atomic_read(&cm_id_priv->refcount)==0); 247 if (atomic_dec_and_test(&cm_id_priv->refcount)) { 248 BUG_ON(!list_empty(&cm_id_priv->work_list)); 249 complete(&cm_id_priv->destroy_comp); 250 return 1; 251 } 252 253 return 0; 254} 255 256static void add_ref(struct iw_cm_id *cm_id) 257{ 258 struct iwcm_id_private *cm_id_priv; 259 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 260 atomic_inc(&cm_id_priv->refcount); 261} 262 263static void rem_ref(struct iw_cm_id *cm_id) 264{ 265 struct iwcm_id_private *cm_id_priv; 266 int cb_destroy; 267 268 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 269 270 /* 271 * Test bit before deref in case the cm_id gets freed on another 272 * thread. 273 */ 274 cb_destroy = test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags); 275 if (iwcm_deref_id(cm_id_priv) && cb_destroy) { 276 BUG_ON(!list_empty(&cm_id_priv->work_list)); 277 free_cm_id(cm_id_priv); 278 } 279} 280 281static int cm_event_handler(struct iw_cm_id *cm_id, struct iw_cm_event *event); 282 283struct iw_cm_id *iw_create_cm_id(struct ib_device *device, 284 struct socket *so, 285 iw_cm_handler cm_handler, 286 void *context) 287{ 288 struct iwcm_id_private *cm_id_priv; 289 290 cm_id_priv = kzalloc(sizeof(*cm_id_priv), GFP_KERNEL); 291 if (!cm_id_priv) 292 return ERR_PTR(-ENOMEM); 293 294 cm_id_priv->state = IW_CM_STATE_IDLE; 295 cm_id_priv->id.device = device; 296 cm_id_priv->id.cm_handler = cm_handler; 297 cm_id_priv->id.context = context; 298 cm_id_priv->id.event_handler = cm_event_handler; 299 cm_id_priv->id.add_ref = add_ref; 300 cm_id_priv->id.rem_ref = rem_ref; 301 cm_id_priv->id.so = so; 302 spin_lock_init(&cm_id_priv->lock); 303 atomic_set(&cm_id_priv->refcount, 1); 304 init_waitqueue_head(&cm_id_priv->connect_wait); 305 init_completion(&cm_id_priv->destroy_comp); 306 INIT_LIST_HEAD(&cm_id_priv->work_list); 307 INIT_LIST_HEAD(&cm_id_priv->work_free_list); 308 309 return &cm_id_priv->id; 310} 311EXPORT_SYMBOL(iw_create_cm_id); 312 313 314static int iwcm_modify_qp_err(struct ib_qp *qp) 315{ 316 struct ib_qp_attr qp_attr; 317 318 if (!qp) 319 return -EINVAL; 320 321 qp_attr.qp_state = IB_QPS_ERR; 322 return ib_modify_qp(qp, &qp_attr, IB_QP_STATE); 323} 324 325/* 326 * This is really the RDMAC CLOSING state. It is most similar to the 327 * IB SQD QP state. 328 */ 329static int iwcm_modify_qp_sqd(struct ib_qp *qp) 330{ 331 struct ib_qp_attr qp_attr; 332 333 BUG_ON(qp == NULL); 334 qp_attr.qp_state = IB_QPS_SQD; 335 return ib_modify_qp(qp, &qp_attr, IB_QP_STATE); 336} 337 338/* 339 * CM_ID <-- CLOSING 340 * 341 * Block if a passive or active connection is currently being processed. Then 342 * process the event as follows: 343 * - If we are ESTABLISHED, move to CLOSING and modify the QP state 344 * based on the abrupt flag 345 * - If the connection is already in the CLOSING or IDLE state, the peer is 346 * disconnecting concurrently with us and we've already seen the 347 * DISCONNECT event -- ignore the request and return 0 348 * - Disconnect on a listening endpoint returns -EINVAL 349 */ 350int iw_cm_disconnect(struct iw_cm_id *cm_id, int abrupt) 351{ 352 struct iwcm_id_private *cm_id_priv; 353 unsigned long flags; 354 int ret = 0; 355 struct ib_qp *qp = NULL; 356 357 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 358 /* Wait if we're currently in a connect or accept downcall */ 359 wait_event(cm_id_priv->connect_wait, 360 !test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags)); 361 362 spin_lock_irqsave(&cm_id_priv->lock, flags); 363 switch (cm_id_priv->state) { 364 case IW_CM_STATE_ESTABLISHED: 365 cm_id_priv->state = IW_CM_STATE_CLOSING; 366 367 /* QP could be <nul> for user-mode client */ 368 if (cm_id_priv->qp) 369 qp = cm_id_priv->qp; 370 else 371 ret = -EINVAL; 372 break; 373 case IW_CM_STATE_LISTEN: 374 ret = -EINVAL; 375 break; 376 case IW_CM_STATE_CLOSING: 377 /* remote peer closed first */ 378 case IW_CM_STATE_IDLE: 379 /* accept or connect returned !0 */ 380 break; 381 case IW_CM_STATE_CONN_RECV: 382 /* 383 * App called disconnect before/without calling accept after 384 * connect_request event delivered. 385 */ 386 break; 387 case IW_CM_STATE_CONN_SENT: 388 /* Can only get here if wait above fails */ 389 default: 390 BUG(); 391 } 392 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 393 394 if (qp) { 395 if (abrupt) 396 ret = iwcm_modify_qp_err(qp); 397 else 398 ret = iwcm_modify_qp_sqd(qp); 399 400 /* 401 * If both sides are disconnecting the QP could 402 * already be in ERR or SQD states 403 */ 404 ret = 0; 405 } 406 407 return ret; 408} 409EXPORT_SYMBOL(iw_cm_disconnect); 410 411static struct socket * 412dequeue_socket(struct socket *head) 413{ 414 struct socket *so; 415 struct sockaddr_in *remote; 416 417 ACCEPT_LOCK(); 418 so = TAILQ_FIRST(&head->so_comp); 419 if (!so) { 420 ACCEPT_UNLOCK(); 421 return NULL; 422 } 423 424 SOCK_LOCK(so); 425 /* 426 * Before changing the flags on the socket, we have to bump the 427 * reference count. Otherwise, if the protocol calls sofree(), 428 * the socket will be released due to a zero refcount. 429 */ 430 soref(so); 431 TAILQ_REMOVE(&head->so_comp, so, so_list); 432 head->so_qlen--; 433 so->so_qstate &= ~SQ_COMP; 434 so->so_head = NULL; 435 so->so_state |= SS_NBIO; 436 SOCK_UNLOCK(so); 437 ACCEPT_UNLOCK(); 438 remote = NULL; 439 soaccept(so, (struct sockaddr **)&remote); 440 441 free(remote, M_SONAME); 442 return so; 443} 444static void 445iw_so_event_handler(struct work_struct *_work) 446{ 447#ifdef INET 448 struct iwcm_listen_work *work = container_of(_work, 449 struct iwcm_listen_work, work); 450 struct iw_cm_id *listen_cm_id = work->cm_id; 451 struct iwcm_id_private *cm_id_priv; 452 struct iw_cm_id *real_cm_id; 453 struct sockaddr_in *local; 454 struct socket *so; 455 456 cm_id_priv = container_of(listen_cm_id, struct iwcm_id_private, id); 457 458 if (cm_id_priv->state != IW_CM_STATE_LISTEN) { 459 kfree(work); 460 return; 461 } 462 463 /* Dequeue & process all new 'so' connection requests for this cmid */ 464 while ((so = dequeue_socket(work->cm_id->so)) != NULL) { 465 if (rdma_cma_any_addr((struct sockaddr *) 466 &listen_cm_id->local_addr)) { 467 in_getsockaddr(so, (struct sockaddr **)&local); 468 if (rdma_find_cmid_laddr(local, ARPHRD_ETHER, 469 (void **) &real_cm_id)) { 470 free(local, M_SONAME); 471 goto err; 472 } 473 free(local, M_SONAME); 474 475 real_cm_id->device->iwcm->newconn(real_cm_id, so); 476 } else { 477 listen_cm_id->device->iwcm->newconn(listen_cm_id, so); 478 } 479 } 480err: 481 kfree(work); 482#endif 483 return; 484} 485static int 486iw_so_upcall(struct socket *parent_so, void *arg, int waitflag) 487{ 488 struct iwcm_listen_work *work; 489 struct socket *so; 490 struct iw_cm_id *cm_id = arg; 491 492 /* check whether iw_so_event_handler() already dequeued this 'so' */ 493 so = TAILQ_FIRST(&parent_so->so_comp); 494 if (!so) 495 return SU_OK; 496 work = kzalloc(sizeof(*work), M_NOWAIT); 497 if (!work) 498 return -ENOMEM; 499 work->cm_id = cm_id; 500 501 INIT_WORK(&work->work, iw_so_event_handler); 502 queue_work(iwcm_wq, &work->work); 503 504 return SU_OK; 505} 506 507static void 508iw_init_sock(struct iw_cm_id *cm_id) 509{ 510 struct sockopt sopt; 511 struct socket *so = cm_id->so; 512 int on = 1; 513 514 SOCK_LOCK(so); 515 soupcall_set(so, SO_RCV, iw_so_upcall, cm_id); 516 so->so_state |= SS_NBIO; 517 SOCK_UNLOCK(so); 518 sopt.sopt_dir = SOPT_SET; 519 sopt.sopt_level = IPPROTO_TCP; 520 sopt.sopt_name = TCP_NODELAY; 521 sopt.sopt_val = (caddr_t)&on; 522 sopt.sopt_valsize = sizeof(on); 523 sopt.sopt_td = NULL; 524 sosetopt(so, &sopt); 525} 526 527static int 528iw_uninit_socket(struct iw_cm_id *cm_id) 529{ 530 struct socket *so = cm_id->so; 531 532 SOCK_LOCK(so); 533 soupcall_clear(so, SO_RCV); 534 SOCK_UNLOCK(so); 535 536 return (0); 537} 538 539static int 540iw_create_listen(struct iw_cm_id *cm_id, int backlog) 541{ 542 int rc; 543 544 iw_init_sock(cm_id); 545 rc = -solisten(cm_id->so, backlog, curthread); 546 if (rc != 0) 547 iw_uninit_socket(cm_id); 548 return (rc); 549} 550 551static int 552iw_destroy_listen(struct iw_cm_id *cm_id) 553{ 554 555 return (iw_uninit_socket(cm_id)); 556} 557 558 559/* 560 * CM_ID <-- DESTROYING 561 * 562 * Clean up all resources associated with the connection and release 563 * the initial reference taken by iw_create_cm_id. 564 */ 565static void destroy_cm_id(struct iw_cm_id *cm_id) 566{ 567 struct iwcm_id_private *cm_id_priv; 568 unsigned long flags; 569 int ret = 0, refcnt; 570 571 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 572 /* 573 * Wait if we're currently in a connect or accept downcall. A 574 * listening endpoint should never block here. 575 */ 576 wait_event(cm_id_priv->connect_wait, 577 !test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags)); 578 579 spin_lock_irqsave(&cm_id_priv->lock, flags); 580 switch (cm_id_priv->state) { 581 case IW_CM_STATE_LISTEN: 582 cm_id_priv->state = IW_CM_STATE_DESTROYING; 583 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 584 if (rdma_cma_any_addr((struct sockaddr *)&cm_id->local_addr)) { 585 refcnt = 586 rem_port_from_listenlist(cm_id->local_addr.sin_port); 587 588 if (refcnt == 0) 589 ret = iw_destroy_listen(cm_id); 590 591 cm_id->device->iwcm->destroy_listen_ep(cm_id); 592 } else { 593 ret = iw_destroy_listen(cm_id); 594 cm_id->device->iwcm->destroy_listen_ep(cm_id); 595 } 596 spin_lock_irqsave(&cm_id_priv->lock, flags); 597 break; 598 case IW_CM_STATE_ESTABLISHED: 599 cm_id_priv->state = IW_CM_STATE_DESTROYING; 600 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 601 /* Abrupt close of the connection */ 602 (void)iwcm_modify_qp_err(cm_id_priv->qp); 603 spin_lock_irqsave(&cm_id_priv->lock, flags); 604 break; 605 case IW_CM_STATE_IDLE: 606 case IW_CM_STATE_CLOSING: 607 cm_id_priv->state = IW_CM_STATE_DESTROYING; 608 break; 609 case IW_CM_STATE_CONN_RECV: 610 /* 611 * App called destroy before/without calling accept after 612 * receiving connection request event notification or 613 * returned non zero from the event callback function. 614 * In either case, must tell the provider to reject. 615 */ 616 cm_id_priv->state = IW_CM_STATE_DESTROYING; 617 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 618 cm_id->device->iwcm->reject(cm_id, NULL, 0); 619 spin_lock_irqsave(&cm_id_priv->lock, flags); 620 break; 621 case IW_CM_STATE_CONN_SENT: 622 case IW_CM_STATE_DESTROYING: 623 default: 624 BUG(); 625 break; 626 } 627 if (cm_id_priv->qp) { 628 cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp); 629 cm_id_priv->qp = NULL; 630 } 631 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 632 633 (void)iwcm_deref_id(cm_id_priv); 634} 635 636/* 637 * This function is only called by the application thread and cannot 638 * be called by the event thread. The function will wait for all 639 * references to be released on the cm_id and then kfree the cm_id 640 * object. 641 */ 642void iw_destroy_cm_id(struct iw_cm_id *cm_id) 643{ 644 struct iwcm_id_private *cm_id_priv; 645 646 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 647 BUG_ON(test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags)); 648 649 destroy_cm_id(cm_id); 650 651 wait_for_completion(&cm_id_priv->destroy_comp); 652 653 if (cm_id->so) 654 sock_release(cm_id->so); 655 656 free_cm_id(cm_id_priv); 657} 658EXPORT_SYMBOL(iw_destroy_cm_id); 659 660/* 661 * CM_ID <-- LISTEN 662 * 663 * Start listening for connect requests. Generates one CONNECT_REQUEST 664 * event for each inbound connect request. 665 */ 666int iw_cm_listen(struct iw_cm_id *cm_id, int backlog) 667{ 668 struct iwcm_id_private *cm_id_priv; 669 unsigned long flags; 670 int ret, refcnt; 671 672 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 673 674 ret = alloc_work_entries(cm_id_priv, backlog); 675 if (ret) 676 return ret; 677 678 spin_lock_irqsave(&cm_id_priv->lock, flags); 679 switch (cm_id_priv->state) { 680 case IW_CM_STATE_IDLE: 681 cm_id_priv->state = IW_CM_STATE_LISTEN; 682 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 683 684 if (rdma_cma_any_addr((struct sockaddr *)&cm_id->local_addr)) { 685 refcnt = 686 add_port_to_listenlist(cm_id->local_addr.sin_port); 687 688 if (refcnt == 1) { 689 ret = iw_create_listen(cm_id, backlog); 690 } else if (refcnt <= 0) { 691 ret = -EINVAL; 692 } else { 693 /* if refcnt > 1, a socket listener created 694 * already. And we need not create socket 695 * listener on other rdma devices/listen cm_id's 696 * due to TOE. That is when a socket listener is 697 * created with INADDR_ANY all registered TOE 698 * devices will get a call to start 699 * hardware listeners. 700 */ 701 } 702 } else { 703 ret = iw_create_listen(cm_id, backlog); 704 } 705 if (!ret) 706 cm_id->device->iwcm->create_listen_ep(cm_id, backlog); 707 else 708 cm_id_priv->state = IW_CM_STATE_IDLE; 709 710 spin_lock_irqsave(&cm_id_priv->lock, flags); 711 break; 712 default: 713 ret = -EINVAL; 714 } 715 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 716 717 return ret; 718} 719EXPORT_SYMBOL(iw_cm_listen); 720 721/* 722 * CM_ID <-- IDLE 723 * 724 * Rejects an inbound connection request. No events are generated. 725 */ 726int iw_cm_reject(struct iw_cm_id *cm_id, 727 const void *private_data, 728 u8 private_data_len) 729{ 730 struct iwcm_id_private *cm_id_priv; 731 unsigned long flags; 732 int ret; 733 734 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 735 set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 736 737 spin_lock_irqsave(&cm_id_priv->lock, flags); 738 if (cm_id_priv->state != IW_CM_STATE_CONN_RECV) { 739 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 740 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 741 wake_up_all(&cm_id_priv->connect_wait); 742 return -EINVAL; 743 } 744 cm_id_priv->state = IW_CM_STATE_IDLE; 745 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 746 747 ret = cm_id->device->iwcm->reject(cm_id, private_data, 748 private_data_len); 749 750 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 751 wake_up_all(&cm_id_priv->connect_wait); 752 753 return ret; 754} 755EXPORT_SYMBOL(iw_cm_reject); 756 757/* 758 * CM_ID <-- ESTABLISHED 759 * 760 * Accepts an inbound connection request and generates an ESTABLISHED 761 * event. Callers of iw_cm_disconnect and iw_destroy_cm_id will block 762 * until the ESTABLISHED event is received from the provider. 763 */ 764int iw_cm_accept(struct iw_cm_id *cm_id, 765 struct iw_cm_conn_param *iw_param) 766{ 767 struct iwcm_id_private *cm_id_priv; 768 struct ib_qp *qp; 769 unsigned long flags; 770 int ret; 771 772 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 773 set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 774 775 spin_lock_irqsave(&cm_id_priv->lock, flags); 776 if (cm_id_priv->state != IW_CM_STATE_CONN_RECV) { 777 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 778 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 779 wake_up_all(&cm_id_priv->connect_wait); 780 return -EINVAL; 781 } 782 /* Get the ib_qp given the QPN */ 783 qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn); 784 if (!qp) { 785 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 786 return -EINVAL; 787 } 788 cm_id->device->iwcm->add_ref(qp); 789 cm_id_priv->qp = qp; 790 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 791 792 ret = cm_id->device->iwcm->accept(cm_id, iw_param); 793 if (ret) { 794 /* An error on accept precludes provider events */ 795 BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV); 796 cm_id_priv->state = IW_CM_STATE_IDLE; 797 spin_lock_irqsave(&cm_id_priv->lock, flags); 798 if (cm_id_priv->qp) { 799 cm_id->device->iwcm->rem_ref(qp); 800 cm_id_priv->qp = NULL; 801 } 802 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 803 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 804 wake_up_all(&cm_id_priv->connect_wait); 805 } 806 807 return ret; 808} 809EXPORT_SYMBOL(iw_cm_accept); 810 811/* 812 * Active Side: CM_ID <-- CONN_SENT 813 * 814 * If successful, results in the generation of a CONNECT_REPLY 815 * event. iw_cm_disconnect and iw_cm_destroy will block until the 816 * CONNECT_REPLY event is received from the provider. 817 */ 818int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param) 819{ 820 struct iwcm_id_private *cm_id_priv; 821 int ret; 822 unsigned long flags; 823 struct ib_qp *qp; 824 825 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 826 827 ret = alloc_work_entries(cm_id_priv, 4); 828 if (ret) 829 return ret; 830 831 set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 832 spin_lock_irqsave(&cm_id_priv->lock, flags); 833 834 if (cm_id_priv->state != IW_CM_STATE_IDLE) { 835 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 836 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 837 wake_up_all(&cm_id_priv->connect_wait); 838 return -EINVAL; 839 } 840 841 /* Get the ib_qp given the QPN */ 842 qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn); 843 if (!qp) { 844 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 845 return -EINVAL; 846 } 847 cm_id->device->iwcm->add_ref(qp); 848 cm_id_priv->qp = qp; 849 cm_id_priv->state = IW_CM_STATE_CONN_SENT; 850 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 851 852 ret = cm_id->device->iwcm->connect(cm_id, iw_param); 853 if (ret) { 854 spin_lock_irqsave(&cm_id_priv->lock, flags); 855 if (cm_id_priv->qp) { 856 cm_id->device->iwcm->rem_ref(qp); 857 cm_id_priv->qp = NULL; 858 } 859 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 860 BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_SENT); 861 cm_id_priv->state = IW_CM_STATE_IDLE; 862 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 863 wake_up_all(&cm_id_priv->connect_wait); 864 } 865 866 return ret; 867} 868EXPORT_SYMBOL(iw_cm_connect); 869 870/* 871 * Passive Side: new CM_ID <-- CONN_RECV 872 * 873 * Handles an inbound connect request. The function creates a new 874 * iw_cm_id to represent the new connection and inherits the client 875 * callback function and other attributes from the listening parent. 876 * 877 * The work item contains a pointer to the listen_cm_id and the event. The 878 * listen_cm_id contains the client cm_handler, context and 879 * device. These are copied when the device is cloned. The event 880 * contains the new four tuple. 881 * 882 * An error on the child should not affect the parent, so this 883 * function does not return a value. 884 */ 885static void cm_conn_req_handler(struct iwcm_id_private *listen_id_priv, 886 struct iw_cm_event *iw_event) 887{ 888 unsigned long flags; 889 struct iw_cm_id *cm_id; 890 struct iwcm_id_private *cm_id_priv; 891 int ret; 892 893 /* 894 * The provider should never generate a connection request 895 * event with a bad status. 896 */ 897 BUG_ON(iw_event->status); 898 899 /* 900 * We could be destroying the listening id. If so, ignore this 901 * upcall. 902 */ 903 spin_lock_irqsave(&listen_id_priv->lock, flags); 904 if (listen_id_priv->state != IW_CM_STATE_LISTEN) { 905 spin_unlock_irqrestore(&listen_id_priv->lock, flags); 906 goto out; 907 } 908 spin_unlock_irqrestore(&listen_id_priv->lock, flags); 909 910 cm_id = iw_create_cm_id(listen_id_priv->id.device, 911 iw_event->so, 912 listen_id_priv->id.cm_handler, 913 listen_id_priv->id.context); 914 /* If the cm_id could not be created, ignore the request */ 915 if (IS_ERR(cm_id)) 916 goto out; 917 918 cm_id->provider_data = iw_event->provider_data; 919 cm_id->local_addr = iw_event->local_addr; 920 cm_id->remote_addr = iw_event->remote_addr; 921 922 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 923 cm_id_priv->state = IW_CM_STATE_CONN_RECV; 924 925 ret = alloc_work_entries(cm_id_priv, 3); 926 if (ret) { 927 iw_cm_reject(cm_id, NULL, 0); 928 iw_destroy_cm_id(cm_id); 929 goto out; 930 } 931 932 /* Call the client CM handler */ 933 ret = cm_id->cm_handler(cm_id, iw_event); 934 if (ret) { 935 iw_cm_reject(cm_id, NULL, 0); 936 set_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags); 937 destroy_cm_id(cm_id); 938 if (atomic_read(&cm_id_priv->refcount)==0) 939 free_cm_id(cm_id_priv); 940 } 941 942out: 943 if (iw_event->private_data_len) 944 kfree(iw_event->private_data); 945} 946 947/* 948 * Passive Side: CM_ID <-- ESTABLISHED 949 * 950 * The provider generated an ESTABLISHED event which means that 951 * the MPA negotion has completed successfully and we are now in MPA 952 * FPDU mode. 953 * 954 * This event can only be received in the CONN_RECV state. If the 955 * remote peer closed, the ESTABLISHED event would be received followed 956 * by the CLOSE event. If the app closes, it will block until we wake 957 * it up after processing this event. 958 */ 959static int cm_conn_est_handler(struct iwcm_id_private *cm_id_priv, 960 struct iw_cm_event *iw_event) 961{ 962 unsigned long flags; 963 int ret; 964 965 spin_lock_irqsave(&cm_id_priv->lock, flags); 966 967 /* 968 * We clear the CONNECT_WAIT bit here to allow the callback 969 * function to call iw_cm_disconnect. Calling iw_destroy_cm_id 970 * from a callback handler is not allowed. 971 */ 972 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 973 BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV); 974 cm_id_priv->state = IW_CM_STATE_ESTABLISHED; 975 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 976 ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event); 977 wake_up_all(&cm_id_priv->connect_wait); 978 979 return ret; 980} 981 982/* 983 * Active Side: CM_ID <-- ESTABLISHED 984 * 985 * The app has called connect and is waiting for the established event to 986 * post it's requests to the server. This event will wake up anyone 987 * blocked in iw_cm_disconnect or iw_destroy_id. 988 */ 989static int cm_conn_rep_handler(struct iwcm_id_private *cm_id_priv, 990 struct iw_cm_event *iw_event) 991{ 992 unsigned long flags; 993 int ret; 994 995 spin_lock_irqsave(&cm_id_priv->lock, flags); 996 /* 997 * Clear the connect wait bit so a callback function calling 998 * iw_cm_disconnect will not wait and deadlock this thread 999 */ 1000 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 1001 BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_SENT); 1002 if (iw_event->status == IW_CM_EVENT_STATUS_ACCEPTED) { 1003 cm_id_priv->id.local_addr = iw_event->local_addr; 1004 cm_id_priv->id.remote_addr = iw_event->remote_addr; 1005 cm_id_priv->state = IW_CM_STATE_ESTABLISHED; 1006 } else { 1007 /* REJECTED or RESET */ 1008 cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp); 1009 cm_id_priv->qp = NULL; 1010 cm_id_priv->state = IW_CM_STATE_IDLE; 1011 } 1012 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 1013 ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event); 1014 1015 if (iw_event->private_data_len) 1016 kfree(iw_event->private_data); 1017 1018 /* Wake up waiters on connect complete */ 1019 wake_up_all(&cm_id_priv->connect_wait); 1020 1021 return ret; 1022} 1023 1024/* 1025 * CM_ID <-- CLOSING 1026 * 1027 * If in the ESTABLISHED state, move to CLOSING. 1028 */ 1029static void cm_disconnect_handler(struct iwcm_id_private *cm_id_priv, 1030 struct iw_cm_event *iw_event) 1031{ 1032 unsigned long flags; 1033 1034 spin_lock_irqsave(&cm_id_priv->lock, flags); 1035 if (cm_id_priv->state == IW_CM_STATE_ESTABLISHED) 1036 cm_id_priv->state = IW_CM_STATE_CLOSING; 1037 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 1038} 1039 1040/* 1041 * CM_ID <-- IDLE 1042 * 1043 * If in the ESTBLISHED or CLOSING states, the QP will have have been 1044 * moved by the provider to the ERR state. Disassociate the CM_ID from 1045 * the QP, move to IDLE, and remove the 'connected' reference. 1046 * 1047 * If in some other state, the cm_id was destroyed asynchronously. 1048 * This is the last reference that will result in waking up 1049 * the app thread blocked in iw_destroy_cm_id. 1050 */ 1051static int cm_close_handler(struct iwcm_id_private *cm_id_priv, 1052 struct iw_cm_event *iw_event) 1053{ 1054 unsigned long flags; 1055 int ret = 0; 1056 spin_lock_irqsave(&cm_id_priv->lock, flags); 1057 1058 if (cm_id_priv->qp) { 1059 cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp); 1060 cm_id_priv->qp = NULL; 1061 } 1062 switch (cm_id_priv->state) { 1063 case IW_CM_STATE_ESTABLISHED: 1064 case IW_CM_STATE_CLOSING: 1065 cm_id_priv->state = IW_CM_STATE_IDLE; 1066 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 1067 ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event); 1068 spin_lock_irqsave(&cm_id_priv->lock, flags); 1069 break; 1070 case IW_CM_STATE_DESTROYING: 1071 break; 1072 default: 1073 BUG(); 1074 } 1075 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 1076 1077 return ret; 1078} 1079 1080static int process_event(struct iwcm_id_private *cm_id_priv, 1081 struct iw_cm_event *iw_event) 1082{ 1083 int ret = 0; 1084 1085 switch (iw_event->event) { 1086 case IW_CM_EVENT_CONNECT_REQUEST: 1087 cm_conn_req_handler(cm_id_priv, iw_event); 1088 break; 1089 case IW_CM_EVENT_CONNECT_REPLY: 1090 ret = cm_conn_rep_handler(cm_id_priv, iw_event); 1091 break; 1092 case IW_CM_EVENT_ESTABLISHED: 1093 ret = cm_conn_est_handler(cm_id_priv, iw_event); 1094 break; 1095 case IW_CM_EVENT_DISCONNECT: 1096 cm_disconnect_handler(cm_id_priv, iw_event); 1097 break; 1098 case IW_CM_EVENT_CLOSE: 1099 ret = cm_close_handler(cm_id_priv, iw_event); 1100 break; 1101 default: 1102 BUG(); 1103 } 1104 1105 return ret; 1106} 1107 1108/* 1109 * Process events on the work_list for the cm_id. If the callback 1110 * function requests that the cm_id be deleted, a flag is set in the 1111 * cm_id flags to indicate that when the last reference is 1112 * removed, the cm_id is to be destroyed. This is necessary to 1113 * distinguish between an object that will be destroyed by the app 1114 * thread asleep on the destroy_comp list vs. an object destroyed 1115 * here synchronously when the last reference is removed. 1116 */ 1117static void cm_work_handler(struct work_struct *_work) 1118{ 1119 struct iwcm_work *work = container_of(_work, struct iwcm_work, work); 1120 struct iw_cm_event levent; 1121 struct iwcm_id_private *cm_id_priv = work->cm_id; 1122 unsigned long flags; 1123 int empty; 1124 int ret = 0; 1125 int destroy_id; 1126 1127 spin_lock_irqsave(&cm_id_priv->lock, flags); 1128 empty = list_empty(&cm_id_priv->work_list); 1129 while (!empty) { 1130 work = list_entry(cm_id_priv->work_list.next, 1131 struct iwcm_work, list); 1132 list_del_init(&work->list); 1133 empty = list_empty(&cm_id_priv->work_list); 1134 levent = work->event; 1135 put_work(work); 1136 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 1137 1138 ret = process_event(cm_id_priv, &levent); 1139 if (ret) { 1140 set_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags); 1141 destroy_cm_id(&cm_id_priv->id); 1142 } 1143 BUG_ON(atomic_read(&cm_id_priv->refcount)==0); 1144 destroy_id = test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags); 1145 if (iwcm_deref_id(cm_id_priv)) { 1146 if (destroy_id) { 1147 BUG_ON(!list_empty(&cm_id_priv->work_list)); 1148 free_cm_id(cm_id_priv); 1149 } 1150 return; 1151 } 1152 if (empty) 1153 return; 1154 spin_lock_irqsave(&cm_id_priv->lock, flags); 1155 } 1156 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 1157} 1158 1159/* 1160 * This function is called on interrupt context. Schedule events on 1161 * the iwcm_wq thread to allow callback functions to downcall into 1162 * the CM and/or block. Events are queued to a per-CM_ID 1163 * work_list. If this is the first event on the work_list, the work 1164 * element is also queued on the iwcm_wq thread. 1165 * 1166 * Each event holds a reference on the cm_id. Until the last posted 1167 * event has been delivered and processed, the cm_id cannot be 1168 * deleted. 1169 * 1170 * Returns: 1171 * 0 - the event was handled. 1172 * -ENOMEM - the event was not handled due to lack of resources. 1173 */ 1174static int cm_event_handler(struct iw_cm_id *cm_id, 1175 struct iw_cm_event *iw_event) 1176{ 1177 struct iwcm_work *work; 1178 struct iwcm_id_private *cm_id_priv; 1179 unsigned long flags; 1180 int ret = 0; 1181 1182 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 1183 1184 spin_lock_irqsave(&cm_id_priv->lock, flags); 1185 work = get_work(cm_id_priv); 1186 if (!work) { 1187 ret = -ENOMEM; 1188 goto out; 1189 } 1190 1191 INIT_WORK(&work->work, cm_work_handler); 1192 work->cm_id = cm_id_priv; 1193 work->event = *iw_event; 1194 1195 if ((work->event.event == IW_CM_EVENT_CONNECT_REQUEST || 1196 work->event.event == IW_CM_EVENT_CONNECT_REPLY) && 1197 work->event.private_data_len) { 1198 ret = copy_private_data(&work->event); 1199 if (ret) { 1200 put_work(work); 1201 goto out; 1202 } 1203 } 1204 1205 atomic_inc(&cm_id_priv->refcount); 1206 if (list_empty(&cm_id_priv->work_list)) { 1207 list_add_tail(&work->list, &cm_id_priv->work_list); 1208 queue_work(iwcm_wq, &work->work); 1209 } else 1210 list_add_tail(&work->list, &cm_id_priv->work_list); 1211out: 1212 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 1213 return ret; 1214} 1215 1216static int iwcm_init_qp_init_attr(struct iwcm_id_private *cm_id_priv, 1217 struct ib_qp_attr *qp_attr, 1218 int *qp_attr_mask) 1219{ 1220 unsigned long flags; 1221 int ret; 1222 1223 spin_lock_irqsave(&cm_id_priv->lock, flags); 1224 switch (cm_id_priv->state) { 1225 case IW_CM_STATE_IDLE: 1226 case IW_CM_STATE_CONN_SENT: 1227 case IW_CM_STATE_CONN_RECV: 1228 case IW_CM_STATE_ESTABLISHED: 1229 *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS; 1230 qp_attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE| 1231 IB_ACCESS_REMOTE_READ; 1232 ret = 0; 1233 break; 1234 default: 1235 ret = -EINVAL; 1236 break; 1237 } 1238 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 1239 return ret; 1240} 1241 1242static int iwcm_init_qp_rts_attr(struct iwcm_id_private *cm_id_priv, 1243 struct ib_qp_attr *qp_attr, 1244 int *qp_attr_mask) 1245{ 1246 unsigned long flags; 1247 int ret; 1248 1249 spin_lock_irqsave(&cm_id_priv->lock, flags); 1250 switch (cm_id_priv->state) { 1251 case IW_CM_STATE_IDLE: 1252 case IW_CM_STATE_CONN_SENT: 1253 case IW_CM_STATE_CONN_RECV: 1254 case IW_CM_STATE_ESTABLISHED: 1255 *qp_attr_mask = 0; 1256 ret = 0; 1257 break; 1258 default: 1259 ret = -EINVAL; 1260 break; 1261 } 1262 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 1263 return ret; 1264} 1265 1266int iw_cm_init_qp_attr(struct iw_cm_id *cm_id, 1267 struct ib_qp_attr *qp_attr, 1268 int *qp_attr_mask) 1269{ 1270 struct iwcm_id_private *cm_id_priv; 1271 int ret; 1272 1273 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 1274 switch (qp_attr->qp_state) { 1275 case IB_QPS_INIT: 1276 case IB_QPS_RTR: 1277 ret = iwcm_init_qp_init_attr(cm_id_priv, 1278 qp_attr, qp_attr_mask); 1279 break; 1280 case IB_QPS_RTS: 1281 ret = iwcm_init_qp_rts_attr(cm_id_priv, 1282 qp_attr, qp_attr_mask); 1283 break; 1284 default: 1285 ret = -EINVAL; 1286 break; 1287 } 1288 return ret; 1289} 1290EXPORT_SYMBOL(iw_cm_init_qp_attr); 1291 1292static int __init iw_cm_init(void) 1293{ 1294 iwcm_wq = create_singlethread_workqueue("iw_cm_wq"); 1295 if (!iwcm_wq) 1296 return -ENOMEM; 1297 1298 return 0; 1299} 1300 1301static void __exit iw_cm_cleanup(void) 1302{ 1303 destroy_workqueue(iwcm_wq); 1304} 1305 1306module_init(iw_cm_init); 1307module_exit(iw_cm_cleanup); 1308