ib_cma.c revision 331769
/*
 * Copyright (c) 2005 Voltaire Inc.  All rights reserved.
 * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
 * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
 * Copyright (c) 2005-2006 Intel Corporation.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#define LINUXKPI_PARAM_PREFIX ibcore_

#include <linux/completion.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/mutex.h>
#include <linux/random.h>
#include <linux/idr.h>
#include <linux/inetdevice.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <net/route.h>

#include <net/tcp.h>
#include <net/ipv6.h>

#include <netinet6/scope6_var.h>
#include <netinet6/ip6_var.h>

#include <rdma/rdma_cm.h>
#include <rdma/rdma_cm_ib.h>
#include <rdma/ib.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_cache.h>
#include <rdma/ib_cm.h>
#include <rdma/ib_sa.h>
#include <rdma/iw_cm.h>

#include <sys/priv.h>

#include "core_priv.h"

MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("Generic RDMA CM Agent");
MODULE_LICENSE("Dual BSD/GPL");

#define CMA_CM_RESPONSE_TIMEOUT 20
#define CMA_QUERY_CLASSPORT_INFO_TIMEOUT 3000
#define CMA_MAX_CM_RETRIES 15
#define CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24)
#define CMA_IBOE_PACKET_LIFETIME 18

static const char * const cma_events[] = {
	[RDMA_CM_EVENT_ADDR_RESOLVED]	 = "address resolved",
	[RDMA_CM_EVENT_ADDR_ERROR]	 = "address error",
	[RDMA_CM_EVENT_ROUTE_RESOLVED]	 = "route resolved ",
	[RDMA_CM_EVENT_ROUTE_ERROR]	 = "route error",
	[RDMA_CM_EVENT_CONNECT_REQUEST]	 = "connect request",
	[RDMA_CM_EVENT_CONNECT_RESPONSE] = "connect response",
	[RDMA_CM_EVENT_CONNECT_ERROR]	 = "connect error",
	[RDMA_CM_EVENT_UNREACHABLE]	 = "unreachable",
	[RDMA_CM_EVENT_REJECTED]	 = "rejected",
	[RDMA_CM_EVENT_ESTABLISHED]	 = "established",
	[RDMA_CM_EVENT_DISCONNECTED]	 = "disconnected",
	[RDMA_CM_EVENT_DEVICE_REMOVAL]	 = "device removal",
	[RDMA_CM_EVENT_MULTICAST_JOIN]	 = "multicast join",
	[RDMA_CM_EVENT_MULTICAST_ERROR]	 = "multicast error",
	[RDMA_CM_EVENT_ADDR_CHANGE]	 = "address change",
	[RDMA_CM_EVENT_TIMEWAIT_EXIT]	 = "timewait exit",
};

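/*
 * Map an rdma_cm_event_type to a human-readable string.  A minimal
 * sketch of how a consumer's event handler might use this (the handler
 * name is illustrative, not part of this file):
 *
 *	static int my_handler(struct rdma_cm_id *id,
 *	    struct rdma_cm_event *event)
 *	{
 *		pr_info("cm event: %s\n", rdma_event_msg(event->event));
 *		return 0;
 *	}
 */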
exit", 95}; 96 97const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event) 98{ 99 size_t index = event; 100 101 return (index < ARRAY_SIZE(cma_events) && cma_events[index]) ? 102 cma_events[index] : "unrecognized event"; 103} 104EXPORT_SYMBOL(rdma_event_msg); 105 106static void cma_add_one(struct ib_device *device); 107static void cma_remove_one(struct ib_device *device, void *client_data); 108 109static struct ib_client cma_client = { 110 .name = "cma", 111 .add = cma_add_one, 112 .remove = cma_remove_one 113}; 114 115static struct ib_sa_client sa_client; 116static struct rdma_addr_client addr_client; 117static LIST_HEAD(dev_list); 118static LIST_HEAD(listen_any_list); 119static DEFINE_MUTEX(lock); 120static struct workqueue_struct *cma_wq; 121 122struct cma_pernet { 123 struct idr tcp_ps; 124 struct idr udp_ps; 125 struct idr ipoib_ps; 126 struct idr ib_ps; 127}; 128 129VNET_DEFINE(struct cma_pernet, cma_pernet); 130 131static struct cma_pernet *cma_pernet_ptr(struct vnet *vnet) 132{ 133 struct cma_pernet *retval; 134 135 CURVNET_SET_QUIET(vnet); 136 retval = &VNET(cma_pernet); 137 CURVNET_RESTORE(); 138 139 return (retval); 140} 141 142static struct idr *cma_pernet_idr(struct vnet *net, enum rdma_port_space ps) 143{ 144 struct cma_pernet *pernet = cma_pernet_ptr(net); 145 146 switch (ps) { 147 case RDMA_PS_TCP: 148 return &pernet->tcp_ps; 149 case RDMA_PS_UDP: 150 return &pernet->udp_ps; 151 case RDMA_PS_IPOIB: 152 return &pernet->ipoib_ps; 153 case RDMA_PS_IB: 154 return &pernet->ib_ps; 155 default: 156 return NULL; 157 } 158} 159 160struct cma_device { 161 struct list_head list; 162 struct ib_device *device; 163 struct completion comp; 164 atomic_t refcount; 165 struct list_head id_list; 166 struct sysctl_ctx_list sysctl_ctx; 167 enum ib_gid_type *default_gid_type; 168}; 169 170struct rdma_bind_list { 171 enum rdma_port_space ps; 172 struct hlist_head owners; 173 unsigned short port; 174}; 175 176struct class_port_info_context { 177 struct ib_class_port_info *class_port_info; 178 struct ib_device *device; 179 struct completion done; 180 struct ib_sa_query *sa_query; 181 u8 port_num; 182}; 183 184static int cma_ps_alloc(struct vnet *vnet, enum rdma_port_space ps, 185 struct rdma_bind_list *bind_list, int snum) 186{ 187 struct idr *idr = cma_pernet_idr(vnet, ps); 188 189 return idr_alloc(idr, bind_list, snum, snum + 1, GFP_KERNEL); 190} 191 192static struct rdma_bind_list *cma_ps_find(struct vnet *net, 193 enum rdma_port_space ps, int snum) 194{ 195 struct idr *idr = cma_pernet_idr(net, ps); 196 197 return idr_find(idr, snum); 198} 199 200static void cma_ps_remove(struct vnet *net, enum rdma_port_space ps, int snum) 201{ 202 struct idr *idr = cma_pernet_idr(net, ps); 203 204 idr_remove(idr, snum); 205} 206 207enum { 208 CMA_OPTION_AFONLY, 209}; 210 211void cma_ref_dev(struct cma_device *cma_dev) 212{ 213 atomic_inc(&cma_dev->refcount); 214} 215 216struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter filter, 217 void *cookie) 218{ 219 struct cma_device *cma_dev; 220 struct cma_device *found_cma_dev = NULL; 221 222 mutex_lock(&lock); 223 224 list_for_each_entry(cma_dev, &dev_list, list) 225 if (filter(cma_dev->device, cookie)) { 226 found_cma_dev = cma_dev; 227 break; 228 } 229 230 if (found_cma_dev) 231 cma_ref_dev(found_cma_dev); 232 mutex_unlock(&lock); 233 return found_cma_dev; 234} 235 236int cma_get_default_gid_type(struct cma_device *cma_dev, 237 unsigned int port) 238{ 239 if (port < rdma_start_port(cma_dev->device) || 240 port > 
int cma_set_default_gid_type(struct cma_device *cma_dev,
			     unsigned int port,
			     enum ib_gid_type default_gid_type)
{
	unsigned long supported_gids;

	if (port < rdma_start_port(cma_dev->device) ||
	    port > rdma_end_port(cma_dev->device))
		return -EINVAL;

	supported_gids = roce_gid_type_mask_support(cma_dev->device, port);

	if (!(supported_gids & 1 << default_gid_type))
		return -EINVAL;

	cma_dev->default_gid_type[port - rdma_start_port(cma_dev->device)] =
		default_gid_type;

	return 0;
}

struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev)
{
	return cma_dev->device;
}

/*
 * Device removal can occur at any time, so we need extra handling to
 * serialize notifying the user of device removal with other callbacks.
 * We do this by disabling removal notification while a callback is in process,
 * and reporting it after the callback completes.
 */
struct rdma_id_private {
	struct rdma_cm_id	id;

	struct rdma_bind_list	*bind_list;
	struct hlist_node	node;
	struct list_head	list; /* listen_any_list or cma_device.list */
	struct list_head	listen_list; /* per device listens */
	struct cma_device	*cma_dev;
	struct list_head	mc_list;

	int			internal_id;
	enum rdma_cm_state	state;
	spinlock_t		lock;
	struct mutex		qp_mutex;

	struct completion	comp;
	atomic_t		refcount;
	struct mutex		handler_mutex;

	int			backlog;
	int			timeout_ms;
	struct ib_sa_query	*query;
	int			query_id;
	union {
		struct ib_cm_id	*ib;
		struct iw_cm_id	*iw;
	} cm_id;

	u32			seq_num;
	u32			qkey;
	u32			qp_num;
	pid_t			owner;
	u32			options;
	u8			srq;
	u8			tos;
	u8			reuseaddr;
	u8			afonly;
	enum ib_gid_type	gid_type;
};

struct cma_multicast {
	struct rdma_id_private *id_priv;
	union {
		struct ib_sa_multicast *ib;
	} multicast;
	struct list_head	list;
	void			*context;
	struct sockaddr_storage	addr;
	struct kref		mcref;
	bool			igmp_joined;
	u8			join_state;
};

struct cma_work {
	struct work_struct	work;
	struct rdma_id_private	*id;
	enum rdma_cm_state	old_state;
	enum rdma_cm_state	new_state;
	struct rdma_cm_event	event;
};

struct cma_ndev_work {
	struct work_struct	work;
	struct rdma_id_private	*id;
	struct rdma_cm_event	event;
};

struct iboe_mcast_work {
	struct work_struct	 work;
	struct rdma_id_private	*id;
	struct cma_multicast	*mc;
};

union cma_ip_addr {
	struct in6_addr ip6;
	struct {
		__be32 pad[3];
		__be32 addr;
	} ip4;
};

struct cma_hdr {
	u8 cma_version;
	u8 ip_version;	/* IP version: 7:4 */
	__be16 port;
	union cma_ip_addr src_addr;
	union cma_ip_addr dst_addr;
};

#define CMA_VERSION 0x00

struct cma_req_info {
	struct ib_device *device;
	int port;
	union ib_gid local_gid;
	__be64 service_id;
	u16 pkey;
	bool has_gid:1;
};

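/*
 * The helpers below implement the ID state machine.  cma_comp() tests
 * the current state, cma_comp_exch() is a compare-and-swap (transition
 * to "exch" only if the state is "comp", returning whether the swap
 * happened), and cma_exch() swaps unconditionally and returns the old
 * state.  All three take id_priv->lock so concurrent callbacks observe
 * a consistent state.
 */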
static int cma_comp(struct rdma_id_private *id_priv, enum rdma_cm_state comp)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&id_priv->lock, flags);
	ret = (id_priv->state == comp);
	spin_unlock_irqrestore(&id_priv->lock, flags);
	return ret;
}

static int cma_comp_exch(struct rdma_id_private *id_priv,
			 enum rdma_cm_state comp, enum rdma_cm_state exch)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&id_priv->lock, flags);
	if ((ret = (id_priv->state == comp)))
		id_priv->state = exch;
	spin_unlock_irqrestore(&id_priv->lock, flags);
	return ret;
}

static enum rdma_cm_state cma_exch(struct rdma_id_private *id_priv,
				   enum rdma_cm_state exch)
{
	unsigned long flags;
	enum rdma_cm_state old;

	spin_lock_irqsave(&id_priv->lock, flags);
	old = id_priv->state;
	id_priv->state = exch;
	spin_unlock_irqrestore(&id_priv->lock, flags);
	return old;
}

static inline u8 cma_get_ip_ver(const struct cma_hdr *hdr)
{
	return hdr->ip_version >> 4;
}

static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver)
{
	hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF);
}

static void _cma_attach_to_dev(struct rdma_id_private *id_priv,
			       struct cma_device *cma_dev)
{
	cma_ref_dev(cma_dev);
	id_priv->cma_dev = cma_dev;
	id_priv->gid_type = 0;
	id_priv->id.device = cma_dev->device;
	id_priv->id.route.addr.dev_addr.transport =
		rdma_node_get_transport(cma_dev->device->node_type);
	list_add_tail(&id_priv->list, &cma_dev->id_list);
}

static void cma_attach_to_dev(struct rdma_id_private *id_priv,
			      struct cma_device *cma_dev)
{
	_cma_attach_to_dev(id_priv, cma_dev);
	id_priv->gid_type =
		cma_dev->default_gid_type[id_priv->id.port_num -
					  rdma_start_port(cma_dev->device)];
}

void cma_deref_dev(struct cma_device *cma_dev)
{
	if (atomic_dec_and_test(&cma_dev->refcount))
		complete(&cma_dev->comp);
}

static inline void release_mc(struct kref *kref)
{
	struct cma_multicast *mc = container_of(kref, struct cma_multicast, mcref);

	kfree(mc->multicast.ib);
	kfree(mc);
}

static void cma_release_dev(struct rdma_id_private *id_priv)
{
	mutex_lock(&lock);
	list_del(&id_priv->list);
	cma_deref_dev(id_priv->cma_dev);
	id_priv->cma_dev = NULL;
	mutex_unlock(&lock);
}

static inline struct sockaddr *cma_src_addr(struct rdma_id_private *id_priv)
{
	return (struct sockaddr *) &id_priv->id.route.addr.src_addr;
}

static inline struct sockaddr *cma_dst_addr(struct rdma_id_private *id_priv)
{
	return (struct sockaddr *) &id_priv->id.route.addr.dst_addr;
}

static inline unsigned short cma_family(struct rdma_id_private *id_priv)
{
	return id_priv->id.route.addr.src_addr.ss_family;
}

static int cma_set_qkey(struct rdma_id_private *id_priv, u32 qkey)
{
	struct ib_sa_mcmember_rec rec;
	int ret = 0;

	if (id_priv->qkey) {
		if (qkey && id_priv->qkey != qkey)
			return -EINVAL;
		return 0;
	}

	if (qkey) {
		id_priv->qkey = qkey;
		return 0;
	}

	switch (id_priv->id.ps) {
	case RDMA_PS_UDP:
	case RDMA_PS_IB:
		id_priv->qkey = RDMA_UDP_QKEY;
		break;
	case RDMA_PS_IPOIB:
		ib_addr_get_mgid(&id_priv->id.route.addr.dev_addr, &rec.mgid);
		ret = ib_sa_get_mcmember_rec(id_priv->id.device,
					     id_priv->id.port_num, &rec.mgid,
					     &rec);
		if (!ret)
			id_priv->qkey = be32_to_cpu(rec.qkey);
		break;
	default:
		break;
	}
	return ret;
}

static void cma_translate_ib(struct sockaddr_ib *sib, struct rdma_dev_addr *dev_addr)
{
	dev_addr->dev_type = ARPHRD_INFINIBAND;
	rdma_addr_set_sgid(dev_addr, (union ib_gid *) &sib->sib_addr);
	ib_addr_set_pkey(dev_addr, ntohs(sib->sib_pkey));
}

static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
{
	int ret;

	if (addr->sa_family != AF_IB) {
		ret = rdma_translate_ip(addr, dev_addr, NULL);
	} else {
		cma_translate_ib((struct sockaddr_ib *) addr, dev_addr);
		ret = 0;
	}

	return ret;
}

static inline int cma_validate_port(struct ib_device *device, u8 port,
				    enum ib_gid_type gid_type,
				    union ib_gid *gid, int dev_type,
				    struct vnet *net,
				    int bound_if_index)
{
	int ret = -ENODEV;
	struct net_device *ndev = NULL;

	if ((dev_type == ARPHRD_INFINIBAND) && !rdma_protocol_ib(device, port))
		return ret;

	if ((dev_type != ARPHRD_INFINIBAND) && rdma_protocol_ib(device, port))
		return ret;

	if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port)) {
		ndev = dev_get_by_index(net, bound_if_index);
		if (ndev && ndev->if_flags & IFF_LOOPBACK) {
			pr_info("detected loopback device\n");
			dev_put(ndev);

			if (!device->get_netdev)
				return -EOPNOTSUPP;

			ndev = device->get_netdev(device, port);
			if (!ndev)
				return -ENODEV;
		}
	} else {
		gid_type = IB_GID_TYPE_IB;
	}

	ret = ib_find_cached_gid_by_port(device, gid, gid_type, port,
					 ndev, NULL);

	if (ndev)
		dev_put(ndev);

	return ret;
}

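/*
 * Bind the ID to a device and port whose GID table contains the ID's
 * source GID.  The listening ID's device and port, when given, are
 * tried first so that a passive connection lands on the same device
 * that received the request; otherwise every registered device is
 * scanned.  Takes the global "lock" internally.
 */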
static int cma_acquire_dev(struct rdma_id_private *id_priv,
			   struct rdma_id_private *listen_id_priv)
{
	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
	struct cma_device *cma_dev;
	union ib_gid gid, iboe_gid, *gidp;
	int ret = -ENODEV;
	u8 port;

	if (dev_addr->dev_type != ARPHRD_INFINIBAND &&
	    id_priv->id.ps == RDMA_PS_IPOIB)
		return -EINVAL;

	mutex_lock(&lock);
	rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
		    &iboe_gid);

	memcpy(&gid, dev_addr->src_dev_addr +
	       rdma_addr_gid_offset(dev_addr), sizeof gid);

	if (listen_id_priv) {
		cma_dev = listen_id_priv->cma_dev;
		port = listen_id_priv->id.port_num;
		gidp = rdma_protocol_roce(cma_dev->device, port) ?
		       &iboe_gid : &gid;

		ret = cma_validate_port(cma_dev->device, port,
					rdma_protocol_ib(cma_dev->device, port) ?
					IB_GID_TYPE_IB :
					listen_id_priv->gid_type, gidp,
					dev_addr->dev_type,
					dev_addr->net,
					dev_addr->bound_dev_if);
		if (!ret) {
			id_priv->id.port_num = port;
			goto out;
		}
	}

	list_for_each_entry(cma_dev, &dev_list, list) {
		for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port) {
			if (listen_id_priv &&
			    listen_id_priv->cma_dev == cma_dev &&
			    listen_id_priv->id.port_num == port)
				continue;

			gidp = rdma_protocol_roce(cma_dev->device, port) ?
			       &iboe_gid : &gid;

			ret = cma_validate_port(cma_dev->device, port,
						rdma_protocol_ib(cma_dev->device, port) ?
						IB_GID_TYPE_IB :
						cma_dev->default_gid_type[port - 1],
						gidp, dev_addr->dev_type,
						dev_addr->net,
						dev_addr->bound_dev_if);
			if (!ret) {
				id_priv->id.port_num = port;
				goto out;
			}
		}
	}

out:
	if (!ret)
		cma_attach_to_dev(id_priv, cma_dev);

	mutex_unlock(&lock);
	return ret;
}

/*
 * Select the source IB device and address to reach the destination IB address.
 */
static int cma_resolve_ib_dev(struct rdma_id_private *id_priv)
{
	struct cma_device *cma_dev, *cur_dev;
	struct sockaddr_ib *addr;
	union ib_gid gid, sgid, *dgid;
	u16 pkey, index;
	u8 p;
	int i;

	cma_dev = NULL;
	addr = (struct sockaddr_ib *) cma_dst_addr(id_priv);
	dgid = (union ib_gid *) &addr->sib_addr;
	pkey = ntohs(addr->sib_pkey);

	list_for_each_entry(cur_dev, &dev_list, list) {
		for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) {
			if (!rdma_cap_af_ib(cur_dev->device, p))
				continue;

			if (ib_find_cached_pkey(cur_dev->device, p, pkey, &index))
				continue;

			for (i = 0; !ib_get_cached_gid(cur_dev->device, p, i,
						       &gid, NULL);
			     i++) {
				if (!memcmp(&gid, dgid, sizeof(gid))) {
					cma_dev = cur_dev;
					sgid = gid;
					id_priv->id.port_num = p;
					goto found;
				}

				if (!cma_dev && (gid.global.subnet_prefix ==
						 dgid->global.subnet_prefix)) {
					cma_dev = cur_dev;
					sgid = gid;
					id_priv->id.port_num = p;
				}
			}
		}
	}

	if (!cma_dev)
		return -ENODEV;

found:
	cma_attach_to_dev(id_priv, cma_dev);
	addr = (struct sockaddr_ib *) cma_src_addr(id_priv);
	memcpy(&addr->sib_addr, &sgid, sizeof sgid);
	cma_translate_ib(addr, &id_priv->id.route.addr.dev_addr);
	return 0;
}

static void cma_deref_id(struct rdma_id_private *id_priv)
{
	if (atomic_dec_and_test(&id_priv->refcount))
		complete(&id_priv->comp);
}

struct rdma_cm_id *rdma_create_id(struct vnet *net,
				  rdma_cm_event_handler event_handler,
				  void *context, enum rdma_port_space ps,
				  enum ib_qp_type qp_type)
{
	struct rdma_id_private *id_priv;

	id_priv = kzalloc(sizeof *id_priv, GFP_KERNEL);
	if (!id_priv)
		return ERR_PTR(-ENOMEM);

	id_priv->owner = task_pid_nr(current);
	id_priv->state = RDMA_CM_IDLE;
	id_priv->id.context = context;
	id_priv->id.event_handler = event_handler;
	id_priv->id.ps = ps;
	id_priv->id.qp_type = qp_type;
	spin_lock_init(&id_priv->lock);
	mutex_init(&id_priv->qp_mutex);
	init_completion(&id_priv->comp);
	atomic_set(&id_priv->refcount, 1);
	mutex_init(&id_priv->handler_mutex);
	INIT_LIST_HEAD(&id_priv->listen_list);
	INIT_LIST_HEAD(&id_priv->mc_list);
	get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num);
	id_priv->id.route.addr.dev_addr.net = TD_TO_VNET(curthread);

	return &id_priv->id;
}
EXPORT_SYMBOL(rdma_create_id);

static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
{
	struct ib_qp_attr qp_attr;
	int qp_attr_mask, ret;

	qp_attr.qp_state = IB_QPS_INIT;
	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
	if (ret)
		return ret;

	ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
	if (ret)
		return ret;

	qp_attr.qp_state = IB_QPS_RTR;
	ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
	if (ret)
		return ret;

	qp_attr.qp_state = IB_QPS_RTS;
	qp_attr.sq_psn = 0;
	ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_SQ_PSN);

	return ret;
}

static int cma_init_conn_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
{
	struct ib_qp_attr qp_attr;
	int qp_attr_mask, ret;

	qp_attr.qp_state = IB_QPS_INIT;
	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
	if (ret)
		return ret;

	return ib_modify_qp(qp, &qp_attr, qp_attr_mask);
}

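/*
 * A minimal usage sketch for rdma_create_qp(), assuming an ID whose
 * address and route are already resolved and a caller-supplied PD,
 * CQ, and init attributes (the variable names are illustrative, not
 * part of this file):
 *
 *	struct ib_qp_init_attr attr;
 *
 *	memset(&attr, 0, sizeof(attr));
 *	attr.qp_type = IB_QPT_RC;
 *	attr.send_cq = attr.recv_cq = cq;
 *	attr.cap.max_send_wr = attr.cap.max_recv_wr = 16;
 *	attr.cap.max_send_sge = attr.cap.max_recv_sge = 1;
 *	if (rdma_create_qp(id, pd, &attr) == 0) {
 *		... id->qp is now valid: UD QPs come back ready (RTS),
 *		... connected QPs advance during connection setup.
 *	}
 */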
int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd,
		   struct ib_qp_init_attr *qp_init_attr)
{
	struct rdma_id_private *id_priv;
	struct ib_qp *qp;
	int ret;

	id_priv = container_of(id, struct rdma_id_private, id);
	if (id->device != pd->device)
		return -EINVAL;

	qp_init_attr->port_num = id->port_num;
	qp = ib_create_qp(pd, qp_init_attr);
	if (IS_ERR(qp))
		return PTR_ERR(qp);

	if (id->qp_type == IB_QPT_UD)
		ret = cma_init_ud_qp(id_priv, qp);
	else
		ret = cma_init_conn_qp(id_priv, qp);
	if (ret)
		goto err;

	id->qp = qp;
	id_priv->qp_num = qp->qp_num;
	id_priv->srq = (qp->srq != NULL);
	return 0;
err:
	ib_destroy_qp(qp);
	return ret;
}
EXPORT_SYMBOL(rdma_create_qp);

void rdma_destroy_qp(struct rdma_cm_id *id)
{
	struct rdma_id_private *id_priv;

	id_priv = container_of(id, struct rdma_id_private, id);
	mutex_lock(&id_priv->qp_mutex);
	ib_destroy_qp(id_priv->id.qp);
	id_priv->id.qp = NULL;
	mutex_unlock(&id_priv->qp_mutex);
}
EXPORT_SYMBOL(rdma_destroy_qp);

static int cma_modify_qp_rtr(struct rdma_id_private *id_priv,
			     struct rdma_conn_param *conn_param)
{
	struct ib_qp_attr qp_attr;
	int qp_attr_mask, ret;
	union ib_gid sgid;

	mutex_lock(&id_priv->qp_mutex);
	if (!id_priv->id.qp) {
		ret = 0;
		goto out;
	}

	/* Need to update QP attributes from default values. */
	qp_attr.qp_state = IB_QPS_INIT;
	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
	if (ret)
		goto out;

	ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
	if (ret)
		goto out;

	qp_attr.qp_state = IB_QPS_RTR;
	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
	if (ret)
		goto out;

	ret = ib_query_gid(id_priv->id.device, id_priv->id.port_num,
			   qp_attr.ah_attr.grh.sgid_index, &sgid, NULL);
	if (ret)
		goto out;

	BUG_ON(id_priv->cma_dev->device != id_priv->id.device);

	if (conn_param)
		qp_attr.max_dest_rd_atomic = conn_param->responder_resources;
	ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
out:
	mutex_unlock(&id_priv->qp_mutex);
	return ret;
}

static int cma_modify_qp_rts(struct rdma_id_private *id_priv,
			     struct rdma_conn_param *conn_param)
{
	struct ib_qp_attr qp_attr;
	int qp_attr_mask, ret;

	mutex_lock(&id_priv->qp_mutex);
	if (!id_priv->id.qp) {
		ret = 0;
		goto out;
	}

	qp_attr.qp_state = IB_QPS_RTS;
	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
	if (ret)
		goto out;

	if (conn_param)
		qp_attr.max_rd_atomic = conn_param->initiator_depth;
	ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
out:
	mutex_unlock(&id_priv->qp_mutex);
	return ret;
}

static int cma_modify_qp_err(struct rdma_id_private *id_priv)
{
	struct ib_qp_attr qp_attr;
	int ret;

	mutex_lock(&id_priv->qp_mutex);
	if (!id_priv->id.qp) {
		ret = 0;
		goto out;
	}

	qp_attr.qp_state = IB_QPS_ERR;
	ret = ib_modify_qp(id_priv->id.qp, &qp_attr, IB_QP_STATE);
out:
	mutex_unlock(&id_priv->qp_mutex);
	return ret;
}

static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv,
			       struct ib_qp_attr *qp_attr, int *qp_attr_mask)
{
	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
	int ret;
	u16 pkey;

	if (rdma_cap_eth_ah(id_priv->id.device, id_priv->id.port_num))
		pkey = 0xffff;
	else
		pkey = ib_addr_get_pkey(dev_addr);

	ret = ib_find_cached_pkey(id_priv->id.device, id_priv->id.port_num,
				  pkey, &qp_attr->pkey_index);
	if (ret)
		return ret;

	qp_attr->port_num = id_priv->id.port_num;
	*qp_attr_mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT;

	if (id_priv->id.qp_type == IB_QPT_UD) {
		ret = cma_set_qkey(id_priv, 0);
		if (ret)
			return ret;

		qp_attr->qkey = id_priv->qkey;
		*qp_attr_mask |= IB_QP_QKEY;
	} else {
		qp_attr->qp_access_flags = 0;
		*qp_attr_mask |= IB_QP_ACCESS_FLAGS;
	}
	return 0;
}

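/*
 * rdma_init_qp_attr() fills in transport-appropriate QP attributes for
 * the state named in qp_attr->qp_state.  The expected calling pattern
 * (it is also used this way by cma_init_ud_qp() above) is, as a sketch:
 *
 *	qp_attr.qp_state = IB_QPS_INIT;
 *	ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask);
 *	if (ret == 0)
 *		ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
 */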
int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
		       int *qp_attr_mask)
{
	struct rdma_id_private *id_priv;
	int ret = 0;

	id_priv = container_of(id, struct rdma_id_private, id);
	if (rdma_cap_ib_cm(id->device, id->port_num)) {
		if (!id_priv->cm_id.ib || (id_priv->id.qp_type == IB_QPT_UD))
			ret = cma_ib_init_qp_attr(id_priv, qp_attr, qp_attr_mask);
		else
			ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr,
						 qp_attr_mask);

		if (qp_attr->qp_state == IB_QPS_RTR)
			qp_attr->rq_psn = id_priv->seq_num;
	} else if (rdma_cap_iw_cm(id->device, id->port_num)) {
		if (!id_priv->cm_id.iw) {
			qp_attr->qp_access_flags = 0;
			*qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS;
		} else
			ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr,
						 qp_attr_mask);
	} else
		ret = -ENOSYS;

	return ret;
}
EXPORT_SYMBOL(rdma_init_qp_attr);

static inline int cma_zero_addr(struct sockaddr *addr)
{
	switch (addr->sa_family) {
	case AF_INET:
		return ipv4_is_zeronet(((struct sockaddr_in *)addr)->sin_addr.s_addr);
	case AF_INET6:
		return ipv6_addr_any(&((struct sockaddr_in6 *) addr)->sin6_addr);
	case AF_IB:
		return ib_addr_any(&((struct sockaddr_ib *) addr)->sib_addr);
	default:
		return 0;
	}
}

static inline int cma_loopback_addr(struct sockaddr *addr)
{
	switch (addr->sa_family) {
	case AF_INET:
		return ipv4_is_loopback(((struct sockaddr_in *) addr)->sin_addr.s_addr);
	case AF_INET6:
		return ipv6_addr_loopback(&((struct sockaddr_in6 *) addr)->sin6_addr);
	case AF_IB:
		return ib_addr_loopback(&((struct sockaddr_ib *) addr)->sib_addr);
	default:
		return 0;
	}
}

static inline int cma_any_addr(struct sockaddr *addr)
{
	return cma_zero_addr(addr) || cma_loopback_addr(addr);
}

static int cma_addr_cmp(struct sockaddr *src, struct sockaddr *dst)
{
	if (src->sa_family != dst->sa_family)
		return -1;

	switch (src->sa_family) {
	case AF_INET:
		return ((struct sockaddr_in *) src)->sin_addr.s_addr !=
		       ((struct sockaddr_in *) dst)->sin_addr.s_addr;
	case AF_INET6:
		return ipv6_addr_cmp(&((struct sockaddr_in6 *) src)->sin6_addr,
				     &((struct sockaddr_in6 *) dst)->sin6_addr);
	default:
		return ib_addr_cmp(&((struct sockaddr_ib *) src)->sib_addr,
				   &((struct sockaddr_ib *) dst)->sib_addr);
	}
}

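/*
 * Extract the port number from a socket address.  For AF_INET and
 * AF_INET6 this is the ordinary sin_port/sin6_port.  For AF_IB the
 * port lives in the low 16 bits of the 64-bit service ID, so it is
 * recovered by masking sib_sid with sib_sid_mask and truncating.
 */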
static __be16 cma_port(struct sockaddr *addr)
{
	struct sockaddr_ib *sib;

	switch (addr->sa_family) {
	case AF_INET:
		return ((struct sockaddr_in *) addr)->sin_port;
	case AF_INET6:
		return ((struct sockaddr_in6 *) addr)->sin6_port;
	case AF_IB:
		sib = (struct sockaddr_ib *) addr;
		return htons((u16) (be64_to_cpu(sib->sib_sid) &
				    be64_to_cpu(sib->sib_sid_mask)));
	default:
		return 0;
	}
}

static inline int cma_any_port(struct sockaddr *addr)
{
	return !cma_port(addr);
}

static void cma_save_ib_info(struct sockaddr *src_addr,
			     struct sockaddr *dst_addr,
			     struct rdma_cm_id *listen_id,
			     struct ib_sa_path_rec *path)
{
	struct sockaddr_ib *listen_ib, *ib;

	listen_ib = (struct sockaddr_ib *) &listen_id->route.addr.src_addr;
	if (src_addr) {
		ib = (struct sockaddr_ib *)src_addr;
		ib->sib_family = AF_IB;
		if (path) {
			ib->sib_pkey = path->pkey;
			ib->sib_flowinfo = path->flow_label;
			memcpy(&ib->sib_addr, &path->sgid, 16);
			ib->sib_sid = path->service_id;
			ib->sib_scope_id = 0;
		} else {
			ib->sib_pkey = listen_ib->sib_pkey;
			ib->sib_flowinfo = listen_ib->sib_flowinfo;
			ib->sib_addr = listen_ib->sib_addr;
			ib->sib_sid = listen_ib->sib_sid;
			ib->sib_scope_id = listen_ib->sib_scope_id;
		}
		ib->sib_sid_mask = cpu_to_be64(0xffffffffffffffffULL);
	}
	if (dst_addr) {
		ib = (struct sockaddr_ib *)dst_addr;
		ib->sib_family = AF_IB;
		if (path) {
			ib->sib_pkey = path->pkey;
			ib->sib_flowinfo = path->flow_label;
			memcpy(&ib->sib_addr, &path->dgid, 16);
		}
	}
}

static void cma_save_ip4_info(struct sockaddr_in *src_addr,
			      struct sockaddr_in *dst_addr,
			      struct cma_hdr *hdr,
			      __be16 local_port)
{
	if (src_addr) {
		*src_addr = (struct sockaddr_in) {
			.sin_len = sizeof(struct sockaddr_in),
			.sin_family = AF_INET,
			.sin_addr.s_addr = hdr->dst_addr.ip4.addr,
			.sin_port = local_port,
		};
	}

	if (dst_addr) {
		*dst_addr = (struct sockaddr_in) {
			.sin_len = sizeof(struct sockaddr_in),
			.sin_family = AF_INET,
			.sin_addr.s_addr = hdr->src_addr.ip4.addr,
			.sin_port = hdr->port,
		};
	}
}

static void cma_save_ip6_info(struct sockaddr_in6 *src_addr,
			      struct sockaddr_in6 *dst_addr,
			      struct cma_hdr *hdr,
			      __be16 local_port)
{
	if (src_addr) {
		*src_addr = (struct sockaddr_in6) {
			.sin6_len = sizeof(struct sockaddr_in6),
			.sin6_family = AF_INET6,
			.sin6_addr = hdr->dst_addr.ip6,
			.sin6_port = local_port,
		};
	}

	if (dst_addr) {
		*dst_addr = (struct sockaddr_in6) {
			.sin6_len = sizeof(struct sockaddr_in6),
			.sin6_family = AF_INET6,
			.sin6_addr = hdr->src_addr.ip6,
			.sin6_port = hdr->port,
		};
	}
}

static u16 cma_port_from_service_id(__be64 service_id)
{
	return (u16)be64_to_cpu(service_id);
}

static int cma_save_ip_info(struct sockaddr *src_addr,
			    struct sockaddr *dst_addr,
			    struct ib_cm_event *ib_event,
			    __be64 service_id)
{
	struct cma_hdr *hdr;
	__be16 port;

	hdr = ib_event->private_data;
	if (hdr->cma_version != CMA_VERSION)
		return -EINVAL;

	port = htons(cma_port_from_service_id(service_id));

	switch (cma_get_ip_ver(hdr)) {
	case 4:
		cma_save_ip4_info((struct sockaddr_in *)src_addr,
				  (struct sockaddr_in *)dst_addr, hdr, port);
		break;
	case 6:
		cma_save_ip6_info((struct sockaddr_in6 *)src_addr,
				  (struct sockaddr_in6 *)dst_addr, hdr, port);
		break;
	default:
		return -EAFNOSUPPORT;
	}

	return 0;
}

static int cma_save_net_info(struct sockaddr *src_addr,
			     struct sockaddr *dst_addr,
			     struct rdma_cm_id *listen_id,
			     struct ib_cm_event *ib_event,
			     sa_family_t sa_family, __be64 service_id)
{
	if (sa_family == AF_IB) {
		if (ib_event->event == IB_CM_REQ_RECEIVED)
			cma_save_ib_info(src_addr, dst_addr, listen_id,
					 ib_event->param.req_rcvd.primary_path);
		else if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED)
			cma_save_ib_info(src_addr, dst_addr, listen_id, NULL);
		return 0;
	}

	return cma_save_ip_info(src_addr, dst_addr, ib_event, service_id);
}

static int cma_save_req_info(const struct ib_cm_event *ib_event,
			     struct cma_req_info *req)
{
	const struct ib_cm_req_event_param *req_param =
		&ib_event->param.req_rcvd;
	const struct ib_cm_sidr_req_event_param *sidr_param =
		&ib_event->param.sidr_req_rcvd;

	switch (ib_event->event) {
	case IB_CM_REQ_RECEIVED:
		req->device	= req_param->listen_id->device;
		req->port	= req_param->port;
		memcpy(&req->local_gid, &req_param->primary_path->sgid,
		       sizeof(req->local_gid));
		req->has_gid	= true;
		req->service_id	= req_param->primary_path->service_id;
		req->pkey	= be16_to_cpu(req_param->primary_path->pkey);
		if (req->pkey != req_param->bth_pkey)
			pr_warn_ratelimited("RDMA CMA: got different BTH P_Key (0x%x) and primary path P_Key (0x%x)\n"
					    "RDMA CMA: in the future this may cause the request to be dropped\n",
					    req_param->bth_pkey, req->pkey);
		break;
	case IB_CM_SIDR_REQ_RECEIVED:
		req->device	= sidr_param->listen_id->device;
		req->port	= sidr_param->port;
		req->has_gid	= false;
		req->service_id	= sidr_param->service_id;
		req->pkey	= sidr_param->pkey;
		if (req->pkey != sidr_param->bth_pkey)
			pr_warn_ratelimited("RDMA CMA: got different BTH P_Key (0x%x) and SIDR request payload P_Key (0x%x)\n"
					    "RDMA CMA: in the future this may cause the request to be dropped\n",
					    sidr_param->bth_pkey, req->pkey);
		break;
	default:
		return -EINVAL;
	}

	return 0;
}

static bool validate_ipv4_net_dev(struct net_device *net_dev,
				  const struct sockaddr_in *dst_addr,
				  const struct sockaddr_in *src_addr)
{
#ifdef INET
	struct sockaddr_in dst_tmp = *dst_addr;
	__be32 daddr = dst_addr->sin_addr.s_addr,
	       saddr = src_addr->sin_addr.s_addr;
	struct net_device *src_dev;
	struct rtentry *rte;
	bool ret;

	if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
	    ipv4_is_lbcast(daddr) || ipv4_is_zeronet(saddr) ||
	    ipv4_is_zeronet(daddr) || ipv4_is_loopback(daddr) ||
	    ipv4_is_loopback(saddr))
		return false;

	src_dev = ip_dev_find(net_dev->if_vnet, saddr);
	if (src_dev != net_dev) {
		if (src_dev != NULL)
			dev_put(src_dev);
		return false;
	}
	dev_put(src_dev);

	/*
	 * Make sure the socket address length field
	 * is set, else rtalloc1() will fail.
	 */
	dst_tmp.sin_len = sizeof(dst_tmp);

	CURVNET_SET(net_dev->if_vnet);
	rte = rtalloc1((struct sockaddr *)&dst_tmp, 1, 0);
	CURVNET_RESTORE();
	if (rte != NULL) {
		ret = (rte->rt_ifp == net_dev);
		RTFREE_LOCKED(rte);
	} else {
		ret = false;
	}
	return ret;
#else
	return false;
#endif
}

static bool validate_ipv6_net_dev(struct net_device *net_dev,
				  const struct sockaddr_in6 *dst_addr,
				  const struct sockaddr_in6 *src_addr)
{
#ifdef INET6
	struct sockaddr_in6 dst_tmp = *dst_addr;
	struct in6_addr in6_addr = src_addr->sin6_addr;
	struct net_device *src_dev;
	struct rtentry *rte;
	bool ret;

	src_dev = ip6_dev_find(net_dev->if_vnet, in6_addr);
	if (src_dev != net_dev)
		return false;

	/*
	 * Make sure the socket address length field
	 * is set, else rtalloc1() will fail.
	 */
	dst_tmp.sin6_len = sizeof(dst_tmp);

	CURVNET_SET(net_dev->if_vnet);
	rte = rtalloc1((struct sockaddr *)&dst_tmp, 1, 0);
	CURVNET_RESTORE();
	if (rte != NULL) {
		ret = (rte->rt_ifp == net_dev);
		RTFREE_LOCKED(rte);
	} else {
		ret = false;
	}
	return ret;
#else
	return false;
#endif
}

static bool validate_net_dev(struct net_device *net_dev,
			     const struct sockaddr *daddr,
			     const struct sockaddr *saddr)
{
	const struct sockaddr_in *daddr4 = (const struct sockaddr_in *)daddr;
	const struct sockaddr_in *saddr4 = (const struct sockaddr_in *)saddr;
	const struct sockaddr_in6 *daddr6 = (const struct sockaddr_in6 *)daddr;
	const struct sockaddr_in6 *saddr6 = (const struct sockaddr_in6 *)saddr;

	switch (daddr->sa_family) {
	case AF_INET:
		return saddr->sa_family == AF_INET &&
		       validate_ipv4_net_dev(net_dev, daddr4, saddr4);

	case AF_INET6:
		return saddr->sa_family == AF_INET6 &&
		       validate_ipv6_net_dev(net_dev, daddr6, saddr6);

	default:
		return false;
	}
}

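/*
 * Resolve the net device an incoming connection request arrived on:
 * recover the listen and source addresses from the request, look the
 * device up by device/port/P_Key/GID, then cross-check it against the
 * routing table via validate_net_dev() above.
 */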
static struct net_device *cma_get_net_dev(struct ib_cm_event *ib_event,
					  const struct cma_req_info *req)
{
	struct sockaddr_storage listen_addr_storage, src_addr_storage;
	struct sockaddr *listen_addr = (struct sockaddr *)&listen_addr_storage,
			*src_addr = (struct sockaddr *)&src_addr_storage;
	struct net_device *net_dev;
	const union ib_gid *gid = req->has_gid ? &req->local_gid : NULL;
	int err;

	err = cma_save_ip_info(listen_addr, src_addr, ib_event,
			       req->service_id);
	if (err)
		return ERR_PTR(err);

	net_dev = ib_get_net_dev_by_params(req->device, req->port, req->pkey,
					   gid, listen_addr);
	if (!net_dev)
		return ERR_PTR(-ENODEV);

	if (!validate_net_dev(net_dev, listen_addr, src_addr)) {
		dev_put(net_dev);
		return ERR_PTR(-EHOSTUNREACH);
	}

	return net_dev;
}

static enum rdma_port_space rdma_ps_from_service_id(__be64 service_id)
{
	return (be64_to_cpu(service_id) >> 16) & 0xffff;
}

static bool cma_match_private_data(struct rdma_id_private *id_priv,
				   const struct cma_hdr *hdr)
{
	struct sockaddr *addr = cma_src_addr(id_priv);
	__be32 ip4_addr;
	struct in6_addr ip6_addr;

	if (cma_any_addr(addr) && !id_priv->afonly)
		return true;

	switch (addr->sa_family) {
	case AF_INET:
		ip4_addr = ((struct sockaddr_in *)addr)->sin_addr.s_addr;
		if (cma_get_ip_ver(hdr) != 4)
			return false;
		if (!cma_any_addr(addr) &&
		    hdr->dst_addr.ip4.addr != ip4_addr)
			return false;
		break;
	case AF_INET6:
		ip6_addr = ((struct sockaddr_in6 *)addr)->sin6_addr;
		if (cma_get_ip_ver(hdr) != 6)
			return false;
		if (!cma_any_addr(addr) &&
		    memcmp(&hdr->dst_addr.ip6, &ip6_addr, sizeof(ip6_addr)))
			return false;
		break;
	case AF_IB:
		return true;
	default:
		return false;
	}

	return true;
}

static bool cma_protocol_roce_dev_port(struct ib_device *device, int port_num)
{
	enum rdma_link_layer ll = rdma_port_get_link_layer(device, port_num);
	enum rdma_transport_type transport =
		rdma_node_get_transport(device->node_type);

	return ll == IB_LINK_LAYER_ETHERNET && transport == RDMA_TRANSPORT_IB;
}

static bool cma_protocol_roce(const struct rdma_cm_id *id)
{
	struct ib_device *device = id->device;
	const int port_num = id->port_num ?: rdma_start_port(device);

	return cma_protocol_roce_dev_port(device, port_num);
}

static bool cma_match_net_dev(const struct rdma_cm_id *id,
			      const struct net_device *net_dev,
			      u8 port_num)
{
	const struct rdma_addr *addr = &id->route.addr;

	if (!net_dev)
		/* This request is an AF_IB request or a RoCE request */
		return (!id->port_num || id->port_num == port_num) &&
		       (addr->src_addr.ss_family == AF_IB ||
			cma_protocol_roce_dev_port(id->device, port_num));

	return !addr->dev_addr.bound_dev_if ||
	       (net_eq(dev_net(net_dev), addr->dev_addr.net) &&
		addr->dev_addr.bound_dev_if == net_dev->if_index);
}

static struct rdma_id_private *cma_find_listener(
		const struct rdma_bind_list *bind_list,
		const struct ib_cm_id	  *cm_id,
		const struct ib_cm_event  *ib_event,
		const struct cma_req_info *req,
		const struct net_device   *net_dev)
{
	struct rdma_id_private *id_priv, *id_priv_dev;

	if (!bind_list)
		return ERR_PTR(-EINVAL);

	hlist_for_each_entry(id_priv, &bind_list->owners, node) {
		if (cma_match_private_data(id_priv, ib_event->private_data)) {
			if (id_priv->id.device == cm_id->device &&
			    cma_match_net_dev(&id_priv->id, net_dev, req->port))
				return id_priv;
			list_for_each_entry(id_priv_dev,
					    &id_priv->listen_list,
					    listen_list) {
				if (id_priv_dev->id.device == cm_id->device &&
				    cma_match_net_dev(&id_priv_dev->id, net_dev, req->port))
					return id_priv_dev;
			}
		}
	}

	return ERR_PTR(-EINVAL);
}

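/*
 * Top-level lookup for an incoming CM event: capture the request
 * parameters, find and validate the arrival net device, locate the
 * bind list for the request's port space and port, and finally match
 * a listener on it.  On success *net_dev (when non-NULL) holds a
 * reference that the caller must drop with dev_put().
 */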
static struct rdma_id_private *cma_id_from_event(struct ib_cm_id *cm_id,
						 struct ib_cm_event *ib_event,
						 struct net_device **net_dev)
{
	struct cma_req_info req;
	struct rdma_bind_list *bind_list;
	struct rdma_id_private *id_priv;
	int err;

	err = cma_save_req_info(ib_event, &req);
	if (err)
		return ERR_PTR(err);

	*net_dev = cma_get_net_dev(ib_event, &req);
	if (IS_ERR(*net_dev)) {
		if (PTR_ERR(*net_dev) == -EAFNOSUPPORT) {
			/* Assuming the protocol is AF_IB */
			*net_dev = NULL;
		} else if (cma_protocol_roce_dev_port(req.device, req.port)) {
			/* TODO find the net dev matching the request parameters
			 * through the RoCE GID table */
			*net_dev = NULL;
		} else {
			return ERR_CAST(*net_dev);
		}
	}

	bind_list = cma_ps_find(*net_dev ? dev_net(*net_dev) : &init_net,
				rdma_ps_from_service_id(req.service_id),
				cma_port_from_service_id(req.service_id));
	id_priv = cma_find_listener(bind_list, cm_id, ib_event, &req, *net_dev);
	if (IS_ERR(id_priv) && *net_dev) {
		dev_put(*net_dev);
		*net_dev = NULL;
	}

	return id_priv;
}

static inline int cma_user_data_offset(struct rdma_id_private *id_priv)
{
	return cma_family(id_priv) == AF_IB ? 0 : sizeof(struct cma_hdr);
}

static void cma_cancel_route(struct rdma_id_private *id_priv)
{
	if (rdma_cap_ib_sa(id_priv->id.device, id_priv->id.port_num)) {
		if (id_priv->query)
			ib_sa_cancel_query(id_priv->query_id, id_priv->query);
	}
}

static void cma_cancel_listens(struct rdma_id_private *id_priv)
{
	struct rdma_id_private *dev_id_priv;

	/*
	 * Remove from listen_any_list to prevent added devices from spawning
	 * additional listen requests.
	 */
	mutex_lock(&lock);
	list_del(&id_priv->list);

	while (!list_empty(&id_priv->listen_list)) {
		dev_id_priv = list_entry(id_priv->listen_list.next,
					 struct rdma_id_private, listen_list);
		/* sync with device removal to avoid duplicate destruction */
		list_del_init(&dev_id_priv->list);
		list_del(&dev_id_priv->listen_list);
		mutex_unlock(&lock);

		rdma_destroy_id(&dev_id_priv->id);
		mutex_lock(&lock);
	}
	mutex_unlock(&lock);
}

static void cma_cancel_operation(struct rdma_id_private *id_priv,
				 enum rdma_cm_state state)
{
	switch (state) {
	case RDMA_CM_ADDR_QUERY:
		rdma_addr_cancel(&id_priv->id.route.addr.dev_addr);
		break;
	case RDMA_CM_ROUTE_QUERY:
		cma_cancel_route(id_priv);
		break;
	case RDMA_CM_LISTEN:
		if (cma_any_addr(cma_src_addr(id_priv)) && !id_priv->cma_dev)
			cma_cancel_listens(id_priv);
		break;
	default:
		break;
	}
}

static void cma_release_port(struct rdma_id_private *id_priv)
{
	struct rdma_bind_list *bind_list = id_priv->bind_list;
	struct vnet *net = id_priv->id.route.addr.dev_addr.net;

	if (!bind_list)
		return;

	mutex_lock(&lock);
	hlist_del(&id_priv->node);
	if (hlist_empty(&bind_list->owners)) {
		cma_ps_remove(net, bind_list->ps, bind_list->port);
		kfree(bind_list);
	}
	mutex_unlock(&lock);
}

static void cma_leave_mc_groups(struct rdma_id_private *id_priv)
{
	struct cma_multicast *mc;

	while (!list_empty(&id_priv->mc_list)) {
		mc = container_of(id_priv->mc_list.next,
				  struct cma_multicast, list);
		list_del(&mc->list);
		if (rdma_cap_ib_mcast(id_priv->cma_dev->device,
				      id_priv->id.port_num)) {
			ib_sa_free_multicast(mc->multicast.ib);
			kfree(mc);
		} else {
			if (mc->igmp_joined) {
				struct rdma_dev_addr *dev_addr =
					&id_priv->id.route.addr.dev_addr;
				struct net_device *ndev = NULL;

				if (dev_addr->bound_dev_if)
					ndev = dev_get_by_index(dev_addr->net,
								dev_addr->bound_dev_if);
				if (ndev) {
					dev_put(ndev);
				}
			}
			kref_put(&mc->mcref, release_mc);
		}
	}
}

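/*
 * Tear down an ID in a fixed order: move it to RDMA_CM_DESTROYING so
 * new callbacks abort, cancel whatever operation was in flight for the
 * old state, drain any running handler by bouncing handler_mutex, then
 * destroy the underlying IB/iWARP CM ID, leave multicast groups, and
 * release the device, port, and final reference.
 */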
void rdma_destroy_id(struct rdma_cm_id *id)
{
	struct rdma_id_private *id_priv;
	enum rdma_cm_state state;

	id_priv = container_of(id, struct rdma_id_private, id);
	state = cma_exch(id_priv, RDMA_CM_DESTROYING);
	cma_cancel_operation(id_priv, state);

	/*
	 * Wait for any active callback to finish.  New callbacks will find
	 * the id_priv state set to destroying and abort.
	 */
	mutex_lock(&id_priv->handler_mutex);
	mutex_unlock(&id_priv->handler_mutex);

	if (id_priv->cma_dev) {
		if (rdma_cap_ib_cm(id_priv->id.device, 1)) {
			if (id_priv->cm_id.ib)
				ib_destroy_cm_id(id_priv->cm_id.ib);
		} else if (rdma_cap_iw_cm(id_priv->id.device, 1)) {
			if (id_priv->cm_id.iw)
				iw_destroy_cm_id(id_priv->cm_id.iw);
		}
		cma_leave_mc_groups(id_priv);
		cma_release_dev(id_priv);
	}

	cma_release_port(id_priv);
	cma_deref_id(id_priv);
	wait_for_completion(&id_priv->comp);

	if (id_priv->internal_id)
		cma_deref_id(id_priv->id.context);

	kfree(id_priv->id.route.path_rec);
	kfree(id_priv);
}
EXPORT_SYMBOL(rdma_destroy_id);

static int cma_rep_recv(struct rdma_id_private *id_priv)
{
	int ret;

	ret = cma_modify_qp_rtr(id_priv, NULL);
	if (ret)
		goto reject;

	ret = cma_modify_qp_rts(id_priv, NULL);
	if (ret)
		goto reject;

	ret = ib_send_cm_rtu(id_priv->cm_id.ib, NULL, 0);
	if (ret)
		goto reject;

	return 0;
reject:
	cma_modify_qp_err(id_priv);
	ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED,
		       NULL, 0, NULL, 0);
	return ret;
}

static void cma_set_rep_event_data(struct rdma_cm_event *event,
				   struct ib_cm_rep_event_param *rep_data,
				   void *private_data)
{
	event->param.conn.private_data = private_data;
	event->param.conn.private_data_len = IB_CM_REP_PRIVATE_DATA_SIZE;
	event->param.conn.responder_resources = rep_data->responder_resources;
	event->param.conn.initiator_depth = rep_data->initiator_depth;
	event->param.conn.flow_control = rep_data->flow_control;
	event->param.conn.rnr_retry_count = rep_data->rnr_retry_count;
	event->param.conn.srq = rep_data->srq;
	event->param.conn.qp_num = rep_data->remote_qpn;
}

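/*
 * IB CM callback for the active/connected side.  Translates ib_cm
 * events (REP, RTU, DREQ, REJ, ...) into rdma_cm events and forwards
 * them to the consumer's handler under handler_mutex; a non-zero
 * return from the consumer destroys the ID.
 */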
static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
{
	struct rdma_id_private *id_priv = cm_id->context;
	struct rdma_cm_event event;
	int ret = 0;

	mutex_lock(&id_priv->handler_mutex);
	if ((ib_event->event != IB_CM_TIMEWAIT_EXIT &&
	     id_priv->state != RDMA_CM_CONNECT) ||
	    (ib_event->event == IB_CM_TIMEWAIT_EXIT &&
	     id_priv->state != RDMA_CM_DISCONNECT))
		goto out;

	memset(&event, 0, sizeof event);
	switch (ib_event->event) {
	case IB_CM_REQ_ERROR:
	case IB_CM_REP_ERROR:
		event.event = RDMA_CM_EVENT_UNREACHABLE;
		event.status = -ETIMEDOUT;
		break;
	case IB_CM_REP_RECEIVED:
		if (id_priv->id.qp) {
			event.status = cma_rep_recv(id_priv);
			event.event = event.status ? RDMA_CM_EVENT_CONNECT_ERROR :
						     RDMA_CM_EVENT_ESTABLISHED;
		} else {
			event.event = RDMA_CM_EVENT_CONNECT_RESPONSE;
		}
		cma_set_rep_event_data(&event, &ib_event->param.rep_rcvd,
				       ib_event->private_data);
		break;
	case IB_CM_RTU_RECEIVED:
	case IB_CM_USER_ESTABLISHED:
		event.event = RDMA_CM_EVENT_ESTABLISHED;
		break;
	case IB_CM_DREQ_ERROR:
		event.status = -ETIMEDOUT; /* fall through */
	case IB_CM_DREQ_RECEIVED:
	case IB_CM_DREP_RECEIVED:
		if (!cma_comp_exch(id_priv, RDMA_CM_CONNECT,
				   RDMA_CM_DISCONNECT))
			goto out;
		event.event = RDMA_CM_EVENT_DISCONNECTED;
		break;
	case IB_CM_TIMEWAIT_EXIT:
		event.event = RDMA_CM_EVENT_TIMEWAIT_EXIT;
		break;
	case IB_CM_MRA_RECEIVED:
		/* ignore event */
		goto out;
	case IB_CM_REJ_RECEIVED:
		cma_modify_qp_err(id_priv);
		event.status = ib_event->param.rej_rcvd.reason;
		event.event = RDMA_CM_EVENT_REJECTED;
		event.param.conn.private_data = ib_event->private_data;
		event.param.conn.private_data_len = IB_CM_REJ_PRIVATE_DATA_SIZE;
		break;
	default:
		pr_err("RDMA CMA: unexpected IB CM event: %d\n",
		       ib_event->event);
		goto out;
	}

	ret = id_priv->id.event_handler(&id_priv->id, &event);
	if (ret) {
		/* Destroy the CM ID by returning a non-zero value. */
		id_priv->cm_id.ib = NULL;
		cma_exch(id_priv, RDMA_CM_DESTROYING);
		mutex_unlock(&id_priv->handler_mutex);
		rdma_destroy_id(&id_priv->id);
		return ret;
	}
out:
	mutex_unlock(&id_priv->handler_mutex);
	return ret;
}

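/*
 * Build a child ID for an incoming connection request: clone the
 * listener's handler and port space, recover the addresses from the
 * request, and copy the primary (and, when present, alternate) path
 * records.  The child starts life in RDMA_CM_CONNECT.
 */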
static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
					       struct ib_cm_event *ib_event,
					       struct net_device *net_dev)
{
	struct rdma_id_private *id_priv;
	struct rdma_cm_id *id;
	struct rdma_route *rt;
	const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family;
	const __be64 service_id =
		      ib_event->param.req_rcvd.primary_path->service_id;
	int ret;

	id = rdma_create_id(listen_id->route.addr.dev_addr.net,
			    listen_id->event_handler, listen_id->context,
			    listen_id->ps, ib_event->param.req_rcvd.qp_type);
	if (IS_ERR(id))
		return NULL;

	id_priv = container_of(id, struct rdma_id_private, id);
	if (cma_save_net_info((struct sockaddr *)&id->route.addr.src_addr,
			      (struct sockaddr *)&id->route.addr.dst_addr,
			      listen_id, ib_event, ss_family, service_id))
		goto err;

	rt = &id->route;
	rt->num_paths = ib_event->param.req_rcvd.alternate_path ? 2 : 1;
	rt->path_rec = kmalloc(sizeof *rt->path_rec * rt->num_paths,
			       GFP_KERNEL);
	if (!rt->path_rec)
		goto err;

	rt->path_rec[0] = *ib_event->param.req_rcvd.primary_path;
	if (rt->num_paths == 2)
		rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path;

	if (net_dev) {
		ret = rdma_copy_addr(&rt->addr.dev_addr, net_dev, NULL);
		if (ret)
			goto err;
	} else {
		if (!cma_protocol_roce(listen_id) &&
		    cma_any_addr(cma_src_addr(id_priv))) {
			rt->addr.dev_addr.dev_type = ARPHRD_INFINIBAND;
			rdma_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid);
			ib_addr_set_pkey(&rt->addr.dev_addr, be16_to_cpu(rt->path_rec[0].pkey));
		} else if (!cma_any_addr(cma_src_addr(id_priv))) {
			ret = cma_translate_addr(cma_src_addr(id_priv), &rt->addr.dev_addr);
			if (ret)
				goto err;
		}
	}
	rdma_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid);

	id_priv->state = RDMA_CM_CONNECT;
	return id_priv;

err:
	rdma_destroy_id(id);
	return NULL;
}

static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id,
					      struct ib_cm_event *ib_event,
					      struct net_device *net_dev)
{
	struct rdma_id_private *id_priv;
	struct rdma_cm_id *id;
	const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family;
	struct vnet *net = listen_id->route.addr.dev_addr.net;
	int ret;

	id = rdma_create_id(net, listen_id->event_handler, listen_id->context,
			    listen_id->ps, IB_QPT_UD);
	if (IS_ERR(id))
		return NULL;

	id_priv = container_of(id, struct rdma_id_private, id);
	if (cma_save_net_info((struct sockaddr *)&id->route.addr.src_addr,
			      (struct sockaddr *)&id->route.addr.dst_addr,
			      listen_id, ib_event, ss_family,
			      ib_event->param.sidr_req_rcvd.service_id))
		goto err;

	if (net_dev) {
		ret = rdma_copy_addr(&id->route.addr.dev_addr, net_dev, NULL);
		if (ret)
			goto err;
	} else {
		if (!cma_any_addr(cma_src_addr(id_priv))) {
			ret = cma_translate_addr(cma_src_addr(id_priv),
						 &id->route.addr.dev_addr);
			if (ret)
				goto err;
		}
	}

	id_priv->state = RDMA_CM_CONNECT;
	return id_priv;
err:
	rdma_destroy_id(id);
	return NULL;
}

static void cma_set_req_event_data(struct rdma_cm_event *event,
				   struct ib_cm_req_event_param *req_data,
				   void *private_data, int offset)
{
	event->param.conn.private_data = (char *)private_data + offset;
	event->param.conn.private_data_len = IB_CM_REQ_PRIVATE_DATA_SIZE - offset;
	event->param.conn.responder_resources = req_data->responder_resources;
	event->param.conn.initiator_depth = req_data->initiator_depth;
	event->param.conn.flow_control = req_data->flow_control;
	event->param.conn.retry_count = req_data->retry_count;
	event->param.conn.rnr_retry_count = req_data->rnr_retry_count;
	event->param.conn.srq = req_data->srq;
	event->param.conn.qp_num = req_data->remote_qpn;
}

static int cma_check_req_qp_type(struct rdma_cm_id *id, struct ib_cm_event *ib_event)
{
	return (((ib_event->event == IB_CM_REQ_RECEIVED) &&
		 (ib_event->param.req_rcvd.qp_type == id->qp_type)) ||
		((ib_event->event == IB_CM_SIDR_REQ_RECEIVED) &&
		 (id->qp_type == IB_QPT_UD)) ||
		(!id->qp_type));
}

static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
{
	struct rdma_id_private *listen_id, *conn_id = NULL;
	struct rdma_cm_event event;
	struct net_device *net_dev;
	int offset, ret;

	listen_id = cma_id_from_event(cm_id, ib_event, &net_dev);
	if (IS_ERR(listen_id))
		return PTR_ERR(listen_id);

	if (!cma_check_req_qp_type(&listen_id->id, ib_event)) {
		ret = -EINVAL;
		goto net_dev_put;
	}

	mutex_lock(&listen_id->handler_mutex);
	if (listen_id->state != RDMA_CM_LISTEN) {
		ret = -ECONNABORTED;
		goto err1;
	}

	memset(&event, 0, sizeof event);
	offset = cma_user_data_offset(listen_id);
	event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
	if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED) {
		conn_id = cma_new_udp_id(&listen_id->id, ib_event, net_dev);
		event.param.ud.private_data = (char *)ib_event->private_data + offset;
		event.param.ud.private_data_len =
				IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE - offset;
	} else {
		conn_id = cma_new_conn_id(&listen_id->id, ib_event, net_dev);
		cma_set_req_event_data(&event, &ib_event->param.req_rcvd,
				       ib_event->private_data, offset);
	}
	if (!conn_id) {
		ret = -ENOMEM;
		goto err1;
	}

	mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
	ret = cma_acquire_dev(conn_id, listen_id);
	if (ret)
		goto err2;

	conn_id->cm_id.ib = cm_id;
	cm_id->context = conn_id;
	cm_id->cm_handler = cma_ib_handler;

	/*
	 * Protect against the user destroying conn_id from another thread
	 * until we're done accessing it.
	 */
	atomic_inc(&conn_id->refcount);
	ret = conn_id->id.event_handler(&conn_id->id, &event);
	if (ret)
		goto err3;
	/*
	 * Acquire mutex to prevent user executing rdma_destroy_id()
	 * while we're accessing the cm_id.
	 */
	mutex_lock(&lock);
	if (cma_comp(conn_id, RDMA_CM_CONNECT) &&
	    (conn_id->id.qp_type != IB_QPT_UD))
		ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
	mutex_unlock(&lock);
	mutex_unlock(&conn_id->handler_mutex);
	mutex_unlock(&listen_id->handler_mutex);
	cma_deref_id(conn_id);
	if (net_dev)
		dev_put(net_dev);
	return 0;

err3:
	cma_deref_id(conn_id);
	/* Destroy the CM ID by returning a non-zero value. */
	conn_id->cm_id.ib = NULL;
err2:
	cma_exch(conn_id, RDMA_CM_DESTROYING);
	mutex_unlock(&conn_id->handler_mutex);
err1:
	mutex_unlock(&listen_id->handler_mutex);
	if (conn_id)
		rdma_destroy_id(&conn_id->id);

net_dev_put:
	if (net_dev)
		dev_put(net_dev);

	return ret;
}

__be64 rdma_get_service_id(struct rdma_cm_id *id, struct sockaddr *addr)
{
	if (addr->sa_family == AF_IB)
		return ((struct sockaddr_ib *) addr)->sib_sid;

	return cpu_to_be64(((u64)id->ps << 16) + be16_to_cpu(cma_port(addr)));
}
EXPORT_SYMBOL(rdma_get_service_id);

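/*
 * For IP addresses the service ID above packs the port space into bits
 * 16-31 and the port number into bits 0-15.  As a worked example,
 * assuming RDMA_PS_TCP is 0x0106 (its value in rdma_cm.h at this
 * revision), TCP port 4791 yields:
 *
 *	(0x0106 << 16) + 4791 = 0x010612b7
 *
 * which is what rdma_ps_from_service_id() and
 * cma_port_from_service_id() above decode back out.
 */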
*/ 2051 id_priv->cm_id.iw = NULL; 2052 cma_exch(id_priv, RDMA_CM_DESTROYING); 2053 mutex_unlock(&id_priv->handler_mutex); 2054 rdma_destroy_id(&id_priv->id); 2055 return ret; 2056 } 2057 2058out: 2059 mutex_unlock(&id_priv->handler_mutex); 2060 return ret; 2061} 2062 2063static int iw_conn_req_handler(struct iw_cm_id *cm_id, 2064 struct iw_cm_event *iw_event) 2065{ 2066 struct rdma_cm_id *new_cm_id; 2067 struct rdma_id_private *listen_id, *conn_id; 2068 struct rdma_cm_event event; 2069 int ret = -ECONNABORTED; 2070 struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr; 2071 struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr; 2072 2073 listen_id = cm_id->context; 2074 2075 mutex_lock(&listen_id->handler_mutex); 2076 if (listen_id->state != RDMA_CM_LISTEN) 2077 goto out; 2078 2079 /* Create a new RDMA id for the new IW CM ID */ 2080 new_cm_id = rdma_create_id(listen_id->id.route.addr.dev_addr.net, 2081 listen_id->id.event_handler, 2082 listen_id->id.context, 2083 RDMA_PS_TCP, IB_QPT_RC); 2084 if (IS_ERR(new_cm_id)) { 2085 ret = -ENOMEM; 2086 goto out; 2087 } 2088 conn_id = container_of(new_cm_id, struct rdma_id_private, id); 2089 mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING); 2090 conn_id->state = RDMA_CM_CONNECT; 2091 2092 ret = rdma_translate_ip(laddr, &conn_id->id.route.addr.dev_addr, NULL); 2093 if (ret) { 2094 mutex_unlock(&conn_id->handler_mutex); 2095 rdma_destroy_id(new_cm_id); 2096 goto out; 2097 } 2098 2099 ret = cma_acquire_dev(conn_id, listen_id); 2100 if (ret) { 2101 mutex_unlock(&conn_id->handler_mutex); 2102 rdma_destroy_id(new_cm_id); 2103 goto out; 2104 } 2105 2106 conn_id->cm_id.iw = cm_id; 2107 cm_id->context = conn_id; 2108 cm_id->cm_handler = cma_iw_handler; 2109 2110 memcpy(cma_src_addr(conn_id), laddr, rdma_addr_size(laddr)); 2111 memcpy(cma_dst_addr(conn_id), raddr, rdma_addr_size(raddr)); 2112 2113 memset(&event, 0, sizeof event); 2114 event.event = RDMA_CM_EVENT_CONNECT_REQUEST; 2115 event.param.conn.private_data = iw_event->private_data; 2116 event.param.conn.private_data_len = iw_event->private_data_len; 2117 event.param.conn.initiator_depth = iw_event->ird; 2118 event.param.conn.responder_resources = iw_event->ord; 2119 2120 /* 2121 * Protect against the user destroying conn_id from another thread 2122 * until we're done accessing it. 
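 * The reference taken here pairs with the cma_deref_id() calls on both
 * the success and error paths below.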
2123 */ 2124 atomic_inc(&conn_id->refcount); 2125 ret = conn_id->id.event_handler(&conn_id->id, &event); 2126 if (ret) { 2127 /* User wants to destroy the CM ID */ 2128 conn_id->cm_id.iw = NULL; 2129 cma_exch(conn_id, RDMA_CM_DESTROYING); 2130 mutex_unlock(&conn_id->handler_mutex); 2131 cma_deref_id(conn_id); 2132 rdma_destroy_id(&conn_id->id); 2133 goto out; 2134 } 2135 2136 mutex_unlock(&conn_id->handler_mutex); 2137 cma_deref_id(conn_id); 2138 2139out: 2140 mutex_unlock(&listen_id->handler_mutex); 2141 return ret; 2142} 2143 2144static int cma_ib_listen(struct rdma_id_private *id_priv) 2145{ 2146 struct sockaddr *addr; 2147 struct ib_cm_id *id; 2148 __be64 svc_id; 2149 2150 addr = cma_src_addr(id_priv); 2151 svc_id = rdma_get_service_id(&id_priv->id, addr); 2152 id = ib_cm_insert_listen(id_priv->id.device, cma_req_handler, svc_id); 2153 if (IS_ERR(id)) 2154 return PTR_ERR(id); 2155 id_priv->cm_id.ib = id; 2156 2157 return 0; 2158} 2159 2160static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog) 2161{ 2162 int ret; 2163 struct iw_cm_id *id; 2164 2165 id = iw_create_cm_id(id_priv->id.device, 2166 iw_conn_req_handler, 2167 id_priv); 2168 if (IS_ERR(id)) 2169 return PTR_ERR(id); 2170 2171 id->tos = id_priv->tos; 2172 id_priv->cm_id.iw = id; 2173 2174 memcpy(&id_priv->cm_id.iw->local_addr, cma_src_addr(id_priv), 2175 rdma_addr_size(cma_src_addr(id_priv))); 2176 2177 ret = iw_cm_listen(id_priv->cm_id.iw, backlog); 2178 2179 if (ret) { 2180 iw_destroy_cm_id(id_priv->cm_id.iw); 2181 id_priv->cm_id.iw = NULL; 2182 } 2183 2184 return ret; 2185} 2186 2187static int cma_listen_handler(struct rdma_cm_id *id, 2188 struct rdma_cm_event *event) 2189{ 2190 struct rdma_id_private *id_priv = id->context; 2191 2192 id->context = id_priv->id.context; 2193 id->event_handler = id_priv->id.event_handler; 2194 return id_priv->id.event_handler(id, event); 2195} 2196 2197static void cma_listen_on_dev(struct rdma_id_private *id_priv, 2198 struct cma_device *cma_dev) 2199{ 2200 struct rdma_id_private *dev_id_priv; 2201 struct rdma_cm_id *id; 2202 struct vnet *net = id_priv->id.route.addr.dev_addr.net; 2203 int ret; 2204 2205 if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1)) 2206 return; 2207 2208 id = rdma_create_id(net, cma_listen_handler, id_priv, id_priv->id.ps, 2209 id_priv->id.qp_type); 2210 if (IS_ERR(id)) 2211 return; 2212 2213 dev_id_priv = container_of(id, struct rdma_id_private, id); 2214 2215 dev_id_priv->state = RDMA_CM_ADDR_BOUND; 2216 memcpy(cma_src_addr(dev_id_priv), cma_src_addr(id_priv), 2217 rdma_addr_size(cma_src_addr(id_priv))); 2218 2219 _cma_attach_to_dev(dev_id_priv, cma_dev); 2220 list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list); 2221 atomic_inc(&id_priv->refcount); 2222 dev_id_priv->internal_id = 1; 2223 dev_id_priv->afonly = id_priv->afonly; 2224 2225 ret = rdma_listen(id, id_priv->backlog); 2226 if (ret) 2227 pr_warn("RDMA CMA: cma_listen_on_dev, error %d, listening on device %s\n", 2228 ret, cma_dev->device->name); 2229} 2230 2231static void cma_listen_on_all(struct rdma_id_private *id_priv) 2232{ 2233 struct cma_device *cma_dev; 2234 2235 mutex_lock(&lock); 2236 list_add_tail(&id_priv->list, &listen_any_list); 2237 list_for_each_entry(cma_dev, &dev_list, list) 2238 cma_listen_on_dev(id_priv, cma_dev); 2239 mutex_unlock(&lock); 2240} 2241 2242void rdma_set_service_type(struct rdma_cm_id *id, int tos) 2243{ 2244 struct rdma_id_private *id_priv; 2245 2246 id_priv = container_of(id, struct rdma_id_private, id); 2247 id_priv->tos = (u8) 
tos; 2248} 2249EXPORT_SYMBOL(rdma_set_service_type); 2250 2251static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec, 2252 void *context) 2253{ 2254 struct cma_work *work = context; 2255 struct rdma_route *route; 2256 2257 route = &work->id->id.route; 2258 2259 if (!status) { 2260 route->num_paths = 1; 2261 *route->path_rec = *path_rec; 2262 } else { 2263 work->old_state = RDMA_CM_ROUTE_QUERY; 2264 work->new_state = RDMA_CM_ADDR_RESOLVED; 2265 work->event.event = RDMA_CM_EVENT_ROUTE_ERROR; 2266 work->event.status = status; 2267 } 2268 2269 queue_work(cma_wq, &work->work); 2270} 2271 2272static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms, 2273 struct cma_work *work) 2274{ 2275 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; 2276 struct ib_sa_path_rec path_rec; 2277 ib_sa_comp_mask comp_mask; 2278 struct sockaddr_in6 *sin6; 2279 struct sockaddr_ib *sib; 2280 2281 memset(&path_rec, 0, sizeof path_rec); 2282 rdma_addr_get_sgid(dev_addr, &path_rec.sgid); 2283 rdma_addr_get_dgid(dev_addr, &path_rec.dgid); 2284 path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr)); 2285 path_rec.numb_path = 1; 2286 path_rec.reversible = 1; 2287 path_rec.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv)); 2288 2289 comp_mask = IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID | 2290 IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH | 2291 IB_SA_PATH_REC_REVERSIBLE | IB_SA_PATH_REC_SERVICE_ID; 2292 2293 switch (cma_family(id_priv)) { 2294 case AF_INET: 2295 path_rec.qos_class = cpu_to_be16((u16) id_priv->tos); 2296 comp_mask |= IB_SA_PATH_REC_QOS_CLASS; 2297 break; 2298 case AF_INET6: 2299 sin6 = (struct sockaddr_in6 *) cma_src_addr(id_priv); 2300 path_rec.traffic_class = (u8) (be32_to_cpu(sin6->sin6_flowinfo) >> 20); 2301 comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS; 2302 break; 2303 case AF_IB: 2304 sib = (struct sockaddr_ib *) cma_src_addr(id_priv); 2305 path_rec.traffic_class = (u8) (be32_to_cpu(sib->sib_flowinfo) >> 20); 2306 comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS; 2307 break; 2308 } 2309 2310 id_priv->query_id = ib_sa_path_rec_get(&sa_client, id_priv->id.device, 2311 id_priv->id.port_num, &path_rec, 2312 comp_mask, timeout_ms, 2313 GFP_KERNEL, cma_query_handler, 2314 work, &id_priv->query); 2315 2316 return (id_priv->query_id < 0) ? 
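/* ib_sa_path_rec_get() stores a negative errno in query_id on failure */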
id_priv->query_id : 0; 2317} 2318 2319static void cma_work_handler(struct work_struct *_work) 2320{ 2321 struct cma_work *work = container_of(_work, struct cma_work, work); 2322 struct rdma_id_private *id_priv = work->id; 2323 int destroy = 0; 2324 2325 mutex_lock(&id_priv->handler_mutex); 2326 if (!cma_comp_exch(id_priv, work->old_state, work->new_state)) 2327 goto out; 2328 2329 if (id_priv->id.event_handler(&id_priv->id, &work->event)) { 2330 cma_exch(id_priv, RDMA_CM_DESTROYING); 2331 destroy = 1; 2332 } 2333out: 2334 mutex_unlock(&id_priv->handler_mutex); 2335 cma_deref_id(id_priv); 2336 if (destroy) 2337 rdma_destroy_id(&id_priv->id); 2338 kfree(work); 2339} 2340 2341static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms) 2342{ 2343 struct rdma_route *route = &id_priv->id.route; 2344 struct cma_work *work; 2345 int ret; 2346 2347 work = kzalloc(sizeof *work, GFP_KERNEL); 2348 if (!work) 2349 return -ENOMEM; 2350 2351 work->id = id_priv; 2352 INIT_WORK(&work->work, cma_work_handler); 2353 work->old_state = RDMA_CM_ROUTE_QUERY; 2354 work->new_state = RDMA_CM_ROUTE_RESOLVED; 2355 work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED; 2356 2357 route->path_rec = kmalloc(sizeof *route->path_rec, GFP_KERNEL); 2358 if (!route->path_rec) { 2359 ret = -ENOMEM; 2360 goto err1; 2361 } 2362 2363 ret = cma_query_ib_route(id_priv, timeout_ms, work); 2364 if (ret) 2365 goto err2; 2366 2367 return 0; 2368err2: 2369 kfree(route->path_rec); 2370 route->path_rec = NULL; 2371err1: 2372 kfree(work); 2373 return ret; 2374} 2375 2376int rdma_set_ib_paths(struct rdma_cm_id *id, 2377 struct ib_sa_path_rec *path_rec, int num_paths) 2378{ 2379 struct rdma_id_private *id_priv; 2380 int ret; 2381 2382 id_priv = container_of(id, struct rdma_id_private, id); 2383 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, 2384 RDMA_CM_ROUTE_RESOLVED)) 2385 return -EINVAL; 2386 2387 id->route.path_rec = kmemdup(path_rec, sizeof *path_rec * num_paths, 2388 GFP_KERNEL); 2389 if (!id->route.path_rec) { 2390 ret = -ENOMEM; 2391 goto err; 2392 } 2393 2394 id->route.num_paths = num_paths; 2395 return 0; 2396err: 2397 cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_ADDR_RESOLVED); 2398 return ret; 2399} 2400EXPORT_SYMBOL(rdma_set_ib_paths); 2401 2402static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms) 2403{ 2404 struct cma_work *work; 2405 2406 work = kzalloc(sizeof *work, GFP_KERNEL); 2407 if (!work) 2408 return -ENOMEM; 2409 2410 work->id = id_priv; 2411 INIT_WORK(&work->work, cma_work_handler); 2412 work->old_state = RDMA_CM_ROUTE_QUERY; 2413 work->new_state = RDMA_CM_ROUTE_RESOLVED; 2414 work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED; 2415 queue_work(cma_wq, &work->work); 2416 return 0; 2417} 2418 2419static int iboe_tos_to_sl(struct net_device *ndev, int tos) 2420{ 2421 /* TODO: Implement this function */ 2422 return 0; 2423} 2424 2425static enum ib_gid_type cma_route_gid_type(enum rdma_network_type network_type, 2426 unsigned long supported_gids, 2427 enum ib_gid_type default_gid) 2428{ 2429 if ((network_type == RDMA_NETWORK_IPV4 || 2430 network_type == RDMA_NETWORK_IPV6) && 2431 test_bit(IB_GID_TYPE_ROCE_UDP_ENCAP, &supported_gids)) 2432 return IB_GID_TYPE_ROCE_UDP_ENCAP; 2433 2434 return default_gid; 2435} 2436 2437static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) 2438{ 2439 struct rdma_route *route = &id_priv->id.route; 2440 struct rdma_addr *addr = &route->addr; 2441 struct cma_work *work; 2442 int ret; 2443 struct net_device *ndev = NULL; 2444 
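	/*
	 * RoCE (IBoE) ports have no subnet administrator to query, so the
	 * path record is synthesized locally: the GIDs are derived from the
	 * IP addresses, the DMAC comes from address resolution, and the MTU
	 * and rate are taken from the bound net device.  The work item only
	 * reports the transition to RDMA_CM_ROUTE_RESOLVED.
	 */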
2445 2446 work = kzalloc(sizeof *work, GFP_KERNEL); 2447 if (!work) 2448 return -ENOMEM; 2449 2450 work->id = id_priv; 2451 INIT_WORK(&work->work, cma_work_handler); 2452 2453 route->path_rec = kzalloc(sizeof *route->path_rec, GFP_KERNEL); 2454 if (!route->path_rec) { 2455 ret = -ENOMEM; 2456 goto err1; 2457 } 2458 2459 route->num_paths = 1; 2460 2461 if (addr->dev_addr.bound_dev_if) { 2462 unsigned long supported_gids; 2463 2464 ndev = dev_get_by_index(addr->dev_addr.net, 2465 addr->dev_addr.bound_dev_if); 2466 if (!ndev) { 2467 ret = -ENODEV; 2468 goto err2; 2469 } 2470 2471 if (ndev->if_flags & IFF_LOOPBACK) { 2472 dev_put(ndev); 2473 if (!id_priv->id.device->get_netdev) { 2474 ret = -EOPNOTSUPP; 2475 goto err2; 2476 } 2477 2478 ndev = id_priv->id.device->get_netdev(id_priv->id.device, 2479 id_priv->id.port_num); 2480 if (!ndev) { 2481 ret = -ENODEV; 2482 goto err2; 2483 } 2484 } 2485 2486 route->path_rec->net = ndev->if_vnet; 2487 route->path_rec->ifindex = ndev->if_index; 2488 supported_gids = roce_gid_type_mask_support(id_priv->id.device, 2489 id_priv->id.port_num); 2490 route->path_rec->gid_type = 2491 cma_route_gid_type(addr->dev_addr.network, 2492 supported_gids, 2493 id_priv->gid_type); 2494 } 2495 if (!ndev) { 2496 ret = -ENODEV; 2497 goto err2; 2498 } 2499 2500 memcpy(route->path_rec->dmac, addr->dev_addr.dst_dev_addr, ETH_ALEN); 2501 2502 rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr, 2503 &route->path_rec->sgid); 2504 rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.dst_addr, 2505 &route->path_rec->dgid); 2506 2507 /* Use the hint from IP Stack to select GID Type */ 2508 if (route->path_rec->gid_type < ib_network_to_gid_type(addr->dev_addr.network)) 2509 route->path_rec->gid_type = ib_network_to_gid_type(addr->dev_addr.network); 2510 if (((struct sockaddr *)&id_priv->id.route.addr.dst_addr)->sa_family != AF_IB) 2511 /* TODO: get the hoplimit from the inet/inet6 device */ 2512 route->path_rec->hop_limit = addr->dev_addr.hoplimit; 2513 else 2514 route->path_rec->hop_limit = 1; 2515 route->path_rec->reversible = 1; 2516 route->path_rec->pkey = cpu_to_be16(0xffff); 2517 route->path_rec->mtu_selector = IB_SA_EQ; 2518 route->path_rec->sl = iboe_tos_to_sl(ndev, id_priv->tos); 2519 route->path_rec->mtu = iboe_get_mtu(ndev->if_mtu); 2520 route->path_rec->rate_selector = IB_SA_EQ; 2521 route->path_rec->rate = iboe_get_rate(ndev); 2522 dev_put(ndev); 2523 route->path_rec->packet_life_time_selector = IB_SA_EQ; 2524 route->path_rec->packet_life_time = CMA_IBOE_PACKET_LIFETIME; 2525 if (!route->path_rec->mtu) { 2526 ret = -EINVAL; 2527 goto err2; 2528 } 2529 2530 work->old_state = RDMA_CM_ROUTE_QUERY; 2531 work->new_state = RDMA_CM_ROUTE_RESOLVED; 2532 work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED; 2533 work->event.status = 0; 2534 2535 queue_work(cma_wq, &work->work); 2536 2537 return 0; 2538 2539err2: 2540 kfree(route->path_rec); 2541 route->path_rec = NULL; 2542err1: 2543 kfree(work); 2544 return ret; 2545} 2546 2547int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms) 2548{ 2549 struct rdma_id_private *id_priv; 2550 int ret; 2551 2552 id_priv = container_of(id, struct rdma_id_private, id); 2553 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, RDMA_CM_ROUTE_QUERY)) 2554 return -EINVAL; 2555 2556 atomic_inc(&id_priv->refcount); 2557 if (rdma_cap_ib_sa(id->device, id->port_num)) 2558 ret = cma_resolve_ib_route(id_priv, timeout_ms); 2559 else if (rdma_protocol_roce(id->device, id->port_num)) 2560 ret = cma_resolve_iboe_route(id_priv); 2561 else if 
(rdma_protocol_iwarp(id->device, id->port_num)) 2562 ret = cma_resolve_iw_route(id_priv, timeout_ms); 2563 else 2564 ret = -ENOSYS; 2565 2566 if (ret) 2567 goto err; 2568 2569 return 0; 2570err: 2571 cma_comp_exch(id_priv, RDMA_CM_ROUTE_QUERY, RDMA_CM_ADDR_RESOLVED); 2572 cma_deref_id(id_priv); 2573 return ret; 2574} 2575EXPORT_SYMBOL(rdma_resolve_route); 2576 2577static void cma_set_loopback(struct sockaddr *addr) 2578{ 2579 switch (addr->sa_family) { 2580 case AF_INET: 2581 ((struct sockaddr_in *) addr)->sin_addr.s_addr = htonl(INADDR_LOOPBACK); 2582 break; 2583 case AF_INET6: 2584 ipv6_addr_set(&((struct sockaddr_in6 *) addr)->sin6_addr, 2585 0, 0, 0, htonl(1)); 2586 break; 2587 default: 2588 ib_addr_set(&((struct sockaddr_ib *) addr)->sib_addr, 2589 0, 0, 0, htonl(1)); 2590 break; 2591 } 2592} 2593 2594static int cma_bind_loopback(struct rdma_id_private *id_priv) 2595{ 2596 struct cma_device *cma_dev, *cur_dev; 2597 struct ib_port_attr port_attr; 2598 union ib_gid gid; 2599 u16 pkey; 2600 int ret; 2601 u8 p; 2602 2603 cma_dev = NULL; 2604 mutex_lock(&lock); 2605 list_for_each_entry(cur_dev, &dev_list, list) { 2606 if (cma_family(id_priv) == AF_IB && 2607 !rdma_cap_ib_cm(cur_dev->device, 1)) 2608 continue; 2609 2610 if (!cma_dev) 2611 cma_dev = cur_dev; 2612 2613 for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) { 2614 if (!ib_query_port(cur_dev->device, p, &port_attr) && 2615 port_attr.state == IB_PORT_ACTIVE) { 2616 cma_dev = cur_dev; 2617 goto port_found; 2618 } 2619 } 2620 } 2621 2622 if (!cma_dev) { 2623 ret = -ENODEV; 2624 goto out; 2625 } 2626 2627 p = 1; 2628 2629port_found: 2630 ret = ib_get_cached_gid(cma_dev->device, p, 0, &gid, NULL); 2631 if (ret) 2632 goto out; 2633 2634 ret = ib_get_cached_pkey(cma_dev->device, p, 0, &pkey); 2635 if (ret) 2636 goto out; 2637 2638 id_priv->id.route.addr.dev_addr.dev_type = 2639 (rdma_protocol_ib(cma_dev->device, p)) ? 
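	    /* pick the ARP hardware type matching the port's link layer */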
2640 ARPHRD_INFINIBAND : ARPHRD_ETHER; 2641 2642 rdma_addr_set_sgid(&id_priv->id.route.addr.dev_addr, &gid); 2643 ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr, pkey); 2644 id_priv->id.port_num = p; 2645 cma_attach_to_dev(id_priv, cma_dev); 2646 cma_set_loopback(cma_src_addr(id_priv)); 2647out: 2648 mutex_unlock(&lock); 2649 return ret; 2650} 2651 2652static void addr_handler(int status, struct sockaddr *src_addr, 2653 struct rdma_dev_addr *dev_addr, void *context) 2654{ 2655 struct rdma_id_private *id_priv = context; 2656 struct rdma_cm_event event; 2657 2658 memset(&event, 0, sizeof event); 2659 mutex_lock(&id_priv->handler_mutex); 2660 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, 2661 RDMA_CM_ADDR_RESOLVED)) 2662 goto out; 2663 2664 memcpy(cma_src_addr(id_priv), src_addr, rdma_addr_size(src_addr)); 2665 if (!status && !id_priv->cma_dev) 2666 status = cma_acquire_dev(id_priv, NULL); 2667 2668 if (status) { 2669 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, 2670 RDMA_CM_ADDR_BOUND)) 2671 goto out; 2672 event.event = RDMA_CM_EVENT_ADDR_ERROR; 2673 event.status = status; 2674 } else 2675 event.event = RDMA_CM_EVENT_ADDR_RESOLVED; 2676 2677 if (id_priv->id.event_handler(&id_priv->id, &event)) { 2678 cma_exch(id_priv, RDMA_CM_DESTROYING); 2679 mutex_unlock(&id_priv->handler_mutex); 2680 cma_deref_id(id_priv); 2681 rdma_destroy_id(&id_priv->id); 2682 return; 2683 } 2684out: 2685 mutex_unlock(&id_priv->handler_mutex); 2686 cma_deref_id(id_priv); 2687} 2688 2689static int cma_resolve_loopback(struct rdma_id_private *id_priv) 2690{ 2691 struct cma_work *work; 2692 union ib_gid gid; 2693 int ret; 2694 2695 work = kzalloc(sizeof *work, GFP_KERNEL); 2696 if (!work) 2697 return -ENOMEM; 2698 2699 if (!id_priv->cma_dev) { 2700 ret = cma_bind_loopback(id_priv); 2701 if (ret) 2702 goto err; 2703 } 2704 2705 rdma_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid); 2706 rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid); 2707 2708 work->id = id_priv; 2709 INIT_WORK(&work->work, cma_work_handler); 2710 work->old_state = RDMA_CM_ADDR_QUERY; 2711 work->new_state = RDMA_CM_ADDR_RESOLVED; 2712 work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED; 2713 queue_work(cma_wq, &work->work); 2714 return 0; 2715err: 2716 kfree(work); 2717 return ret; 2718} 2719 2720static int cma_resolve_ib_addr(struct rdma_id_private *id_priv) 2721{ 2722 struct cma_work *work; 2723 int ret; 2724 2725 work = kzalloc(sizeof *work, GFP_KERNEL); 2726 if (!work) 2727 return -ENOMEM; 2728 2729 if (!id_priv->cma_dev) { 2730 ret = cma_resolve_ib_dev(id_priv); 2731 if (ret) 2732 goto err; 2733 } 2734 2735 rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, (union ib_gid *) 2736 &(((struct sockaddr_ib *) &id_priv->id.route.addr.dst_addr)->sib_addr)); 2737 2738 work->id = id_priv; 2739 INIT_WORK(&work->work, cma_work_handler); 2740 work->old_state = RDMA_CM_ADDR_QUERY; 2741 work->new_state = RDMA_CM_ADDR_RESOLVED; 2742 work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED; 2743 queue_work(cma_wq, &work->work); 2744 return 0; 2745err: 2746 kfree(work); 2747 return ret; 2748} 2749 2750static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, 2751 struct sockaddr *dst_addr) 2752{ 2753 if (!src_addr || !src_addr->sa_family) { 2754 src_addr = (struct sockaddr *) &id->route.addr.src_addr; 2755 src_addr->sa_family = dst_addr->sa_family; 2756 if (dst_addr->sa_family == AF_INET6) { 2757 struct sockaddr_in6 *src_addr6 = (struct sockaddr_in6 *) src_addr; 2758 struct sockaddr_in6 *dst_addr6 = (struct sockaddr_in6 *) dst_addr; 
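			/*
			 * Carry the destination's scope id over to the
			 * unspecified source; for link-local destinations it
			 * also pins the binding to that interface.
			 */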
2759 src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id; 2760 if (IN6_IS_SCOPE_LINKLOCAL(&dst_addr6->sin6_addr)) 2761 id->route.addr.dev_addr.bound_dev_if = dst_addr6->sin6_scope_id; 2762 } else if (dst_addr->sa_family == AF_IB) { 2763 ((struct sockaddr_ib *) src_addr)->sib_pkey = 2764 ((struct sockaddr_ib *) dst_addr)->sib_pkey; 2765 } 2766 } 2767 return rdma_bind_addr(id, src_addr); 2768} 2769 2770int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, 2771 struct sockaddr *dst_addr, int timeout_ms) 2772{ 2773 struct rdma_id_private *id_priv; 2774 int ret; 2775 2776 id_priv = container_of(id, struct rdma_id_private, id); 2777 if (id_priv->state == RDMA_CM_IDLE) { 2778 ret = cma_bind_addr(id, src_addr, dst_addr); 2779 if (ret) 2780 return ret; 2781 } 2782 2783 if (cma_family(id_priv) != dst_addr->sa_family) 2784 return -EINVAL; 2785 2786 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) 2787 return -EINVAL; 2788 2789 atomic_inc(&id_priv->refcount); 2790 memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr)); 2791 if (cma_any_addr(dst_addr)) { 2792 ret = cma_resolve_loopback(id_priv); 2793 } else { 2794 if (dst_addr->sa_family == AF_IB) { 2795 ret = cma_resolve_ib_addr(id_priv); 2796 } else { 2797 ret = rdma_resolve_ip(&addr_client, cma_src_addr(id_priv), 2798 dst_addr, &id->route.addr.dev_addr, 2799 timeout_ms, addr_handler, id_priv); 2800 } 2801 } 2802 if (ret) 2803 goto err; 2804 2805 return 0; 2806err: 2807 cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND); 2808 cma_deref_id(id_priv); 2809 return ret; 2810} 2811EXPORT_SYMBOL(rdma_resolve_addr); 2812 2813int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse) 2814{ 2815 struct rdma_id_private *id_priv; 2816 unsigned long flags; 2817 int ret; 2818 2819 id_priv = container_of(id, struct rdma_id_private, id); 2820 spin_lock_irqsave(&id_priv->lock, flags); 2821 if (reuse || id_priv->state == RDMA_CM_IDLE) { 2822 id_priv->reuseaddr = reuse; 2823 ret = 0; 2824 } else { 2825 ret = -EINVAL; 2826 } 2827 spin_unlock_irqrestore(&id_priv->lock, flags); 2828 return ret; 2829} 2830EXPORT_SYMBOL(rdma_set_reuseaddr); 2831 2832int rdma_set_afonly(struct rdma_cm_id *id, int afonly) 2833{ 2834 struct rdma_id_private *id_priv; 2835 unsigned long flags; 2836 int ret; 2837 2838 id_priv = container_of(id, struct rdma_id_private, id); 2839 spin_lock_irqsave(&id_priv->lock, flags); 2840 if (id_priv->state == RDMA_CM_IDLE || id_priv->state == RDMA_CM_ADDR_BOUND) { 2841 id_priv->options |= (1 << CMA_OPTION_AFONLY); 2842 id_priv->afonly = afonly; 2843 ret = 0; 2844 } else { 2845 ret = -EINVAL; 2846 } 2847 spin_unlock_irqrestore(&id_priv->lock, flags); 2848 return ret; 2849} 2850EXPORT_SYMBOL(rdma_set_afonly); 2851 2852static void cma_bind_port(struct rdma_bind_list *bind_list, 2853 struct rdma_id_private *id_priv) 2854{ 2855 struct sockaddr *addr; 2856 struct sockaddr_ib *sib; 2857 u64 sid, mask; 2858 __be16 port; 2859 2860 addr = cma_src_addr(id_priv); 2861 port = htons(bind_list->port); 2862 2863 switch (addr->sa_family) { 2864 case AF_INET: 2865 ((struct sockaddr_in *) addr)->sin_port = port; 2866 break; 2867 case AF_INET6: 2868 ((struct sockaddr_in6 *) addr)->sin6_port = port; 2869 break; 2870 case AF_IB: 2871 sib = (struct sockaddr_ib *) addr; 2872 sid = be64_to_cpu(sib->sib_sid); 2873 mask = be64_to_cpu(sib->sib_sid_mask); 2874 sib->sib_sid = cpu_to_be64((sid & mask) | (u64) ntohs(port)); 2875 sib->sib_sid_mask = cpu_to_be64(~0ULL); 2876 break; 2877 } 2878 id_priv->bind_list = bind_list; 2879 
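	/* link this id onto the list of ids sharing the port */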
hlist_add_head(&id_priv->node, &bind_list->owners); 2880} 2881 2882static int cma_alloc_port(enum rdma_port_space ps, 2883 struct rdma_id_private *id_priv, unsigned short snum) 2884{ 2885 struct rdma_bind_list *bind_list; 2886 int ret; 2887 2888 bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL); 2889 if (!bind_list) 2890 return -ENOMEM; 2891 2892 ret = cma_ps_alloc(id_priv->id.route.addr.dev_addr.net, ps, bind_list, 2893 snum); 2894 if (ret < 0) 2895 goto err; 2896 2897 bind_list->ps = ps; 2898 bind_list->port = (unsigned short)ret; 2899 cma_bind_port(bind_list, id_priv); 2900 return 0; 2901err: 2902 kfree(bind_list); 2903 return ret == -ENOSPC ? -EADDRNOTAVAIL : ret; 2904} 2905 2906static int cma_alloc_any_port(enum rdma_port_space ps, 2907 struct rdma_id_private *id_priv) 2908{ 2909 static unsigned int last_used_port; 2910 int low, high, remaining; 2911 unsigned int rover; 2912 struct vnet *net = id_priv->id.route.addr.dev_addr.net; 2913 u32 rand; 2914 2915 inet_get_local_port_range(net, &low, &high); 2916 remaining = (high - low) + 1; 2917 get_random_bytes(&rand, sizeof(rand)); 2918 rover = rand % remaining + low; 2919retry: 2920 if (last_used_port != rover && 2921 !cma_ps_find(net, ps, (unsigned short)rover)) { 2922 int ret = cma_alloc_port(ps, id_priv, rover); 2923 /* 2924 * Remember the previously used port number in order to avoid 2925 * re-using the same port immediately after it is closed. 2926 */ 2927 if (!ret) 2928 last_used_port = rover; 2929 if (ret != -EADDRNOTAVAIL) 2930 return ret; 2931 } 2932 if (--remaining) { 2933 rover++; 2934 if ((rover < low) || (rover > high)) 2935 rover = low; 2936 goto retry; 2937 } 2938 return -EADDRNOTAVAIL; 2939} 2940 2941/* 2942 * Check that the requested port is available. This is called when trying to 2943 * bind to a specific port, or when trying to listen on a bound port. In 2944 * the latter case, the provided id_priv may already be on the bind_list, but 2945 * we still need to check that it's okay to start listening.
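 * Both callers hold the file-global lock mutex, which keeps the owners
 * list stable while we walk it.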
2946 */ 2947static int cma_check_port(struct rdma_bind_list *bind_list, 2948 struct rdma_id_private *id_priv, uint8_t reuseaddr) 2949{ 2950 struct rdma_id_private *cur_id; 2951 struct sockaddr *addr, *cur_addr; 2952 2953 addr = cma_src_addr(id_priv); 2954 hlist_for_each_entry(cur_id, &bind_list->owners, node) { 2955 if (id_priv == cur_id) 2956 continue; 2957 2958 if ((cur_id->state != RDMA_CM_LISTEN) && reuseaddr && 2959 cur_id->reuseaddr) 2960 continue; 2961 2962 cur_addr = cma_src_addr(cur_id); 2963 if (id_priv->afonly && cur_id->afonly && 2964 (addr->sa_family != cur_addr->sa_family)) 2965 continue; 2966 2967 if (cma_any_addr(addr) || cma_any_addr(cur_addr)) 2968 return -EADDRNOTAVAIL; 2969 2970 if (!cma_addr_cmp(addr, cur_addr)) 2971 return -EADDRINUSE; 2972 } 2973 return 0; 2974} 2975 2976static int cma_use_port(enum rdma_port_space ps, 2977 struct rdma_id_private *id_priv) 2978{ 2979 struct rdma_bind_list *bind_list; 2980 unsigned short snum; 2981 int ret; 2982 2983 snum = ntohs(cma_port(cma_src_addr(id_priv))); 2984 if (snum < IPPORT_RESERVED && 2985 priv_check(curthread, PRIV_NETINET_BINDANY) != 0) 2986 return -EACCES; 2987 2988 bind_list = cma_ps_find(id_priv->id.route.addr.dev_addr.net, ps, snum); 2989 if (!bind_list) { 2990 ret = cma_alloc_port(ps, id_priv, snum); 2991 } else { 2992 ret = cma_check_port(bind_list, id_priv, id_priv->reuseaddr); 2993 if (!ret) 2994 cma_bind_port(bind_list, id_priv); 2995 } 2996 return ret; 2997} 2998 2999static int cma_bind_listen(struct rdma_id_private *id_priv) 3000{ 3001 struct rdma_bind_list *bind_list = id_priv->bind_list; 3002 int ret = 0; 3003 3004 mutex_lock(&lock); 3005 if (bind_list->owners.first->next) 3006 ret = cma_check_port(bind_list, id_priv, 0); 3007 mutex_unlock(&lock); 3008 return ret; 3009} 3010 3011static enum rdma_port_space cma_select_inet_ps( 3012 struct rdma_id_private *id_priv) 3013{ 3014 switch (id_priv->id.ps) { 3015 case RDMA_PS_TCP: 3016 case RDMA_PS_UDP: 3017 case RDMA_PS_IPOIB: 3018 case RDMA_PS_IB: 3019 return id_priv->id.ps; 3020 default: 3021 3022 return 0; 3023 } 3024} 3025 3026static enum rdma_port_space cma_select_ib_ps(struct rdma_id_private *id_priv) 3027{ 3028 enum rdma_port_space ps = 0; 3029 struct sockaddr_ib *sib; 3030 u64 sid_ps, mask, sid; 3031 3032 sib = (struct sockaddr_ib *) cma_src_addr(id_priv); 3033 mask = be64_to_cpu(sib->sib_sid_mask) & RDMA_IB_IP_PS_MASK; 3034 sid = be64_to_cpu(sib->sib_sid) & mask; 3035 3036 if ((id_priv->id.ps == RDMA_PS_IB) && (sid == (RDMA_IB_IP_PS_IB & mask))) { 3037 sid_ps = RDMA_IB_IP_PS_IB; 3038 ps = RDMA_PS_IB; 3039 } else if (((id_priv->id.ps == RDMA_PS_IB) || (id_priv->id.ps == RDMA_PS_TCP)) && 3040 (sid == (RDMA_IB_IP_PS_TCP & mask))) { 3041 sid_ps = RDMA_IB_IP_PS_TCP; 3042 ps = RDMA_PS_TCP; 3043 } else if (((id_priv->id.ps == RDMA_PS_IB) || (id_priv->id.ps == RDMA_PS_UDP)) && 3044 (sid == (RDMA_IB_IP_PS_UDP & mask))) { 3045 sid_ps = RDMA_IB_IP_PS_UDP; 3046 ps = RDMA_PS_UDP; 3047 } 3048 3049 if (ps) { 3050 sib->sib_sid = cpu_to_be64(sid_ps | ntohs(cma_port((struct sockaddr *) sib))); 3051 sib->sib_sid_mask = cpu_to_be64(RDMA_IB_IP_PS_MASK | 3052 be64_to_cpu(sib->sib_sid_mask)); 3053 } 3054 return ps; 3055} 3056 3057static int cma_get_port(struct rdma_id_private *id_priv) 3058{ 3059 enum rdma_port_space ps; 3060 int ret; 3061 3062 if (cma_family(id_priv) != AF_IB) 3063 ps = cma_select_inet_ps(id_priv); 3064 else 3065 ps = cma_select_ib_ps(id_priv); 3066 if (!ps) 3067 return -EPROTONOSUPPORT; 3068 3069 mutex_lock(&lock); 3070 if (cma_any_port(cma_src_addr(id_priv))) 
3071 ret = cma_alloc_any_port(ps, id_priv); 3072 else 3073 ret = cma_use_port(ps, id_priv); 3074 mutex_unlock(&lock); 3075 3076 return ret; 3077} 3078 3079static int cma_check_linklocal(struct rdma_dev_addr *dev_addr, 3080 struct sockaddr *addr) 3081{ 3082#ifdef INET6 3083 struct sockaddr_in6 sin6; 3084 3085 if (addr->sa_family != AF_INET6) 3086 return 0; 3087 3088 sin6 = *(struct sockaddr_in6 *)addr; 3089 3090 if (!(IN6_IS_SCOPE_LINKLOCAL(&sin6.sin6_addr))) 3091 return 0; 3092 3093 if (sa6_recoverscope(&sin6) || sin6.sin6_scope_id == 0) 3094 return -EINVAL; 3095 3096 dev_addr->bound_dev_if = sin6.sin6_scope_id; 3097#endif 3098 return 0; 3099} 3100 3101int rdma_listen(struct rdma_cm_id *id, int backlog) 3102{ 3103 struct rdma_id_private *id_priv; 3104 int ret; 3105 3106 id_priv = container_of(id, struct rdma_id_private, id); 3107 if (id_priv->state == RDMA_CM_IDLE) { 3108 id->route.addr.src_addr.ss_family = AF_INET; 3109 ret = rdma_bind_addr(id, cma_src_addr(id_priv)); 3110 if (ret) 3111 return ret; 3112 } 3113 3114 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_LISTEN)) 3115 return -EINVAL; 3116 3117 if (id_priv->reuseaddr) { 3118 ret = cma_bind_listen(id_priv); 3119 if (ret) 3120 goto err; 3121 } 3122 3123 id_priv->backlog = backlog; 3124 if (id->device) { 3125 if (rdma_cap_ib_cm(id->device, 1)) { 3126 ret = cma_ib_listen(id_priv); 3127 if (ret) 3128 goto err; 3129 } else if (rdma_cap_iw_cm(id->device, 1)) { 3130 ret = cma_iw_listen(id_priv, backlog); 3131 if (ret) 3132 goto err; 3133 } else { 3134 ret = -ENOSYS; 3135 goto err; 3136 } 3137 } else 3138 cma_listen_on_all(id_priv); 3139 3140 return 0; 3141err: 3142 id_priv->backlog = 0; 3143 cma_comp_exch(id_priv, RDMA_CM_LISTEN, RDMA_CM_ADDR_BOUND); 3144 return ret; 3145} 3146EXPORT_SYMBOL(rdma_listen); 3147 3148int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) 3149{ 3150 struct rdma_id_private *id_priv; 3151 int ret; 3152 3153 if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6 && 3154 addr->sa_family != AF_IB) 3155 return -EAFNOSUPPORT; 3156 3157 id_priv = container_of(id, struct rdma_id_private, id); 3158 if (!cma_comp_exch(id_priv, RDMA_CM_IDLE, RDMA_CM_ADDR_BOUND)) 3159 return -EINVAL; 3160 3161 ret = cma_check_linklocal(&id->route.addr.dev_addr, addr); 3162 if (ret) 3163 goto err1; 3164 3165 memcpy(cma_src_addr(id_priv), addr, rdma_addr_size(addr)); 3166 if (!cma_any_addr(addr)) { 3167 ret = cma_translate_addr(addr, &id->route.addr.dev_addr); 3168 if (ret) 3169 goto err1; 3170 3171 ret = cma_acquire_dev(id_priv, NULL); 3172 if (ret) 3173 goto err1; 3174 } 3175 3176 if (!(id_priv->options & (1 << CMA_OPTION_AFONLY))) { 3177 if (addr->sa_family == AF_INET) 3178 id_priv->afonly = 1; 3179#ifdef INET6 3180 else if (addr->sa_family == AF_INET6) { 3181 CURVNET_SET_QUIET(id_priv->id.route.addr.dev_addr.net); 3182 id_priv->afonly = V_ip6_v6only; 3183 CURVNET_RESTORE(); 3184 } 3185#endif 3186 } 3187 ret = cma_get_port(id_priv); 3188 if (ret) 3189 goto err2; 3190 3191 return 0; 3192err2: 3193 if (id_priv->cma_dev) 3194 cma_release_dev(id_priv); 3195err1: 3196 cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_IDLE); 3197 return ret; 3198} 3199EXPORT_SYMBOL(rdma_bind_addr); 3200 3201static int cma_format_hdr(void *hdr, struct rdma_id_private *id_priv) 3202{ 3203 struct cma_hdr *cma_hdr; 3204 3205 cma_hdr = hdr; 3206 cma_hdr->cma_version = CMA_VERSION; 3207 if (cma_family(id_priv) == AF_INET) { 3208 struct sockaddr_in *src4, *dst4; 3209 3210 src4 = (struct sockaddr_in *) cma_src_addr(id_priv); 3211 dst4 = (struct 
sockaddr_in *) cma_dst_addr(id_priv); 3212 3213 cma_set_ip_ver(cma_hdr, 4); 3214 cma_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr; 3215 cma_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr; 3216 cma_hdr->port = src4->sin_port; 3217 } else if (cma_family(id_priv) == AF_INET6) { 3218 struct sockaddr_in6 *src6, *dst6; 3219 3220 src6 = (struct sockaddr_in6 *) cma_src_addr(id_priv); 3221 dst6 = (struct sockaddr_in6 *) cma_dst_addr(id_priv); 3222 3223 cma_set_ip_ver(cma_hdr, 6); 3224 cma_hdr->src_addr.ip6 = src6->sin6_addr; 3225 cma_hdr->dst_addr.ip6 = dst6->sin6_addr; 3226 cma_hdr->port = src6->sin6_port; 3227 } 3228 return 0; 3229} 3230 3231static int cma_sidr_rep_handler(struct ib_cm_id *cm_id, 3232 struct ib_cm_event *ib_event) 3233{ 3234 struct rdma_id_private *id_priv = cm_id->context; 3235 struct rdma_cm_event event; 3236 struct ib_cm_sidr_rep_event_param *rep = &ib_event->param.sidr_rep_rcvd; 3237 int ret = 0; 3238 3239 mutex_lock(&id_priv->handler_mutex); 3240 if (id_priv->state != RDMA_CM_CONNECT) 3241 goto out; 3242 3243 memset(&event, 0, sizeof event); 3244 switch (ib_event->event) { 3245 case IB_CM_SIDR_REQ_ERROR: 3246 event.event = RDMA_CM_EVENT_UNREACHABLE; 3247 event.status = -ETIMEDOUT; 3248 break; 3249 case IB_CM_SIDR_REP_RECEIVED: 3250 event.param.ud.private_data = ib_event->private_data; 3251 event.param.ud.private_data_len = IB_CM_SIDR_REP_PRIVATE_DATA_SIZE; 3252 if (rep->status != IB_SIDR_SUCCESS) { 3253 event.event = RDMA_CM_EVENT_UNREACHABLE; 3254 event.status = ib_event->param.sidr_rep_rcvd.status; 3255 break; 3256 } 3257 ret = cma_set_qkey(id_priv, rep->qkey); 3258 if (ret) { 3259 event.event = RDMA_CM_EVENT_ADDR_ERROR; 3260 event.status = ret; 3261 break; 3262 } 3263 ret = ib_init_ah_from_path(id_priv->id.device, 3264 id_priv->id.port_num, 3265 id_priv->id.route.path_rec, 3266 &event.param.ud.ah_attr); 3267 if (ret) { 3268 event.event = RDMA_CM_EVENT_ADDR_ERROR; 3269 event.status = ret; 3270 break; 3271 } 3272 event.param.ud.qp_num = rep->qpn; 3273 event.param.ud.qkey = rep->qkey; 3274 event.event = RDMA_CM_EVENT_ESTABLISHED; 3275 event.status = 0; 3276 break; 3277 default: 3278 pr_err("RDMA CMA: unexpected IB CM event: %d\n", 3279 ib_event->event); 3280 goto out; 3281 } 3282 3283 ret = id_priv->id.event_handler(&id_priv->id, &event); 3284 if (ret) { 3285 /* Destroy the CM ID by returning a non-zero value. 
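 * Same convention as the connection handlers above: clear cm_id.ib so
 * that rdma_destroy_id() does not free an id the IB CM will reclaim.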
*/ 3286 id_priv->cm_id.ib = NULL; 3287 cma_exch(id_priv, RDMA_CM_DESTROYING); 3288 mutex_unlock(&id_priv->handler_mutex); 3289 rdma_destroy_id(&id_priv->id); 3290 return ret; 3291 } 3292out: 3293 mutex_unlock(&id_priv->handler_mutex); 3294 return ret; 3295} 3296 3297static int cma_resolve_ib_udp(struct rdma_id_private *id_priv, 3298 struct rdma_conn_param *conn_param) 3299{ 3300 struct ib_cm_sidr_req_param req; 3301 struct ib_cm_id *id; 3302 void *private_data; 3303 int offset, ret; 3304 3305 memset(&req, 0, sizeof req); 3306 offset = cma_user_data_offset(id_priv); 3307 req.private_data_len = offset + conn_param->private_data_len; 3308 if (req.private_data_len < conn_param->private_data_len) 3309 return -EINVAL; 3310 3311 if (req.private_data_len) { 3312 private_data = kzalloc(req.private_data_len, GFP_ATOMIC); 3313 if (!private_data) 3314 return -ENOMEM; 3315 } else { 3316 private_data = NULL; 3317 } 3318 3319 if (conn_param->private_data && conn_param->private_data_len) 3320 memcpy((char *)private_data + offset, conn_param->private_data, 3321 conn_param->private_data_len); 3322 3323 if (private_data) { 3324 ret = cma_format_hdr(private_data, id_priv); 3325 if (ret) 3326 goto out; 3327 req.private_data = private_data; 3328 } 3329 3330 id = ib_create_cm_id(id_priv->id.device, cma_sidr_rep_handler, 3331 id_priv); 3332 if (IS_ERR(id)) { 3333 ret = PTR_ERR(id); 3334 goto out; 3335 } 3336 id_priv->cm_id.ib = id; 3337 3338 req.path = id_priv->id.route.path_rec; 3339 req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv)); 3340 req.timeout_ms = 1 << (CMA_CM_RESPONSE_TIMEOUT - 8); 3341 req.max_cm_retries = CMA_MAX_CM_RETRIES; 3342 3343 ret = ib_send_cm_sidr_req(id_priv->cm_id.ib, &req); 3344 if (ret) { 3345 ib_destroy_cm_id(id_priv->cm_id.ib); 3346 id_priv->cm_id.ib = NULL; 3347 } 3348out: 3349 kfree(private_data); 3350 return ret; 3351} 3352 3353static int cma_connect_ib(struct rdma_id_private *id_priv, 3354 struct rdma_conn_param *conn_param) 3355{ 3356 struct ib_cm_req_param req; 3357 struct rdma_route *route; 3358 void *private_data; 3359 struct ib_cm_id *id; 3360 int offset, ret; 3361 3362 memset(&req, 0, sizeof req); 3363 offset = cma_user_data_offset(id_priv); 3364 req.private_data_len = offset + conn_param->private_data_len; 3365 if (req.private_data_len < conn_param->private_data_len) 3366 return -EINVAL; 3367 3368 if (req.private_data_len) { 3369 private_data = kzalloc(req.private_data_len, GFP_ATOMIC); 3370 if (!private_data) 3371 return -ENOMEM; 3372 } else { 3373 private_data = NULL; 3374 } 3375 3376 if (conn_param->private_data && conn_param->private_data_len) 3377 memcpy((char *)private_data + offset, conn_param->private_data, 3378 conn_param->private_data_len); 3379 3380 id = ib_create_cm_id(id_priv->id.device, cma_ib_handler, id_priv); 3381 if (IS_ERR(id)) { 3382 ret = PTR_ERR(id); 3383 goto out; 3384 } 3385 id_priv->cm_id.ib = id; 3386 3387 route = &id_priv->id.route; 3388 if (private_data) { 3389 ret = cma_format_hdr(private_data, id_priv); 3390 if (ret) 3391 goto out; 3392 req.private_data = private_data; 3393 } 3394 3395 req.primary_path = &route->path_rec[0]; 3396 if (route->num_paths == 2) 3397 req.alternate_path = &route->path_rec[1]; 3398 3399 req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv)); 3400 req.qp_num = id_priv->qp_num; 3401 req.qp_type = id_priv->id.qp_type; 3402 req.starting_psn = id_priv->seq_num; 3403 req.responder_resources = conn_param->responder_resources; 3404 req.initiator_depth = conn_param->initiator_depth; 3405 
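	/* the CM REQ carries retry counts in 3-bit fields, hence the clamps */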
req.flow_control = conn_param->flow_control; 3406 req.retry_count = min_t(u8, 7, conn_param->retry_count); 3407 req.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count); 3408 req.remote_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT; 3409 req.local_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT; 3410 req.max_cm_retries = CMA_MAX_CM_RETRIES; 3411 req.srq = id_priv->srq ? 1 : 0; 3412 3413 ret = ib_send_cm_req(id_priv->cm_id.ib, &req); 3414out: 3415 if (ret && !IS_ERR(id)) { 3416 ib_destroy_cm_id(id); 3417 id_priv->cm_id.ib = NULL; 3418 } 3419 3420 kfree(private_data); 3421 return ret; 3422} 3423 3424static int cma_connect_iw(struct rdma_id_private *id_priv, 3425 struct rdma_conn_param *conn_param) 3426{ 3427 struct iw_cm_id *cm_id; 3428 int ret; 3429 struct iw_cm_conn_param iw_param; 3430 3431 cm_id = iw_create_cm_id(id_priv->id.device, cma_iw_handler, id_priv); 3432 if (IS_ERR(cm_id)) 3433 return PTR_ERR(cm_id); 3434 3435 cm_id->tos = id_priv->tos; 3436 id_priv->cm_id.iw = cm_id; 3437 3438 memcpy(&cm_id->local_addr, cma_src_addr(id_priv), 3439 rdma_addr_size(cma_src_addr(id_priv))); 3440 memcpy(&cm_id->remote_addr, cma_dst_addr(id_priv), 3441 rdma_addr_size(cma_dst_addr(id_priv))); 3442 3443 ret = cma_modify_qp_rtr(id_priv, conn_param); 3444 if (ret) 3445 goto out; 3446 3447 if (conn_param) { 3448 iw_param.ord = conn_param->initiator_depth; 3449 iw_param.ird = conn_param->responder_resources; 3450 iw_param.private_data = conn_param->private_data; 3451 iw_param.private_data_len = conn_param->private_data_len; 3452 iw_param.qpn = id_priv->id.qp ? id_priv->qp_num : conn_param->qp_num; 3453 } else { 3454 memset(&iw_param, 0, sizeof iw_param); 3455 iw_param.qpn = id_priv->qp_num; 3456 } 3457 ret = iw_cm_connect(cm_id, &iw_param); 3458out: 3459 if (ret) { 3460 iw_destroy_cm_id(cm_id); 3461 id_priv->cm_id.iw = NULL; 3462 } 3463 return ret; 3464} 3465 3466int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) 3467{ 3468 struct rdma_id_private *id_priv; 3469 int ret; 3470 3471 id_priv = container_of(id, struct rdma_id_private, id); 3472 if (!cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_CONNECT)) 3473 return -EINVAL; 3474 3475 if (!id->qp) { 3476 id_priv->qp_num = conn_param->qp_num; 3477 id_priv->srq = conn_param->srq; 3478 } 3479 3480 if (rdma_cap_ib_cm(id->device, id->port_num)) { 3481 if (id->qp_type == IB_QPT_UD) 3482 ret = cma_resolve_ib_udp(id_priv, conn_param); 3483 else 3484 ret = cma_connect_ib(id_priv, conn_param); 3485 } else if (rdma_cap_iw_cm(id->device, id->port_num)) 3486 ret = cma_connect_iw(id_priv, conn_param); 3487 else 3488 ret = -ENOSYS; 3489 if (ret) 3490 goto err; 3491 3492 return 0; 3493err: 3494 cma_comp_exch(id_priv, RDMA_CM_CONNECT, RDMA_CM_ROUTE_RESOLVED); 3495 return ret; 3496} 3497EXPORT_SYMBOL(rdma_connect); 3498 3499static int cma_accept_ib(struct rdma_id_private *id_priv, 3500 struct rdma_conn_param *conn_param) 3501{ 3502 struct ib_cm_rep_param rep; 3503 int ret; 3504 3505 ret = cma_modify_qp_rtr(id_priv, conn_param); 3506 if (ret) 3507 goto out; 3508 3509 ret = cma_modify_qp_rts(id_priv, conn_param); 3510 if (ret) 3511 goto out; 3512 3513 memset(&rep, 0, sizeof rep); 3514 rep.qp_num = id_priv->qp_num; 3515 rep.starting_psn = id_priv->seq_num; 3516 rep.private_data = conn_param->private_data; 3517 rep.private_data_len = conn_param->private_data_len; 3518 rep.responder_resources = conn_param->responder_resources; 3519 rep.initiator_depth = conn_param->initiator_depth; 3520 rep.failover_accepted = 0; 3521 rep.flow_control = 
conn_param->flow_control; 3522 rep.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count); 3523 rep.srq = id_priv->srq ? 1 : 0; 3524 3525 ret = ib_send_cm_rep(id_priv->cm_id.ib, &rep); 3526out: 3527 return ret; 3528} 3529 3530static int cma_accept_iw(struct rdma_id_private *id_priv, 3531 struct rdma_conn_param *conn_param) 3532{ 3533 struct iw_cm_conn_param iw_param; 3534 int ret; 3535 3536 ret = cma_modify_qp_rtr(id_priv, conn_param); 3537 if (ret) 3538 return ret; 3539 3540 iw_param.ord = conn_param->initiator_depth; 3541 iw_param.ird = conn_param->responder_resources; 3542 iw_param.private_data = conn_param->private_data; 3543 iw_param.private_data_len = conn_param->private_data_len; 3544 if (id_priv->id.qp) { 3545 iw_param.qpn = id_priv->qp_num; 3546 } else 3547 iw_param.qpn = conn_param->qp_num; 3548 3549 return iw_cm_accept(id_priv->cm_id.iw, &iw_param); 3550} 3551 3552static int cma_send_sidr_rep(struct rdma_id_private *id_priv, 3553 enum ib_cm_sidr_status status, u32 qkey, 3554 const void *private_data, int private_data_len) 3555{ 3556 struct ib_cm_sidr_rep_param rep; 3557 int ret; 3558 3559 memset(&rep, 0, sizeof rep); 3560 rep.status = status; 3561 if (status == IB_SIDR_SUCCESS) { 3562 ret = cma_set_qkey(id_priv, qkey); 3563 if (ret) 3564 return ret; 3565 rep.qp_num = id_priv->qp_num; 3566 rep.qkey = id_priv->qkey; 3567 } 3568 rep.private_data = private_data; 3569 rep.private_data_len = private_data_len; 3570 3571 return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep); 3572} 3573 3574int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) 3575{ 3576 struct rdma_id_private *id_priv; 3577 int ret; 3578 3579 id_priv = container_of(id, struct rdma_id_private, id); 3580 3581 id_priv->owner = task_pid_nr(current); 3582 3583 if (!cma_comp(id_priv, RDMA_CM_CONNECT)) 3584 return -EINVAL; 3585 3586 if (!id->qp && conn_param) { 3587 id_priv->qp_num = conn_param->qp_num; 3588 id_priv->srq = conn_param->srq; 3589 } 3590 3591 if (rdma_cap_ib_cm(id->device, id->port_num)) { 3592 if (id->qp_type == IB_QPT_UD) { 3593 if (conn_param) 3594 ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS, 3595 conn_param->qkey, 3596 conn_param->private_data, 3597 conn_param->private_data_len); 3598 else 3599 ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS, 3600 0, NULL, 0); 3601 } else { 3602 if (conn_param) 3603 ret = cma_accept_ib(id_priv, conn_param); 3604 else 3605 ret = cma_rep_recv(id_priv); 3606 } 3607 } else if (rdma_cap_iw_cm(id->device, id->port_num)) 3608 ret = cma_accept_iw(id_priv, conn_param); 3609 else 3610 ret = -ENOSYS; 3611 3612 if (ret) 3613 goto reject; 3614 3615 return 0; 3616reject: 3617 cma_modify_qp_err(id_priv); 3618 rdma_reject(id, NULL, 0); 3619 return ret; 3620} 3621EXPORT_SYMBOL(rdma_accept); 3622 3623int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event) 3624{ 3625 struct rdma_id_private *id_priv; 3626 int ret; 3627 3628 id_priv = container_of(id, struct rdma_id_private, id); 3629 if (!id_priv->cm_id.ib) 3630 return -EINVAL; 3631 3632 switch (id->device->node_type) { 3633 case RDMA_NODE_IB_CA: 3634 ret = ib_cm_notify(id_priv->cm_id.ib, event); 3635 break; 3636 default: 3637 ret = 0; 3638 break; 3639 } 3640 return ret; 3641} 3642EXPORT_SYMBOL(rdma_notify); 3643 3644int rdma_reject(struct rdma_cm_id *id, const void *private_data, 3645 u8 private_data_len) 3646{ 3647 struct rdma_id_private *id_priv; 3648 int ret; 3649 3650 id_priv = container_of(id, struct rdma_id_private, id); 3651 if (!id_priv->cm_id.ib) 3652 return -EINVAL; 3653 3654 if 
(rdma_cap_ib_cm(id->device, id->port_num)) { 3655 if (id->qp_type == IB_QPT_UD) 3656 ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT, 0, 3657 private_data, private_data_len); 3658 else 3659 ret = ib_send_cm_rej(id_priv->cm_id.ib, 3660 IB_CM_REJ_CONSUMER_DEFINED, NULL, 3661 0, private_data, private_data_len); 3662 } else if (rdma_cap_iw_cm(id->device, id->port_num)) { 3663 ret = iw_cm_reject(id_priv->cm_id.iw, 3664 private_data, private_data_len); 3665 } else 3666 ret = -ENOSYS; 3667 3668 return ret; 3669} 3670EXPORT_SYMBOL(rdma_reject); 3671 3672int rdma_disconnect(struct rdma_cm_id *id) 3673{ 3674 struct rdma_id_private *id_priv; 3675 int ret; 3676 3677 id_priv = container_of(id, struct rdma_id_private, id); 3678 if (!id_priv->cm_id.ib) 3679 return -EINVAL; 3680 3681 if (rdma_cap_ib_cm(id->device, id->port_num)) { 3682 ret = cma_modify_qp_err(id_priv); 3683 if (ret) 3684 goto out; 3685 /* Initiate or respond to a disconnect. */ 3686 if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0)) 3687 ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0); 3688 } else if (rdma_cap_iw_cm(id->device, id->port_num)) { 3689 ret = iw_cm_disconnect(id_priv->cm_id.iw, 0); 3690 } else 3691 ret = -EINVAL; 3692 3693out: 3694 return ret; 3695} 3696EXPORT_SYMBOL(rdma_disconnect); 3697 3698static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast) 3699{ 3700 struct rdma_id_private *id_priv; 3701 struct cma_multicast *mc = multicast->context; 3702 struct rdma_cm_event event; 3703 int ret = 0; 3704 3705 id_priv = mc->id_priv; 3706 mutex_lock(&id_priv->handler_mutex); 3707 if (id_priv->state != RDMA_CM_ADDR_BOUND && 3708 id_priv->state != RDMA_CM_ADDR_RESOLVED) 3709 goto out; 3710 3711 if (!status) 3712 status = cma_set_qkey(id_priv, be32_to_cpu(multicast->rec.qkey)); 3713 mutex_lock(&id_priv->qp_mutex); 3714 if (!status && id_priv->id.qp) 3715 status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid, 3716 be16_to_cpu(multicast->rec.mlid)); 3717 mutex_unlock(&id_priv->qp_mutex); 3718 3719 memset(&event, 0, sizeof event); 3720 event.status = status; 3721 event.param.ud.private_data = mc->context; 3722 if (!status) { 3723 struct rdma_dev_addr *dev_addr = 3724 &id_priv->id.route.addr.dev_addr; 3725 struct net_device *ndev = 3726 dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if); 3727 enum ib_gid_type gid_type = 3728 id_priv->cma_dev->default_gid_type[id_priv->id.port_num - 3729 rdma_start_port(id_priv->cma_dev->device)]; 3730 3731 event.event = RDMA_CM_EVENT_MULTICAST_JOIN; 3732 ib_init_ah_from_mcmember(id_priv->id.device, 3733 id_priv->id.port_num, &multicast->rec, 3734 ndev, gid_type, 3735 &event.param.ud.ah_attr); 3736 event.param.ud.qp_num = 0xFFFFFF; 3737 event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey); 3738 if (ndev) 3739 dev_put(ndev); 3740 } else 3741 event.event = RDMA_CM_EVENT_MULTICAST_ERROR; 3742 3743 ret = id_priv->id.event_handler(&id_priv->id, &event); 3744 if (ret) { 3745 cma_exch(id_priv, RDMA_CM_DESTROYING); 3746 mutex_unlock(&id_priv->handler_mutex); 3747 rdma_destroy_id(&id_priv->id); 3748 return 0; 3749 } 3750 3751out: 3752 mutex_unlock(&id_priv->handler_mutex); 3753 return 0; 3754} 3755 3756static void cma_set_mgid(struct rdma_id_private *id_priv, 3757 struct sockaddr *addr, union ib_gid *mgid) 3758{ 3759 unsigned char mc_map[MAX_ADDR_LEN]; 3760 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; 3761 struct sockaddr_in *sin = (struct sockaddr_in *) addr; 3762 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) addr; 3763 3764 if (cma_any_addr(addr)) { 3765 
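		/* a wildcard address maps to the all-zero MGID */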
memset(mgid, 0, sizeof *mgid); 3766 } else if ((addr->sa_family == AF_INET6) && 3767 ((be32_to_cpu(sin6->sin6_addr.s6_addr32[0]) & 0xFFF0FFFF) == 3768 0xFF10A01B)) { 3769 /* IPv6 address is an SA assigned MGID. */ 3770 memcpy(mgid, &sin6->sin6_addr, sizeof *mgid); 3771 } else if (addr->sa_family == AF_IB) { 3772 memcpy(mgid, &((struct sockaddr_ib *) addr)->sib_addr, sizeof *mgid); 3773 } else if (addr->sa_family == AF_INET6) { 3774 ipv6_ib_mc_map(&sin6->sin6_addr, dev_addr->broadcast, mc_map); 3775 if (id_priv->id.ps == RDMA_PS_UDP) 3776 mc_map[7] = 0x01; /* Use RDMA CM signature */ 3777 *mgid = *(union ib_gid *) (mc_map + 4); 3778 } else { 3779 ip_ib_mc_map(sin->sin_addr.s_addr, dev_addr->broadcast, mc_map); 3780 if (id_priv->id.ps == RDMA_PS_UDP) 3781 mc_map[7] = 0x01; /* Use RDMA CM signature */ 3782 *mgid = *(union ib_gid *) (mc_map + 4); 3783 } 3784} 3785 3786static void cma_query_sa_classport_info_cb(int status, 3787 struct ib_class_port_info *rec, 3788 void *context) 3789{ 3790 struct class_port_info_context *cb_ctx = context; 3791 3792 WARN_ON(!context); 3793 3794 if (status || !rec) { 3795 pr_debug("RDMA CM: %s port %u failed query ClassPortInfo status: %d\n", 3796 cb_ctx->device->name, cb_ctx->port_num, status); 3797 goto out; 3798 } 3799 3800 memcpy(cb_ctx->class_port_info, rec, sizeof(struct ib_class_port_info)); 3801 3802out: 3803 complete(&cb_ctx->done); 3804} 3805 3806static int cma_query_sa_classport_info(struct ib_device *device, u8 port_num, 3807 struct ib_class_port_info *class_port_info) 3808{ 3809 struct class_port_info_context *cb_ctx; 3810 int ret; 3811 3812 cb_ctx = kmalloc(sizeof(*cb_ctx), GFP_KERNEL); 3813 if (!cb_ctx) 3814 return -ENOMEM; 3815 3816 cb_ctx->device = device; 3817 cb_ctx->class_port_info = class_port_info; 3818 cb_ctx->port_num = port_num; 3819 init_completion(&cb_ctx->done); 3820 3821 ret = ib_sa_classport_info_rec_query(&sa_client, device, port_num, 3822 CMA_QUERY_CLASSPORT_INFO_TIMEOUT, 3823 GFP_KERNEL, cma_query_sa_classport_info_cb, 3824 cb_ctx, &cb_ctx->sa_query); 3825 if (ret < 0) { 3826 pr_err("RDMA CM: %s port %u failed to send ClassPortInfo query, ret: %d\n", 3827 device->name, port_num, ret); 3828 goto out; 3829 } 3830 3831 wait_for_completion(&cb_ctx->done); 3832 3833out: 3834 kfree(cb_ctx); 3835 return ret; 3836} 3837 3838static int cma_join_ib_multicast(struct rdma_id_private *id_priv, 3839 struct cma_multicast *mc) 3840{ 3841 struct ib_sa_mcmember_rec rec; 3842 struct ib_class_port_info class_port_info; 3843 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; 3844 ib_sa_comp_mask comp_mask; 3845 int ret; 3846 3847 ib_addr_get_mgid(dev_addr, &rec.mgid); 3848 ret = ib_sa_get_mcmember_rec(id_priv->id.device, id_priv->id.port_num, 3849 &rec.mgid, &rec); 3850 if (ret) 3851 return ret; 3852 3853 ret = cma_set_qkey(id_priv, 0); 3854 if (ret) 3855 return ret; 3856 3857 cma_set_mgid(id_priv, (struct sockaddr *) &mc->addr, &rec.mgid); 3858 rec.qkey = cpu_to_be32(id_priv->qkey); 3859 rdma_addr_get_sgid(dev_addr, &rec.port_gid); 3860 rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr)); 3861 rec.join_state = mc->join_state; 3862 3863 if (rec.join_state == BIT(SENDONLY_FULLMEMBER_JOIN)) { 3864 ret = cma_query_sa_classport_info(id_priv->id.device, 3865 id_priv->id.port_num, 3866 &class_port_info); 3867 3868 if (ret) 3869 return ret; 3870 3871 if (!(ib_get_cpi_capmask2(&class_port_info) & 3872 IB_SA_CAP_MASK2_SENDONLY_FULL_MEM_SUPPORT)) { 3873 pr_warn("RDMA CM: %s port %u Unable to multicast join\n" 3874 "RDMA CM: SM doesn't support 
Send Only Full Member option\n", 3875 id_priv->id.device->name, id_priv->id.port_num); 3876 return -EOPNOTSUPP; 3877 } 3878 } 3879 3880 comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID | 3881 IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE | 3882 IB_SA_MCMEMBER_REC_QKEY | IB_SA_MCMEMBER_REC_SL | 3883 IB_SA_MCMEMBER_REC_FLOW_LABEL | 3884 IB_SA_MCMEMBER_REC_TRAFFIC_CLASS; 3885 3886 if (id_priv->id.ps == RDMA_PS_IPOIB) 3887 comp_mask |= IB_SA_MCMEMBER_REC_RATE | 3888 IB_SA_MCMEMBER_REC_RATE_SELECTOR | 3889 IB_SA_MCMEMBER_REC_MTU_SELECTOR | 3890 IB_SA_MCMEMBER_REC_MTU | 3891 IB_SA_MCMEMBER_REC_HOP_LIMIT; 3892 3893 mc->multicast.ib = ib_sa_join_multicast(&sa_client, id_priv->id.device, 3894 id_priv->id.port_num, &rec, 3895 comp_mask, GFP_KERNEL, 3896 cma_ib_mc_handler, mc); 3897 return PTR_ERR_OR_ZERO(mc->multicast.ib); 3898} 3899 3900static void iboe_mcast_work_handler(struct work_struct *work) 3901{ 3902 struct iboe_mcast_work *mw = container_of(work, struct iboe_mcast_work, work); 3903 struct cma_multicast *mc = mw->mc; 3904 struct ib_sa_multicast *m = mc->multicast.ib; 3905 3906 mc->multicast.ib->context = mc; 3907 cma_ib_mc_handler(0, m); 3908 kref_put(&mc->mcref, release_mc); 3909 kfree(mw); 3910} 3911 3912static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid) 3913{ 3914 struct sockaddr_in *sin = (struct sockaddr_in *)addr; 3915 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr; 3916 3917 if (cma_any_addr(addr)) { 3918 memset(mgid, 0, sizeof *mgid); 3919 } else if (addr->sa_family == AF_INET6) { 3920 memcpy(mgid, &sin6->sin6_addr, sizeof *mgid); 3921 } else { 3922 mgid->raw[0] = 0xff; 3923 mgid->raw[1] = 0x0e; 3924 mgid->raw[2] = 0; 3925 mgid->raw[3] = 0; 3926 mgid->raw[4] = 0; 3927 mgid->raw[5] = 0; 3928 mgid->raw[6] = 0; 3929 mgid->raw[7] = 0; 3930 mgid->raw[8] = 0; 3931 mgid->raw[9] = 0; 3932 mgid->raw[10] = 0xff; 3933 mgid->raw[11] = 0xff; 3934 *(__be32 *)(&mgid->raw[12]) = sin->sin_addr.s_addr; 3935 } 3936} 3937 3938static int cma_iboe_join_multicast(struct rdma_id_private *id_priv, 3939 struct cma_multicast *mc) 3940{ 3941 struct iboe_mcast_work *work; 3942 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; 3943 int err = 0; 3944 struct sockaddr *addr = (struct sockaddr *)&mc->addr; 3945 struct net_device *ndev = NULL; 3946 enum ib_gid_type gid_type; 3947 bool send_only; 3948 3949 send_only = mc->join_state == BIT(SENDONLY_FULLMEMBER_JOIN); 3950 3951 if (cma_zero_addr((struct sockaddr *)&mc->addr)) 3952 return -EINVAL; 3953 3954 work = kzalloc(sizeof *work, GFP_KERNEL); 3955 if (!work) 3956 return -ENOMEM; 3957 3958 mc->multicast.ib = kzalloc(sizeof(struct ib_sa_multicast), GFP_KERNEL); 3959 if (!mc->multicast.ib) { 3960 err = -ENOMEM; 3961 goto out1; 3962 } 3963 3964 cma_iboe_set_mgid(addr, &mc->multicast.ib->rec.mgid); 3965 3966 mc->multicast.ib->rec.pkey = cpu_to_be16(0xffff); 3967 if (id_priv->id.ps == RDMA_PS_UDP) 3968 mc->multicast.ib->rec.qkey = cpu_to_be32(RDMA_UDP_QKEY); 3969 3970 if (dev_addr->bound_dev_if) 3971 ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if); 3972 if (!ndev) { 3973 err = -ENODEV; 3974 goto out2; 3975 } 3976 mc->multicast.ib->rec.rate = iboe_get_rate(ndev); 3977 mc->multicast.ib->rec.hop_limit = 1; 3978 mc->multicast.ib->rec.mtu = iboe_get_mtu(ndev->if_mtu); 3979 3980 gid_type = id_priv->cma_dev->default_gid_type[id_priv->id.port_num - 3981 rdma_start_port(id_priv->cma_dev->device)]; 3982 if (addr->sa_family == AF_INET) { 3983 if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) { 3984 
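			/*
			 * RoCE v2 multicast is routable, so override the
			 * default hop limit of 1 set above.
			 */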
			mc->multicast.ib->rec.hop_limit = IPV6_DEFAULT_HOPLIMIT;
			if (!send_only) {
				mc->igmp_joined = true;
			}
		}
	} else {
		/* IPv6 multicast join over RoCE v2 is not supported. */
		if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
			err = -ENOTSUPP;
	}
	dev_put(ndev);
	if (err || !mc->multicast.ib->rec.mtu) {
		if (!err)
			err = -EINVAL;
		goto out2;
	}
	rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
		    &mc->multicast.ib->rec.port_gid);
	work->id = id_priv;
	work->mc = mc;
	INIT_WORK(&work->work, iboe_mcast_work_handler);
	kref_get(&mc->mcref);
	queue_work(cma_wq, &work->work);

	return 0;

out2:
	kfree(mc->multicast.ib);
out1:
	kfree(work);
	return err;
}

int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
			u8 join_state, void *context)
{
	struct rdma_id_private *id_priv;
	struct cma_multicast *mc;
	int ret;

	id_priv = container_of(id, struct rdma_id_private, id);
	if (!cma_comp(id_priv, RDMA_CM_ADDR_BOUND) &&
	    !cma_comp(id_priv, RDMA_CM_ADDR_RESOLVED))
		return -EINVAL;

	mc = kmalloc(sizeof *mc, GFP_KERNEL);
	if (!mc)
		return -ENOMEM;

	memcpy(&mc->addr, addr, rdma_addr_size(addr));
	mc->context = context;
	mc->id_priv = id_priv;
	mc->igmp_joined = false;
	mc->join_state = join_state;
	spin_lock(&id_priv->lock);
	list_add(&mc->list, &id_priv->mc_list);
	spin_unlock(&id_priv->lock);

	if (rdma_protocol_roce(id->device, id->port_num)) {
		kref_init(&mc->mcref);
		ret = cma_iboe_join_multicast(id_priv, mc);
	} else if (rdma_cap_ib_mcast(id->device, id->port_num))
		ret = cma_join_ib_multicast(id_priv, mc);
	else
		ret = -ENOSYS;

	if (ret) {
		spin_lock_irq(&id_priv->lock);
		list_del(&mc->list);
		spin_unlock_irq(&id_priv->lock);
		kfree(mc);
	}
	return ret;
}
EXPORT_SYMBOL(rdma_join_multicast);

void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
{
	struct rdma_id_private *id_priv;
	struct cma_multicast *mc;

	id_priv = container_of(id, struct rdma_id_private, id);
	spin_lock_irq(&id_priv->lock);
	list_for_each_entry(mc, &id_priv->mc_list, list) {
		if (!memcmp(&mc->addr, addr, rdma_addr_size(addr))) {
			list_del(&mc->list);
			spin_unlock_irq(&id_priv->lock);

			if (id->qp)
				ib_detach_mcast(id->qp,
						&mc->multicast.ib->rec.mgid,
						be16_to_cpu(mc->multicast.ib->rec.mlid));

			BUG_ON(id_priv->cma_dev->device != id->device);

			if (rdma_cap_ib_mcast(id->device, id->port_num)) {
				ib_sa_free_multicast(mc->multicast.ib);
				kfree(mc);
			} else if (rdma_protocol_roce(id->device, id->port_num)) {
				if (mc->igmp_joined) {
					struct rdma_dev_addr *dev_addr =
						&id->route.addr.dev_addr;
					struct net_device *ndev = NULL;

					if (dev_addr->bound_dev_if)
						ndev = dev_get_by_index(dev_addr->net,
									dev_addr->bound_dev_if);
					/* No IGMP leave is issued here; just drop the reference. */
					if (ndev) {
						dev_put(ndev);
					}
					mc->igmp_joined = false;
				}
				kref_put(&mc->mcref, release_mc);
			}
			return;
		}
	}
	spin_unlock_irq(&id_priv->lock);
}
EXPORT_SYMBOL(rdma_leave_multicast);
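/*
 * sysctl(8) handler backing the per-port "default_roce_mode_port%d" nodes
 * created in cma_add_one(): a read reports the current default GID type as
 * a string; a write parses the string and installs the new type.
 */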
static int
sysctl_cma_default_roce_mode(SYSCTL_HANDLER_ARGS)
{
	struct cma_device *cma_dev = arg1;
	const int port = arg2;
	char buf[64];
	int error;

	strlcpy(buf, ib_cache_gid_type_str(
	    cma_get_default_gid_type(cma_dev, port)), sizeof(buf));

	error = sysctl_handle_string(oidp, buf, sizeof(buf), req);
	if (error != 0 || req->newptr == NULL)
		goto done;

	error = ib_cache_gid_parse_type_str(buf);
	if (error < 0) {
		error = EINVAL;
		goto done;
	}

	cma_set_default_gid_type(cma_dev, port, error);
	error = 0;
done:
	return (error);
}

static void cma_add_one(struct ib_device *device)
{
	struct cma_device *cma_dev;
	struct rdma_id_private *id_priv;
	unsigned int i;
	unsigned long supported_gids = 0;

	cma_dev = kmalloc(sizeof *cma_dev, GFP_KERNEL);
	if (!cma_dev)
		return;

	sysctl_ctx_init(&cma_dev->sysctl_ctx);

	cma_dev->device = device;
	cma_dev->default_gid_type = kcalloc(device->phys_port_cnt,
					    sizeof(*cma_dev->default_gid_type),
					    GFP_KERNEL);
	if (!cma_dev->default_gid_type) {
		kfree(cma_dev);
		return;
	}
	for (i = rdma_start_port(device); i <= rdma_end_port(device); i++) {
		supported_gids = roce_gid_type_mask_support(device, i);
		WARN_ON(!supported_gids);
		cma_dev->default_gid_type[i - rdma_start_port(device)] =
			find_first_bit(&supported_gids, BITS_PER_LONG);
	}

	init_completion(&cma_dev->comp);
	atomic_set(&cma_dev->refcount, 1);
	INIT_LIST_HEAD(&cma_dev->id_list);
	ib_set_client_data(device, &cma_client, cma_dev);

	mutex_lock(&lock);
	list_add_tail(&cma_dev->list, &dev_list);
	list_for_each_entry(id_priv, &listen_any_list, list)
		cma_listen_on_dev(id_priv, cma_dev);
	mutex_unlock(&lock);

	for (i = rdma_start_port(device); i <= rdma_end_port(device); i++) {
		char buf[64];

		snprintf(buf, sizeof(buf), "default_roce_mode_port%d", i);

		(void) SYSCTL_ADD_PROC(&cma_dev->sysctl_ctx,
		    SYSCTL_CHILDREN(device->ports_parent->parent->oidp),
		    OID_AUTO, buf, CTLTYPE_STRING | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
		    cma_dev, i, &sysctl_cma_default_roce_mode, "A",
		    "Default RoCE mode. Valid values: IB/RoCE v1 and RoCE v2");
	}
}

static int cma_remove_id_dev(struct rdma_id_private *id_priv)
{
	struct rdma_cm_event event;
	enum rdma_cm_state state;
	int ret = 0;

	/* Record that we want to remove the device */
	state = cma_exch(id_priv, RDMA_CM_DEVICE_REMOVAL);
	if (state == RDMA_CM_DESTROYING)
		return 0;

	cma_cancel_operation(id_priv, state);
	mutex_lock(&id_priv->handler_mutex);

	/* Check for destruction from another callback. */
	if (!cma_comp(id_priv, RDMA_CM_DEVICE_REMOVAL))
		goto out;

	memset(&event, 0, sizeof event);
	event.event = RDMA_CM_EVENT_DEVICE_REMOVAL;
	ret = id_priv->id.event_handler(&id_priv->id, &event);
out:
	mutex_unlock(&id_priv->handler_mutex);
	return ret;
}

static void cma_process_remove(struct cma_device *cma_dev)
{
	struct rdma_id_private *id_priv;
	int ret;

	mutex_lock(&lock);
	while (!list_empty(&cma_dev->id_list)) {
		id_priv = list_entry(cma_dev->id_list.next,
				     struct rdma_id_private, list);

		list_del(&id_priv->listen_list);
		list_del_init(&id_priv->list);
		atomic_inc(&id_priv->refcount);
		mutex_unlock(&lock);

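		/*
		 * Internal per-device listen IDs have no user event handler
		 * to notify and are destroyed unconditionally; user-visible
		 * IDs first receive RDMA_CM_EVENT_DEVICE_REMOVAL and are
		 * destroyed only if their handler requests it.
		 */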
		ret = id_priv->internal_id ?
		      1 : cma_remove_id_dev(id_priv);
		cma_deref_id(id_priv);
		if (ret)
			rdma_destroy_id(&id_priv->id);

		mutex_lock(&lock);
	}
	mutex_unlock(&lock);

	cma_deref_dev(cma_dev);
	wait_for_completion(&cma_dev->comp);
}

static void cma_remove_one(struct ib_device *device, void *client_data)
{
	struct cma_device *cma_dev = client_data;

	if (!cma_dev)
		return;

	mutex_lock(&lock);
	list_del(&cma_dev->list);
	mutex_unlock(&lock);

	cma_process_remove(cma_dev);
	sysctl_ctx_free(&cma_dev->sysctl_ctx);
	kfree(cma_dev->default_gid_type);
	kfree(cma_dev);
}

static void cma_init_vnet(void *arg)
{
	struct cma_pernet *pernet = &VNET(cma_pernet);

	idr_init(&pernet->tcp_ps);
	idr_init(&pernet->udp_ps);
	idr_init(&pernet->ipoib_ps);
	idr_init(&pernet->ib_ps);
}
VNET_SYSINIT(cma_init_vnet, SI_SUB_OFED_MODINIT - 1, SI_ORDER_FIRST,
    cma_init_vnet, NULL);

static void cma_destroy_vnet(void *arg)
{
	struct cma_pernet *pernet = &VNET(cma_pernet);

	idr_destroy(&pernet->tcp_ps);
	idr_destroy(&pernet->udp_ps);
	idr_destroy(&pernet->ipoib_ps);
	idr_destroy(&pernet->ib_ps);
}
VNET_SYSUNINIT(cma_destroy_vnet, SI_SUB_OFED_MODINIT - 1, SI_ORDER_SECOND,
    cma_destroy_vnet, NULL);

static int __init cma_init(void)
{
	int ret;

	cma_wq = alloc_ordered_workqueue("rdma_cm", WQ_MEM_RECLAIM);
	if (!cma_wq)
		return -ENOMEM;

	ib_sa_register_client(&sa_client);
	rdma_addr_register_client(&addr_client);

	ret = ib_register_client(&cma_client);
	if (ret)
		goto err;

	cma_configfs_init();

	return 0;

err:
	rdma_addr_unregister_client(&addr_client);
	ib_sa_unregister_client(&sa_client);
	destroy_workqueue(cma_wq);
	return ret;
}

static void __exit cma_cleanup(void)
{
	cma_configfs_exit();
	ib_unregister_client(&cma_client);
	rdma_addr_unregister_client(&addr_client);
	ib_sa_unregister_client(&sa_client);
	destroy_workqueue(cma_wq);
}

module_init(cma_init);
module_exit(cma_cleanup);
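/*
 * Illustrative sketch, not part of this module: the call sequence a
 * consumer of the API above would typically use for a UD multicast group.
 * "id" is assumed to be an RDMA_PS_UDP cm_id whose address has already
 * been bound or resolved; "mcast_addr" and "context" are hypothetical.
 *
 *	err = rdma_join_multicast(id, mcast_addr,
 *	    BIT(FULLMEMBER_JOIN), context);
 *	// on success, wait for RDMA_CM_EVENT_MULTICAST_JOIN in the cm_id's
 *	// event handler before sending to the group
 *	...
 *	rdma_leave_multicast(id, mcast_addr);
 */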