1/* 2 * Copyright (c) 2006 Intel Corporation. All rights reserved. 3 * 4 * This software is available to you under a choice of one of two 5 * licenses. You may choose to be licensed under the terms of the GNU 6 * General Public License (GPL) Version 2, available from the file 7 * COPYING in the main directory of this source tree, or the 8 * OpenIB.org BSD license below: 9 * 10 * Redistribution and use in source and binary forms, with or 11 * without modification, are permitted provided that the following 12 * conditions are met: 13 * 14 * - Redistributions of source code must retain the above 15 * copyright notice, this list of conditions and the following 16 * disclaimer. 17 * 18 * - Redistributions in binary form must reproduce the above 19 * copyright notice, this list of conditions and the following 20 * disclaimer in the documentation and/or other materials 21 * provided with the distribution. 22 * 23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 * SOFTWARE. 
31 */ 32 33#include <linux/dma-mapping.h> 34#include <linux/err.h> 35#include <linux/interrupt.h> 36#include <linux/rbtree.h> 37#include <linux/mutex.h> 38#include <linux/spinlock.h> 39#include <linux/pci.h> 40#include <linux/miscdevice.h> 41#include <linux/random.h> 42 43#include <rdma/ib_cache.h> 44#include <rdma/ib_sa.h> 45#include "sa.h" 46 47MODULE_AUTHOR("Sean Hefty"); 48MODULE_DESCRIPTION("InfiniBand subnet administration caching"); 49MODULE_LICENSE("Dual BSD/GPL"); 50 51enum { 52 SA_DB_MAX_PATHS_PER_DEST = 0x7F, 53 SA_DB_MIN_RETRY_TIMER = 4000, /* 4 sec */ 54 SA_DB_MAX_RETRY_TIMER = 256000 /* 256 sec */ 55}; 56 57static unsigned long paths_per_dest = 0; 58static char subscribe_inform_info = 1; 59static unsigned long retry_timer = SA_DB_MIN_RETRY_TIMER; 60 61enum sa_db_lookup_method { 62 SA_DB_LOOKUP_LEAST_USED, 63 SA_DB_LOOKUP_RANDOM 64}; 65 66static unsigned long lookup_method; 67 68static void sa_db_add_dev(struct ib_device *device); 69static void sa_db_remove_dev(struct ib_device *device); 70 71static struct ib_client sa_db_client = { 72 .name = "local_sa", 73 .add = sa_db_add_dev, 74 .remove = sa_db_remove_dev 75}; 76 77static LIST_HEAD(dev_list); 78static DEFINE_MUTEX(lock); 79static rwlock_t rwlock; 80static struct workqueue_struct *sa_wq; 81static struct ib_sa_client sa_client; 82 83enum sa_db_state { 84 SA_DB_IDLE, 85 SA_DB_REFRESH, 86 SA_DB_DESTROY 87}; 88 89struct sa_db_port { 90 struct sa_db_device *dev; 91 struct ib_mad_agent *agent; 92 /* Limit number of outstanding MADs to SA to reduce SA flooding */ 93 struct ib_mad_send_buf *msg; 94 u16 sm_lid; 95 u8 sm_sl; 96 struct ib_inform_info *in_info; 97 struct ib_inform_info *out_info; 98 struct rb_root paths; 99 struct list_head update_list; 100 unsigned long update_id; 101 enum sa_db_state state; 102 struct work_struct work; 103 union ib_gid gid; 104 int port_num; 105}; 106 107struct sa_db_device { 108 struct list_head list; 109 struct ib_device *device; 110 struct ib_event_handler event_handler; 
111 int start_port; 112 int port_count; 113 struct sa_db_port port[0]; 114}; 115 116struct ib_sa_iterator { 117 struct ib_sa_iterator *next; 118}; 119 120struct ib_sa_attr_iter { 121 struct ib_sa_iterator *iter; 122 unsigned long flags; 123}; 124 125struct ib_sa_attr_list { 126 struct ib_sa_iterator iter; 127 struct ib_sa_iterator *tail; 128 int update_id; 129 union ib_gid gid; 130 struct rb_node node; 131}; 132 133struct ib_path_rec_info { 134 struct ib_sa_iterator iter; /* keep first */ 135 struct ib_sa_path_rec rec; 136 unsigned long lookups; 137}; 138 139struct ib_sa_mad_iter { 140 struct ib_mad_recv_wc *recv_wc; 141 struct ib_mad_recv_buf *recv_buf; 142 int attr_size; 143 int attr_offset; 144 int data_offset; 145 int data_left; 146 void *attr; 147 u8 attr_data[0]; 148}; 149 150enum sa_update_type { 151 SA_UPDATE_FULL, 152 SA_UPDATE_ADD, 153 SA_UPDATE_REMOVE 154}; 155 156struct update_info { 157 struct list_head list; 158 union ib_gid gid; 159 enum sa_update_type type; 160}; 161 162struct sa_path_request { 163 struct work_struct work; 164 struct ib_sa_client *client; 165 void (*callback)(int, struct ib_sa_path_rec *, void *); 166 void *context; 167 struct ib_sa_path_rec path_rec; 168}; 169 170static void process_updates(struct sa_db_port *port); 171 172static void free_attr_list(struct ib_sa_attr_list *attr_list) 173{ 174 struct ib_sa_iterator *cur; 175 176 for (cur = attr_list->iter.next; cur; cur = attr_list->iter.next) { 177 attr_list->iter.next = cur->next; 178 kfree(cur); 179 } 180 attr_list->tail = &attr_list->iter; 181} 182 183static void remove_attr(struct rb_root *root, struct ib_sa_attr_list *attr_list) 184{ 185 rb_erase(&attr_list->node, root); 186 free_attr_list(attr_list); 187 kfree(attr_list); 188} 189 190static void remove_all_attrs(struct rb_root *root) 191{ 192 struct rb_node *node, *next_node; 193 struct ib_sa_attr_list *attr_list; 194 195 write_lock_irq(&rwlock); 196 for (node = rb_first(root); node; node = next_node) { 197 next_node = 
rb_next(node); 198 attr_list = rb_entry(node, struct ib_sa_attr_list, node); 199 remove_attr(root, attr_list); 200 } 201 write_unlock_irq(&rwlock); 202} 203 204static void remove_old_attrs(struct rb_root *root, unsigned long update_id) 205{ 206 struct rb_node *node, *next_node; 207 struct ib_sa_attr_list *attr_list; 208 209 write_lock_irq(&rwlock); 210 for (node = rb_first(root); node; node = next_node) { 211 next_node = rb_next(node); 212 attr_list = rb_entry(node, struct ib_sa_attr_list, node); 213 if (attr_list->update_id != update_id) 214 remove_attr(root, attr_list); 215 } 216 write_unlock_irq(&rwlock); 217} 218 219static struct ib_sa_attr_list *insert_attr_list(struct rb_root *root, 220 struct ib_sa_attr_list *attr_list) 221{ 222 struct rb_node **link = &root->rb_node; 223 struct rb_node *parent = NULL; 224 struct ib_sa_attr_list *cur_attr_list; 225 int cmp; 226 227 while (*link) { 228 parent = *link; 229 cur_attr_list = rb_entry(parent, struct ib_sa_attr_list, node); 230 cmp = memcmp(&cur_attr_list->gid, &attr_list->gid, 231 sizeof attr_list->gid); 232 if (cmp < 0) 233 link = &(*link)->rb_left; 234 else if (cmp > 0) 235 link = &(*link)->rb_right; 236 else 237 return cur_attr_list; 238 } 239 rb_link_node(&attr_list->node, parent, link); 240 rb_insert_color(&attr_list->node, root); 241 return NULL; 242} 243 244static struct ib_sa_attr_list *find_attr_list(struct rb_root *root, u8 *gid) 245{ 246 struct rb_node *node = root->rb_node; 247 struct ib_sa_attr_list *attr_list; 248 int cmp; 249 250 while (node) { 251 attr_list = rb_entry(node, struct ib_sa_attr_list, node); 252 cmp = memcmp(&attr_list->gid, gid, sizeof attr_list->gid); 253 if (cmp < 0) 254 node = node->rb_left; 255 else if (cmp > 0) 256 node = node->rb_right; 257 else 258 return attr_list; 259 } 260 return NULL; 261} 262 263static int insert_attr(struct rb_root *root, unsigned long update_id, void *key, 264 struct ib_sa_iterator *iter) 265{ 266 struct ib_sa_attr_list *attr_list; 267 void *err; 268 269 
write_lock_irq(&rwlock); 270 attr_list = find_attr_list(root, key); 271 if (!attr_list) { 272 write_unlock_irq(&rwlock); 273 attr_list = kmalloc(sizeof *attr_list, GFP_KERNEL); 274 if (!attr_list) 275 return -ENOMEM; 276 277 attr_list->iter.next = NULL; 278 attr_list->tail = &attr_list->iter; 279 attr_list->update_id = update_id; 280 memcpy(attr_list->gid.raw, key, sizeof attr_list->gid); 281 282 write_lock_irq(&rwlock); 283 err = insert_attr_list(root, attr_list); 284 if (err) { 285 write_unlock_irq(&rwlock); 286 kfree(attr_list); 287 return PTR_ERR(err); 288 } 289 } else if (attr_list->update_id != update_id) { 290 free_attr_list(attr_list); 291 attr_list->update_id = update_id; 292 } 293 294 attr_list->tail->next = iter; 295 iter->next = NULL; 296 attr_list->tail = iter; 297 write_unlock_irq(&rwlock); 298 return 0; 299} 300 301static struct ib_sa_mad_iter *ib_sa_iter_create(struct ib_mad_recv_wc *mad_recv_wc) 302{ 303 struct ib_sa_mad_iter *iter; 304 struct ib_sa_mad *mad = (struct ib_sa_mad *) mad_recv_wc->recv_buf.mad; 305 int attr_size, attr_offset; 306 307 attr_offset = be16_to_cpu(mad->sa_hdr.attr_offset) * 8; 308 attr_size = 64; /* path record length */ 309 if (attr_offset < attr_size) 310 return ERR_PTR(-EINVAL); 311 312 iter = kzalloc(sizeof *iter + attr_size, GFP_KERNEL); 313 if (!iter) 314 return ERR_PTR(-ENOMEM); 315 316 iter->data_left = mad_recv_wc->mad_len - IB_MGMT_SA_HDR; 317 iter->recv_wc = mad_recv_wc; 318 iter->recv_buf = &mad_recv_wc->recv_buf; 319 iter->attr_offset = attr_offset; 320 iter->attr_size = attr_size; 321 return iter; 322} 323 324static void ib_sa_iter_free(struct ib_sa_mad_iter *iter) 325{ 326 kfree(iter); 327} 328 329static void *ib_sa_iter_next(struct ib_sa_mad_iter *iter) 330{ 331 struct ib_sa_mad *mad; 332 int left, offset = 0; 333 334 while (iter->data_left >= iter->attr_offset) { 335 while (iter->data_offset < IB_MGMT_SA_DATA) { 336 mad = (struct ib_sa_mad *) iter->recv_buf->mad; 337 338 left = IB_MGMT_SA_DATA - 
iter->data_offset; 339 if (left < iter->attr_size) { 340 /* copy first piece of the attribute */ 341 iter->attr = &iter->attr_data; 342 memcpy(iter->attr, 343 &mad->data[iter->data_offset], left); 344 offset = left; 345 break; 346 } else if (offset) { 347 /* copy the second piece of the attribute */ 348 memcpy(iter->attr + offset, &mad->data[0], 349 iter->attr_size - offset); 350 iter->data_offset = iter->attr_size - offset; 351 offset = 0; 352 } else { 353 iter->attr = &mad->data[iter->data_offset]; 354 iter->data_offset += iter->attr_size; 355 } 356 357 iter->data_left -= iter->attr_offset; 358 goto out; 359 } 360 iter->data_offset = 0; 361 iter->recv_buf = list_entry(iter->recv_buf->list.next, 362 struct ib_mad_recv_buf, list); 363 } 364 iter->attr = NULL; 365out: 366 return iter->attr; 367} 368 369/* 370 * Copy path records from a received response and insert them into our cache. 371 * A path record in the MADs are in network order, packed, and may 372 * span multiple MAD buffers, just to make our life hard. 
373 */ 374static void update_path_db(struct sa_db_port *port, 375 struct ib_mad_recv_wc *mad_recv_wc, 376 enum sa_update_type type) 377{ 378 struct ib_sa_mad_iter *iter; 379 struct ib_path_rec_info *path_info; 380 void *attr; 381 int ret; 382 383 iter = ib_sa_iter_create(mad_recv_wc); 384 if (IS_ERR(iter)) 385 return; 386 387 port->update_id += (type == SA_UPDATE_FULL); 388 389 while ((attr = ib_sa_iter_next(iter)) && 390 (path_info = kmalloc(sizeof *path_info, GFP_KERNEL))) { 391 392 ib_sa_unpack_attr(&path_info->rec, attr, IB_SA_ATTR_PATH_REC); 393 394 ret = insert_attr(&port->paths, port->update_id, 395 path_info->rec.dgid.raw, &path_info->iter); 396 if (ret) { 397 kfree(path_info); 398 break; 399 } 400 } 401 ib_sa_iter_free(iter); 402 403 if (type == SA_UPDATE_FULL) 404 remove_old_attrs(&port->paths, port->update_id); 405} 406 407static struct ib_mad_send_buf *get_sa_msg(struct sa_db_port *port, 408 struct update_info *update) 409{ 410 struct ib_ah_attr ah_attr; 411 struct ib_mad_send_buf *msg; 412 413 msg = ib_create_send_mad(port->agent, 1, 0, 0, IB_MGMT_SA_HDR, 414 IB_MGMT_SA_DATA, GFP_KERNEL); 415 if (IS_ERR(msg)) 416 return NULL; 417 418 memset(&ah_attr, 0, sizeof ah_attr); 419 ah_attr.dlid = port->sm_lid; 420 ah_attr.sl = port->sm_sl; 421 ah_attr.port_num = port->port_num; 422 423 msg->ah = ib_create_ah(port->agent->qp->pd, &ah_attr); 424 if (IS_ERR(msg->ah)) { 425 ib_free_send_mad(msg); 426 return NULL; 427 } 428 429 msg->timeout_ms = retry_timer; 430 msg->retries = 0; 431 msg->context[0] = port; 432 msg->context[1] = update; 433 return msg; 434} 435 436static __be64 form_tid(u32 hi_tid) 437{ 438 static atomic_t tid; 439 return cpu_to_be64((((u64) hi_tid) << 32) | 440 ((u32) atomic_inc_return(&tid))); 441} 442 443static void format_path_req(struct sa_db_port *port, 444 struct update_info *update, 445 struct ib_mad_send_buf *msg) 446{ 447 struct ib_sa_mad *mad = msg->mad; 448 struct ib_sa_path_rec path_rec; 449 450 mad->mad_hdr.base_version = 
IB_MGMT_BASE_VERSION; 451 mad->mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM; 452 mad->mad_hdr.class_version = IB_SA_CLASS_VERSION; 453 mad->mad_hdr.method = IB_SA_METHOD_GET_TABLE; 454 mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_PATH_REC); 455 mad->mad_hdr.tid = form_tid(msg->mad_agent->hi_tid); 456 457 mad->sa_hdr.comp_mask = IB_SA_PATH_REC_SGID | IB_SA_PATH_REC_NUMB_PATH; 458 459 path_rec.sgid = port->gid; 460 path_rec.numb_path = (u8) paths_per_dest; 461 462 if (update->type == SA_UPDATE_ADD) { 463 mad->sa_hdr.comp_mask |= IB_SA_PATH_REC_DGID; 464 memcpy(&path_rec.dgid, &update->gid, sizeof path_rec.dgid); 465 } 466 467 ib_sa_pack_attr(mad->data, &path_rec, IB_SA_ATTR_PATH_REC); 468} 469 470static int send_query(struct sa_db_port *port, 471 struct update_info *update) 472{ 473 int ret; 474 475 port->msg = get_sa_msg(port, update); 476 if (!port->msg) 477 return -ENOMEM; 478 479 format_path_req(port, update, port->msg); 480 481 ret = ib_post_send_mad(port->msg, NULL); 482 if (ret) 483 goto err; 484 485 return 0; 486 487err: 488 ib_destroy_ah(port->msg->ah); 489 ib_free_send_mad(port->msg); 490 return ret; 491} 492 493static void add_update(struct sa_db_port *port, u8 *gid, 494 enum sa_update_type type) 495{ 496 struct update_info *update; 497 498 update = kmalloc(sizeof *update, GFP_KERNEL); 499 if (update) { 500 if (gid) 501 memcpy(&update->gid, gid, sizeof update->gid); 502 update->type = type; 503 list_add(&update->list, &port->update_list); 504 } 505 506 if (port->state == SA_DB_IDLE) { 507 port->state = SA_DB_REFRESH; 508 process_updates(port); 509 } 510} 511 512static void clean_update_list(struct sa_db_port *port) 513{ 514 struct update_info *update; 515 516 while (!list_empty(&port->update_list)) { 517 update = list_entry(port->update_list.next, 518 struct update_info, list); 519 list_del(&update->list); 520 kfree(update); 521 } 522} 523 524static int notice_handler(int status, struct ib_inform_info *info, 525 struct ib_sa_notice *notice) 526{ 527 struct 
sa_db_port *port = info->context; 528 struct ib_sa_notice_data_gid *gid_data; 529 struct ib_inform_info **pinfo; 530 enum sa_update_type type; 531 532 if (info->trap_number == IB_SA_SM_TRAP_GID_IN_SERVICE) { 533 pinfo = &port->in_info; 534 type = SA_UPDATE_ADD; 535 } else { 536 pinfo = &port->out_info; 537 type = SA_UPDATE_REMOVE; 538 } 539 540 mutex_lock(&lock); 541 if (port->state == SA_DB_DESTROY || !*pinfo) { 542 mutex_unlock(&lock); 543 return 0; 544 } 545 546 if (notice) { 547 gid_data = (struct ib_sa_notice_data_gid *) 548 ¬ice->data_details; 549 add_update(port, gid_data->gid, type); 550 mutex_unlock(&lock); 551 } else if (status == -ENETRESET) { 552 *pinfo = NULL; 553 mutex_unlock(&lock); 554 } else { 555 if (status) 556 *pinfo = ERR_PTR(-EINVAL); 557 port->state = SA_DB_IDLE; 558 clean_update_list(port); 559 mutex_unlock(&lock); 560 queue_work(sa_wq, &port->work); 561 } 562 563 return status; 564} 565 566static int reg_in_info(struct sa_db_port *port) 567{ 568 int ret = 0; 569 570 port->in_info = ib_sa_register_inform_info(&sa_client, 571 port->dev->device, 572 port->port_num, 573 IB_SA_SM_TRAP_GID_IN_SERVICE, 574 GFP_KERNEL, notice_handler, 575 port); 576 if (IS_ERR(port->in_info)) 577 ret = PTR_ERR(port->in_info); 578 579 return ret; 580} 581 582static int reg_out_info(struct sa_db_port *port) 583{ 584 int ret = 0; 585 586 port->out_info = ib_sa_register_inform_info(&sa_client, 587 port->dev->device, 588 port->port_num, 589 IB_SA_SM_TRAP_GID_OUT_OF_SERVICE, 590 GFP_KERNEL, notice_handler, 591 port); 592 if (IS_ERR(port->out_info)) 593 ret = PTR_ERR(port->out_info); 594 595 return ret; 596} 597 598static void unsubscribe_port(struct sa_db_port *port) 599{ 600 if (port->in_info && !IS_ERR(port->in_info)) 601 ib_sa_unregister_inform_info(port->in_info); 602 603 if (port->out_info && !IS_ERR(port->out_info)) 604 ib_sa_unregister_inform_info(port->out_info); 605 606 port->out_info = NULL; 607 port->in_info = NULL; 608 609} 610 611static void 
cleanup_port(struct sa_db_port *port)
{
	unsubscribe_port(port);

	clean_update_list(port);
	remove_all_attrs(&port->paths);
}

/* Refresh the cached SM LID/SL; fails with -ENODATA if the port is down. */
static int update_port_info(struct sa_db_port *port)
{
	struct ib_port_attr port_attr;
	int ret;

	ret = ib_query_port(port->dev->device, port->port_num, &port_attr);
	if (ret)
		return ret;

	if (port_attr.state != IB_PORT_ACTIVE)
		return -ENODATA;

	port->sm_lid = port_attr.sm_lid;
	port->sm_sl = port_attr.sm_sl;
	return 0;
}

/*
 * Drain the port's update queue.  Called with 'lock' held, in state
 * SA_DB_REFRESH.  Returns early (still in REFRESH) whenever an inform-info
 * registration or an SA query is left outstanding; the corresponding
 * completion handler re-enters to continue.  Drops back to IDLE when done.
 */
static void process_updates(struct sa_db_port *port)
{
	struct update_info *update;
	struct ib_sa_attr_list *attr_list;
	int ret;

	if (!paths_per_dest || update_port_info(port)) {
		/* Caching disabled or port not active: flush everything. */
		cleanup_port(port);
		goto out;
	}

	/* Event registration is an optimization, so ignore failures. */
	if (subscribe_inform_info) {
		if (!port->out_info) {
			ret = reg_out_info(port);
			if (!ret)
				return;	/* resumed from notice_handler */
		}

		if (!port->in_info) {
			ret = reg_in_info(port);
			if (!ret)
				return;	/* resumed from notice_handler */
		}
	} else
		unsubscribe_port(port);

	while (!list_empty(&port->update_list)) {
		update = list_entry(port->update_list.next,
				    struct update_info, list);

		if (update->type == SA_UPDATE_REMOVE) {
			write_lock_irq(&rwlock);
			attr_list = find_attr_list(&port->paths,
						   update->gid.raw);
			if (attr_list)
				remove_attr(&port->paths, attr_list);
			write_unlock_irq(&rwlock);
		} else {
			ret = send_query(port, update);
			if (!ret)
				return;	/* resumed from send_handler */

		}
		list_del(&update->list);
		kfree(update);
	}
out:
	port->state = SA_DB_IDLE;
}

/*
 * Schedule a full refresh of the port's cache, canceling any query in
 * flight.  Called with 'lock' held.
 */
static void refresh_port_db(struct sa_db_port *port)
{
	if (port->state == SA_DB_DESTROY)
		return;

	if (port->state == SA_DB_REFRESH) {
		clean_update_list(port);
		ib_cancel_mad(port->agent, port->msg);
	}

	add_update(port, NULL, SA_UPDATE_FULL);
}

/* Refresh every port of a device.  Called with 'lock' held. */
static void refresh_dev_db(struct sa_db_device *dev)
{
	int i;

	for (i = 0; i < dev->port_count; i++)
		refresh_port_db(&dev->port[i]);
}

/* Refresh all ports of all registered devices.  Called with 'lock' held. */
static void refresh_db(void)
{
	struct sa_db_device *dev;

	list_for_each_entry(dev, &dev_list, list)
		refresh_dev_db(dev);
}

/* Workqueue entry point: refresh one port under the mutex. */
static void port_work_handler(struct work_struct *work)
{
	struct sa_db_port *port;

	port = container_of(work, typeof(*port), work);
	mutex_lock(&lock);
	refresh_port_db(port);
	mutex_unlock(&lock);
}

/* Device event callback: refresh the affected port from the workqueue. */
static void handle_event(struct ib_event_handler *event_handler,
			 struct ib_event *event)
{
	struct sa_db_device *dev;
	struct sa_db_port *port;

	dev = container_of(event_handler, typeof(*dev), event_handler);
	port = &dev->port[event->element.port_num - dev->start_port];

	switch (event->event) {
	case IB_EVENT_PORT_ERR:
	case IB_EVENT_LID_CHANGE:
	case IB_EVENT_SM_CHANGE:
	case IB_EVENT_CLIENT_REREGISTER:
	case IB_EVENT_PKEY_CHANGE:
	case IB_EVENT_PORT_ACTIVE:
		queue_work(sa_wq, &port->work);
		break;
	default:
		break;
	}
}

/* Release the read lock taken by ib_create_path_iter(). */
static void ib_free_path_iter(struct ib_sa_attr_iter *iter)
{
	read_unlock_irqrestore(&rwlock, iter->flags);
}

/*
 * Position iter at the cached path list for dgid.  On success the rwlock
 * is held for reading until ib_free_path_iter() is called.
 */
static int ib_create_path_iter(struct ib_device *device, u8 port_num,
			       union ib_gid *dgid, struct ib_sa_attr_iter *iter)
{
	struct sa_db_device *dev;
	struct sa_db_port *port;
	struct ib_sa_attr_list *list;

	dev = ib_get_client_data(device, &sa_db_client);
	if (!dev)
		return -ENODEV;

	port = &dev->port[port_num - dev->start_port];

	read_lock_irqsave(&rwlock, iter->flags);
	list = find_attr_list(&port->paths, dgid->raw);
	if (!list) {
		ib_free_path_iter(iter);
		return -ENODATA;
	}

	iter->iter = &list->iter;
	return 0;
}

/* Advance iter and return the next cached path record, or NULL at the end. */
static struct ib_sa_path_rec *ib_get_next_path(struct ib_sa_attr_iter *iter)
{
	struct ib_path_rec_info *next_path;

	iter->iter = iter->iter->next;
	if (iter->iter) {
		next_path = container_of(iter->iter, struct ib_path_rec_info, iter);
		return &next_path->rec;
	} else
		return NULL;
}

/*
 * Match a cached record (src) against a request (dst) for every field
 * selected by comp_mask.  Returns 0 on a match, -EINVAL otherwise.
 */
static int cmp_rec(struct ib_sa_path_rec *src,
		   struct ib_sa_path_rec *dst, ib_sa_comp_mask comp_mask)
{
	/* DGID check already done */
	if (comp_mask & IB_SA_PATH_REC_SGID &&
	    memcmp(&src->sgid, &dst->sgid, sizeof src->sgid))
		return -EINVAL;
	if (comp_mask & IB_SA_PATH_REC_DLID && src->dlid != dst->dlid)
		return -EINVAL;
	if (comp_mask & IB_SA_PATH_REC_SLID && src->slid != dst->slid)
		return -EINVAL;
	if (comp_mask & IB_SA_PATH_REC_RAW_TRAFFIC &&
	    src->raw_traffic != dst->raw_traffic)
		return -EINVAL;

	if (comp_mask & IB_SA_PATH_REC_FLOW_LABEL &&
	    src->flow_label != dst->flow_label)
		return -EINVAL;
	if (comp_mask & IB_SA_PATH_REC_HOP_LIMIT &&
	    src->hop_limit != dst->hop_limit)
		return -EINVAL;
	if (comp_mask & IB_SA_PATH_REC_TRAFFIC_CLASS &&
	    src->traffic_class != dst->traffic_class)
		return -EINVAL;
	/* A reversible request is only satisfied by a reversible path. */
	if (comp_mask & IB_SA_PATH_REC_REVERSIBLE &&
	    dst->reversible && !src->reversible)
		return -EINVAL;
	/* Numb path check already done */
	if (comp_mask & IB_SA_PATH_REC_PKEY && src->pkey != dst->pkey)
		return -EINVAL;

	if (comp_mask & IB_SA_PATH_REC_SL && src->sl != dst->sl)
		return -EINVAL;

	if (ib_sa_check_selector(comp_mask, IB_SA_PATH_REC_MTU_SELECTOR,
				 IB_SA_PATH_REC_MTU, dst->mtu_selector,
				 src->mtu, dst->mtu))
		return -EINVAL;
	if (ib_sa_check_selector(comp_mask, IB_SA_PATH_REC_RATE_SELECTOR,
				 IB_SA_PATH_REC_RATE, dst->rate_selector,
				 src->rate, dst->rate))
		return -EINVAL;
	if (ib_sa_check_selector(comp_mask,
				 IB_SA_PATH_REC_PACKET_LIFE_TIME_SELECTOR,
				 IB_SA_PATH_REC_PACKET_LIFE_TIME,
				 dst->packet_life_time_selector,
				 src->packet_life_time, dst->packet_life_time))
		return -EINVAL;

	return 0;
}

/*
 * Reservoir sampling over the matching paths: the i-th match replaces the
 * current choice with probability 1/i.  NOTE(review): 'num' is signed, so
 * a negative num % count skews the distribution slightly - confirm whether
 * uniformity matters here.
 */
static struct ib_sa_path_rec *get_random_path(struct ib_sa_attr_iter *iter,
					      struct ib_sa_path_rec *req_path,
					      ib_sa_comp_mask comp_mask)
{
	struct ib_sa_path_rec *path, *rand_path = NULL;
	int num, count = 0;

	for (path = ib_get_next_path(iter); path;
	     path = ib_get_next_path(iter)) {
		if (!cmp_rec(path, req_path, comp_mask)) {
			get_random_bytes(&num, sizeof num);
			if ((num % ++count) == 0)
				rand_path = path;
		}
	}

	return rand_path;
}

/*
 * Least-used selection: return the matching path with the lowest lookup
 * count and bump its counter, spreading load across cached paths.
 */
static struct ib_sa_path_rec *get_next_path(struct ib_sa_attr_iter *iter,
					    struct ib_sa_path_rec *req_path,
					    ib_sa_comp_mask comp_mask)
{
	struct ib_path_rec_info *cur_path, *next_path = NULL;
	struct ib_sa_path_rec *path;
	unsigned long lookups = ~0;

	for (path = ib_get_next_path(iter); path;
	     path = ib_get_next_path(iter)) {
		if (!cmp_rec(path, req_path, comp_mask)) {

			cur_path = container_of(iter->iter, struct ib_path_rec_info,
						iter);
			if (cur_path->lookups < lookups) {
				lookups = cur_path->lookups;
				next_path = cur_path;
			}
		}
	}

	if (next_path) {
		next_path->lookups++;
		return &next_path->rec;
	} else
		return NULL;
}

/* Workqueue callback: deliver a cache-satisfied path query to its caller. */
static void report_path(struct work_struct *work)
{
	struct sa_path_request *req;

	req = container_of(work, struct sa_path_request, work);
	req->callback(0, &req->path_rec, req->context);
	ib_sa_client_put(req->client);
	kfree(req);
}

/**
 * ib_sa_path_rec_get - Start a Path get query
 * @client:SA client
 * @device:device to send query on
 * @port_num: port number to send query on
 * @rec:Path Record to send in query
 * @comp_mask:component mask to send in query
 * @timeout_ms:time to wait for response
 * @gfp_mask:GFP mask to use for internal allocations
 * @callback:function called when query completes, times out or is
 * canceled
 * @context:opaque user context passed to callback
 * @sa_query:query context, used to cancel query
 *
 * Send a
Path Record Get query to the SA to look up a path.  The
 * callback function will be called when the query completes (or
 * fails); status is 0 for a successful response, -EINTR if the query
 * is canceled, -ETIMEDOUT if the query timed out, or -EIO if an error
 * occurred sending the query.  The resp parameter of the callback is
 * only valid if status is 0.
 *
 * If the return value of ib_sa_path_rec_get() is negative, it is an
 * error code.  Otherwise it is a query ID that can be used to cancel
 * the query.
 */
int ib_sa_path_rec_get(struct ib_sa_client *client,
		       struct ib_device *device, u8 port_num,
		       struct ib_sa_path_rec *rec,
		       ib_sa_comp_mask comp_mask,
		       int timeout_ms, gfp_t gfp_mask,
		       void (*callback)(int status,
					struct ib_sa_path_rec *resp,
					void *context),
		       void *context,
		       struct ib_sa_query **sa_query)
{
	struct sa_path_request *req;
	struct ib_sa_attr_iter iter;
	struct ib_sa_path_rec *path_rec;
	int ret;

	/* The cache only serves single-path requests keyed by DGID. */
	if (!paths_per_dest)
		goto query_sa;

	if (!(comp_mask & IB_SA_PATH_REC_DGID) ||
	    !(comp_mask & IB_SA_PATH_REC_NUMB_PATH) || rec->numb_path != 1)
		goto query_sa;

	req = kmalloc(sizeof *req, gfp_mask);
	if (!req)
		goto query_sa;

	ret = ib_create_path_iter(device, port_num, &rec->dgid, &iter);
	if (ret)
		goto free_req;

	if (lookup_method == SA_DB_LOOKUP_RANDOM)
		path_rec = get_random_path(&iter, rec, comp_mask);
	else
		path_rec = get_next_path(&iter, rec, comp_mask);

	if (!path_rec)
		goto free_iter;

	/* Copy the record before releasing the read lock on the cache. */
	memcpy(&req->path_rec, path_rec, sizeof *path_rec);
	ib_free_path_iter(&iter);

	INIT_WORK(&req->work, report_path);
	req->client = client;
	req->callback = callback;
	req->context = context;

	ib_sa_client_get(client);
	queue_work(sa_wq, &req->work);
	/*
	 * Cache hit: callback runs from the workqueue.  *sa_query is set to
	 * an error pointer, so a cache-satisfied query cannot be canceled.
	 */
	*sa_query = ERR_PTR(-EEXIST);
	return 0;

free_iter:
	ib_free_path_iter(&iter);
free_req:
	kfree(req);
query_sa:
	/* Cache miss or caching disabled: fall through to a real SA query. */
	return ib_sa_path_rec_query(client, device, port_num, rec, comp_mask,
				    timeout_ms, gfp_mask, callback, context,
				    sa_query);
}
EXPORT_SYMBOL(ib_sa_path_rec_get);

/*
 * MAD receive completion.  The response is only applied when it matches
 * the update at the head of the queue (stale responses for canceled or
 * superseded queries are dropped).
 */
static void recv_handler(struct ib_mad_agent *mad_agent,
			 struct ib_mad_recv_wc *mad_recv_wc)
{
	struct sa_db_port *port;
	struct update_info *update;
	struct ib_mad_send_buf *msg;
	enum sa_update_type type;

	/* context[0]/context[1] were stashed by get_sa_msg(). */
	msg = (struct ib_mad_send_buf *) (unsigned long) mad_recv_wc->wc->wr_id;
	port = msg->context[0];
	update = msg->context[1];

	mutex_lock(&lock);
	if (port->state == SA_DB_DESTROY ||
	    update != list_entry(port->update_list.next,
				 struct update_info, list)) {
		mutex_unlock(&lock);
	} else {
		type = update->type;
		mutex_unlock(&lock);
		update_path_db(mad_agent->context, mad_recv_wc, type);
	}

	ib_free_recv_mad(mad_recv_wc);
}

/*
 * MAD send completion.  On timeout the query is re-posted with a doubled
 * timeout up to SA_DB_MAX_RETRY_TIMER; otherwise the finished update is
 * retired and processing of the queue continues.
 */
static void send_handler(struct ib_mad_agent *agent,
			 struct ib_mad_send_wc *mad_send_wc)
{
	struct ib_mad_send_buf *msg;
	struct sa_db_port *port;
	struct update_info *update;
	int ret;

	msg = mad_send_wc->send_buf;
	port = msg->context[0];
	update = msg->context[1];

	mutex_lock(&lock);
	if (port->state == SA_DB_DESTROY)
		goto unlock;

	if (update == list_entry(port->update_list.next,
				 struct update_info, list)) {

		if (mad_send_wc->status == IB_WC_RESP_TIMEOUT_ERR &&
		    msg->timeout_ms < SA_DB_MAX_RETRY_TIMER) {

			/* Exponential backoff: retry with double the timeout. */
			msg->timeout_ms <<= 1;
			ret = ib_post_send_mad(msg, NULL);
			if (!ret) {
				mutex_unlock(&lock);
				return;
			}
		}
		list_del(&update->list);
		kfree(update);
	}
	process_updates(port);
unlock:
	mutex_unlock(&lock);

	ib_destroy_ah(msg->ah);
	ib_free_send_mad(msg);
}

/*
 * Initialize one port: cache its GID and register a GSI MAD agent for
 * SA queries.  Remaining fields were zeroed by the kzalloc of the device.
 */
static int init_port(struct sa_db_device *dev, int port_num)
{
	struct sa_db_port *port;
	int ret;

	port = &dev->port[port_num - dev->start_port];
	port->dev = dev;
	port->port_num = port_num;
	INIT_WORK(&port->work, port_work_handler);
	port->paths = RB_ROOT;
	INIT_LIST_HEAD(&port->update_list);

	ret = ib_get_cached_gid(dev->device, port_num, 0, &port->gid);
	if (ret)
		return ret;

	port->agent = ib_register_mad_agent(dev->device, port_num, IB_QPT_GSI,
					    NULL, IB_MGMT_RMPP_VERSION,
					    send_handler, recv_handler, port);
	if (IS_ERR(port->agent))
		ret = PTR_ERR(port->agent);

	return ret;
}

/*
 * Tear down a port: mark it DESTROY so completion handlers bail out, then
 * unregister the agent (flushes outstanding MADs), clean the cache, and
 * drain the workqueue.
 */
static void destroy_port(struct sa_db_port *port)
{
	mutex_lock(&lock);
	port->state = SA_DB_DESTROY;
	mutex_unlock(&lock);

	ib_unregister_mad_agent(port->agent);
	cleanup_port(port);
	flush_workqueue(sa_wq);
}

/*
 * ib_client add callback: allocate per-device state, initialize each port,
 * and kick off an initial cache refresh.
 */
static void sa_db_add_dev(struct ib_device *device)
{
	struct sa_db_device *dev;
	struct sa_db_port *port;
	int s, e, i, ret;

	if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
		return;

	/* Switches have a single port 0; HCAs number ports from 1. */
	if (device->node_type == RDMA_NODE_IB_SWITCH) {
		s = e = 0;
	} else {
		s = 1;
		e = device->phys_port_cnt;
	}

	dev = kzalloc(sizeof *dev + (e - s + 1) * sizeof *port, GFP_KERNEL);
	if (!dev)
		return;

	dev->start_port = s;
	dev->port_count = e - s + 1;
	dev->device = device;
	for (i = 0; i < dev->port_count; i++) {
		ret = init_port(dev, s + i);
		if (ret)
			goto err;
	}

	ib_set_client_data(device, &sa_db_client, dev);

	INIT_IB_EVENT_HANDLER(&dev->event_handler, device, handle_event);

	mutex_lock(&lock);
	list_add_tail(&dev->list, &dev_list);
	refresh_dev_db(dev);
	mutex_unlock(&lock);

	ib_register_event_handler(&dev->event_handler);
	return;
err:
	/* Unwind only the ports that were fully initialized. */
	while (i--)
		destroy_port(&dev->port[i]);
	kfree(dev);
}

/* ib_client remove callback: stop events, destroy ports, free the device. */
static void sa_db_remove_dev(struct ib_device *device)
{
	struct sa_db_device *dev;
	int i;

	dev = ib_get_client_data(device, &sa_db_client);
	if (!dev)
		return;

	ib_unregister_event_handler(&dev->event_handler);
	flush_workqueue(sa_wq);

	for (i = 0; i < dev->port_count; i++)
		destroy_port(&dev->port[i]);

	mutex_lock(&lock);
	list_del(&dev->list);
	mutex_unlock(&lock);

	kfree(dev);
}

/* Module init helper: set up the workqueue and register with the SA core. */
int sa_db_init(void)
{
	int ret;

	rwlock_init(&rwlock);
	sa_wq = create_singlethread_workqueue("local_sa");
	if (!sa_wq)
		return -ENOMEM;

	ib_sa_register_client(&sa_client);
	ret = ib_register_client(&sa_db_client);
	if (ret)
		goto err;

	return 0;

err:
	ib_sa_unregister_client(&sa_client);
	destroy_workqueue(sa_wq);
	return ret;
}

/* Module exit helper: unregister clients, then tear down the workqueue. */
void sa_db_cleanup(void)
{
	ib_unregister_client(&sa_db_client);
	ib_sa_unregister_client(&sa_client);
	destroy_workqueue(sa_wq);
}