1// SPDX-License-Identifier: GPL-2.0 2/* 3 * Multipath support for RPC 4 * 5 * Copyright (c) 2015, 2016, Primary Data, Inc. All rights reserved. 6 * 7 * Trond Myklebust <trond.myklebust@primarydata.com> 8 * 9 */ 10#include <linux/atomic.h> 11#include <linux/types.h> 12#include <linux/kref.h> 13#include <linux/list.h> 14#include <linux/rcupdate.h> 15#include <linux/rculist.h> 16#include <linux/slab.h> 17#include <linux/spinlock.h> 18#include <linux/sunrpc/xprt.h> 19#include <linux/sunrpc/addr.h> 20#include <linux/sunrpc/xprtmultipath.h> 21 22#include "sysfs.h" 23 24typedef struct rpc_xprt *(*xprt_switch_find_xprt_t)(struct rpc_xprt_switch *xps, 25 const struct rpc_xprt *cur); 26 27static const struct rpc_xprt_iter_ops rpc_xprt_iter_singular; 28static const struct rpc_xprt_iter_ops rpc_xprt_iter_roundrobin; 29static const struct rpc_xprt_iter_ops rpc_xprt_iter_listall; 30static const struct rpc_xprt_iter_ops rpc_xprt_iter_listoffline; 31 32static void xprt_switch_add_xprt_locked(struct rpc_xprt_switch *xps, 33 struct rpc_xprt *xprt) 34{ 35 if (unlikely(xprt_get(xprt) == NULL)) 36 return; 37 list_add_tail_rcu(&xprt->xprt_switch, &xps->xps_xprt_list); 38 smp_wmb(); 39 if (xps->xps_nxprts == 0) 40 xps->xps_net = xprt->xprt_net; 41 xps->xps_nxprts++; 42 xps->xps_nactive++; 43} 44 45/** 46 * rpc_xprt_switch_add_xprt - Add a new rpc_xprt to an rpc_xprt_switch 47 * @xps: pointer to struct rpc_xprt_switch 48 * @xprt: pointer to struct rpc_xprt 49 * 50 * Adds xprt to the end of the list of struct rpc_xprt in xps. 51 */ 52void rpc_xprt_switch_add_xprt(struct rpc_xprt_switch *xps, 53 struct rpc_xprt *xprt) 54{ 55 if (xprt == NULL) 56 return; 57 spin_lock(&xps->xps_lock); 58 if (xps->xps_net == xprt->xprt_net || xps->xps_net == NULL) 59 xprt_switch_add_xprt_locked(xps, xprt); 60 spin_unlock(&xps->xps_lock); 61 rpc_sysfs_xprt_setup(xps, xprt, GFP_KERNEL); 62} 63 64static void xprt_switch_remove_xprt_locked(struct rpc_xprt_switch *xps, 65 struct rpc_xprt *xprt, bool offline) 66{ 67 if (unlikely(xprt == NULL)) 68 return; 69 if (!test_bit(XPRT_OFFLINE, &xprt->state) && offline) 70 xps->xps_nactive--; 71 xps->xps_nxprts--; 72 if (xps->xps_nxprts == 0) 73 xps->xps_net = NULL; 74 smp_wmb(); 75 list_del_rcu(&xprt->xprt_switch); 76} 77 78/** 79 * rpc_xprt_switch_remove_xprt - Removes an rpc_xprt from a rpc_xprt_switch 80 * @xps: pointer to struct rpc_xprt_switch 81 * @xprt: pointer to struct rpc_xprt 82 * @offline: indicates if the xprt that's being removed is in an offline state 83 * 84 * Removes xprt from the list of struct rpc_xprt in xps. 85 */ 86void rpc_xprt_switch_remove_xprt(struct rpc_xprt_switch *xps, 87 struct rpc_xprt *xprt, bool offline) 88{ 89 spin_lock(&xps->xps_lock); 90 xprt_switch_remove_xprt_locked(xps, xprt, offline); 91 spin_unlock(&xps->xps_lock); 92 xprt_put(xprt); 93} 94 95static DEFINE_IDA(rpc_xprtswitch_ids); 96 97void xprt_multipath_cleanup_ids(void) 98{ 99 ida_destroy(&rpc_xprtswitch_ids); 100} 101 102static int xprt_switch_alloc_id(struct rpc_xprt_switch *xps, gfp_t gfp_flags) 103{ 104 int id; 105 106 id = ida_alloc(&rpc_xprtswitch_ids, gfp_flags); 107 if (id < 0) 108 return id; 109 110 xps->xps_id = id; 111 return 0; 112} 113 114static void xprt_switch_free_id(struct rpc_xprt_switch *xps) 115{ 116 ida_free(&rpc_xprtswitch_ids, xps->xps_id); 117} 118 119/** 120 * xprt_switch_alloc - Allocate a new struct rpc_xprt_switch 121 * @xprt: pointer to struct rpc_xprt 122 * @gfp_flags: allocation flags 123 * 124 * On success, returns an initialised struct rpc_xprt_switch, containing 125 * the entry xprt. Returns NULL on failure. 126 */ 127struct rpc_xprt_switch *xprt_switch_alloc(struct rpc_xprt *xprt, 128 gfp_t gfp_flags) 129{ 130 struct rpc_xprt_switch *xps; 131 132 xps = kmalloc(sizeof(*xps), gfp_flags); 133 if (xps != NULL) { 134 spin_lock_init(&xps->xps_lock); 135 kref_init(&xps->xps_kref); 136 xprt_switch_alloc_id(xps, gfp_flags); 137 xps->xps_nxprts = xps->xps_nactive = 0; 138 atomic_long_set(&xps->xps_queuelen, 0); 139 xps->xps_net = NULL; 140 INIT_LIST_HEAD(&xps->xps_xprt_list); 141 xps->xps_iter_ops = &rpc_xprt_iter_singular; 142 rpc_sysfs_xprt_switch_setup(xps, xprt, gfp_flags); 143 xprt_switch_add_xprt_locked(xps, xprt); 144 xps->xps_nunique_destaddr_xprts = 1; 145 rpc_sysfs_xprt_setup(xps, xprt, gfp_flags); 146 } 147 148 return xps; 149} 150 151static void xprt_switch_free_entries(struct rpc_xprt_switch *xps) 152{ 153 spin_lock(&xps->xps_lock); 154 while (!list_empty(&xps->xps_xprt_list)) { 155 struct rpc_xprt *xprt; 156 157 xprt = list_first_entry(&xps->xps_xprt_list, 158 struct rpc_xprt, xprt_switch); 159 xprt_switch_remove_xprt_locked(xps, xprt, true); 160 spin_unlock(&xps->xps_lock); 161 xprt_put(xprt); 162 spin_lock(&xps->xps_lock); 163 } 164 spin_unlock(&xps->xps_lock); 165} 166 167static void xprt_switch_free(struct kref *kref) 168{ 169 struct rpc_xprt_switch *xps = container_of(kref, 170 struct rpc_xprt_switch, xps_kref); 171 172 xprt_switch_free_entries(xps); 173 rpc_sysfs_xprt_switch_destroy(xps); 174 xprt_switch_free_id(xps); 175 kfree_rcu(xps, xps_rcu); 176} 177 178/** 179 * xprt_switch_get - Return a reference to a rpc_xprt_switch 180 * @xps: pointer to struct rpc_xprt_switch 181 * 182 * Returns a reference to xps unless the refcount is already zero. 183 */ 184struct rpc_xprt_switch *xprt_switch_get(struct rpc_xprt_switch *xps) 185{ 186 if (xps != NULL && kref_get_unless_zero(&xps->xps_kref)) 187 return xps; 188 return NULL; 189} 190 191/** 192 * xprt_switch_put - Release a reference to a rpc_xprt_switch 193 * @xps: pointer to struct rpc_xprt_switch 194 * 195 * Release the reference to xps, and free it once the refcount is zero. 196 */ 197void xprt_switch_put(struct rpc_xprt_switch *xps) 198{ 199 if (xps != NULL) 200 kref_put(&xps->xps_kref, xprt_switch_free); 201} 202 203/** 204 * rpc_xprt_switch_set_roundrobin - Set a round-robin policy on rpc_xprt_switch 205 * @xps: pointer to struct rpc_xprt_switch 206 * 207 * Sets a round-robin default policy for iterators acting on xps. 208 */ 209void rpc_xprt_switch_set_roundrobin(struct rpc_xprt_switch *xps) 210{ 211 if (READ_ONCE(xps->xps_iter_ops) != &rpc_xprt_iter_roundrobin) 212 WRITE_ONCE(xps->xps_iter_ops, &rpc_xprt_iter_roundrobin); 213} 214 215static 216const struct rpc_xprt_iter_ops *xprt_iter_ops(const struct rpc_xprt_iter *xpi) 217{ 218 if (xpi->xpi_ops != NULL) 219 return xpi->xpi_ops; 220 return rcu_dereference(xpi->xpi_xpswitch)->xps_iter_ops; 221} 222 223static 224void xprt_iter_no_rewind(struct rpc_xprt_iter *xpi) 225{ 226} 227 228static 229void xprt_iter_default_rewind(struct rpc_xprt_iter *xpi) 230{ 231 WRITE_ONCE(xpi->xpi_cursor, NULL); 232} 233 234static 235bool xprt_is_active(const struct rpc_xprt *xprt) 236{ 237 return (kref_read(&xprt->kref) != 0 && 238 !test_bit(XPRT_OFFLINE, &xprt->state)); 239} 240 241static 242struct rpc_xprt *xprt_switch_find_first_entry(struct list_head *head) 243{ 244 struct rpc_xprt *pos; 245 246 list_for_each_entry_rcu(pos, head, xprt_switch) { 247 if (xprt_is_active(pos)) 248 return pos; 249 } 250 return NULL; 251} 252 253static 254struct rpc_xprt *xprt_switch_find_first_entry_offline(struct list_head *head) 255{ 256 struct rpc_xprt *pos; 257 258 list_for_each_entry_rcu(pos, head, xprt_switch) { 259 if (!xprt_is_active(pos)) 260 return pos; 261 } 262 return NULL; 263} 264 265static 266struct rpc_xprt *xprt_iter_first_entry(struct rpc_xprt_iter *xpi) 267{ 268 struct rpc_xprt_switch *xps = rcu_dereference(xpi->xpi_xpswitch); 269 270 if (xps == NULL) 271 return NULL; 272 return xprt_switch_find_first_entry(&xps->xps_xprt_list); 273} 274 275static 276struct rpc_xprt *_xprt_switch_find_current_entry(struct list_head *head, 277 const struct rpc_xprt *cur, 278 bool find_active) 279{ 280 struct rpc_xprt *pos; 281 bool found = false; 282 283 list_for_each_entry_rcu(pos, head, xprt_switch) { 284 if (cur == pos) 285 found = true; 286 if (found && ((find_active && xprt_is_active(pos)) || 287 (!find_active && !xprt_is_active(pos)))) 288 return pos; 289 } 290 return NULL; 291} 292 293static 294struct rpc_xprt *xprt_switch_find_current_entry(struct list_head *head, 295 const struct rpc_xprt *cur) 296{ 297 return _xprt_switch_find_current_entry(head, cur, true); 298} 299 300static 301struct rpc_xprt * _xprt_iter_current_entry(struct rpc_xprt_iter *xpi, 302 struct rpc_xprt *first_entry(struct list_head *head), 303 struct rpc_xprt *current_entry(struct list_head *head, 304 const struct rpc_xprt *cur)) 305{ 306 struct rpc_xprt_switch *xps = rcu_dereference(xpi->xpi_xpswitch); 307 struct list_head *head; 308 309 if (xps == NULL) 310 return NULL; 311 head = &xps->xps_xprt_list; 312 if (xpi->xpi_cursor == NULL || xps->xps_nxprts < 2) 313 return first_entry(head); 314 return current_entry(head, xpi->xpi_cursor); 315} 316 317static 318struct rpc_xprt *xprt_iter_current_entry(struct rpc_xprt_iter *xpi) 319{ 320 return _xprt_iter_current_entry(xpi, xprt_switch_find_first_entry, 321 xprt_switch_find_current_entry); 322} 323 324static 325struct rpc_xprt *xprt_switch_find_current_entry_offline(struct list_head *head, 326 const struct rpc_xprt *cur) 327{ 328 return _xprt_switch_find_current_entry(head, cur, false); 329} 330 331static 332struct rpc_xprt *xprt_iter_current_entry_offline(struct rpc_xprt_iter *xpi) 333{ 334 return _xprt_iter_current_entry(xpi, 335 xprt_switch_find_first_entry_offline, 336 xprt_switch_find_current_entry_offline); 337} 338 339static 340bool __rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps, 341 const struct sockaddr *sap) 342{ 343 struct list_head *head; 344 struct rpc_xprt *pos; 345 346 if (xps == NULL || sap == NULL) 347 return false; 348 349 head = &xps->xps_xprt_list; 350 list_for_each_entry_rcu(pos, head, xprt_switch) { 351 if (rpc_cmp_addr_port(sap, (struct sockaddr *)&pos->addr)) { 352 pr_info("RPC: addr %s already in xprt switch\n", 353 pos->address_strings[RPC_DISPLAY_ADDR]); 354 return true; 355 } 356 } 357 return false; 358} 359 360bool rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps, 361 const struct sockaddr *sap) 362{ 363 bool res; 364 365 rcu_read_lock(); 366 res = __rpc_xprt_switch_has_addr(xps, sap); 367 rcu_read_unlock(); 368 369 return res; 370} 371 372static 373struct rpc_xprt *xprt_switch_find_next_entry(struct list_head *head, 374 const struct rpc_xprt *cur, bool check_active) 375{ 376 struct rpc_xprt *pos, *prev = NULL; 377 bool found = false; 378 379 list_for_each_entry_rcu(pos, head, xprt_switch) { 380 if (cur == prev) 381 found = true; 382 /* for request to return active transports return only 383 * active, for request to return offline transports 384 * return only offline 385 */ 386 if (found && ((check_active && xprt_is_active(pos)) || 387 (!check_active && !xprt_is_active(pos)))) 388 return pos; 389 prev = pos; 390 } 391 return NULL; 392} 393 394static 395struct rpc_xprt *xprt_switch_set_next_cursor(struct rpc_xprt_switch *xps, 396 struct rpc_xprt **cursor, 397 xprt_switch_find_xprt_t find_next) 398{ 399 struct rpc_xprt *pos, *old; 400 401 old = smp_load_acquire(cursor); 402 pos = find_next(xps, old); 403 smp_store_release(cursor, pos); 404 return pos; 405} 406 407static 408struct rpc_xprt *xprt_iter_next_entry_multiple(struct rpc_xprt_iter *xpi, 409 xprt_switch_find_xprt_t find_next) 410{ 411 struct rpc_xprt_switch *xps = rcu_dereference(xpi->xpi_xpswitch); 412 413 if (xps == NULL) 414 return NULL; 415 return xprt_switch_set_next_cursor(xps, &xpi->xpi_cursor, find_next); 416} 417 418static 419struct rpc_xprt *__xprt_switch_find_next_entry_roundrobin(struct list_head *head, 420 const struct rpc_xprt *cur) 421{ 422 struct rpc_xprt *ret; 423 424 ret = xprt_switch_find_next_entry(head, cur, true); 425 if (ret != NULL) 426 return ret; 427 return xprt_switch_find_first_entry(head); 428} 429 430static 431struct rpc_xprt *xprt_switch_find_next_entry_roundrobin(struct rpc_xprt_switch *xps, 432 const struct rpc_xprt *cur) 433{ 434 struct list_head *head = &xps->xps_xprt_list; 435 struct rpc_xprt *xprt; 436 unsigned int nactive; 437 438 for (;;) { 439 unsigned long xprt_queuelen, xps_queuelen; 440 441 xprt = __xprt_switch_find_next_entry_roundrobin(head, cur); 442 if (!xprt) 443 break; 444 xprt_queuelen = atomic_long_read(&xprt->queuelen); 445 xps_queuelen = atomic_long_read(&xps->xps_queuelen); 446 nactive = READ_ONCE(xps->xps_nactive); 447 /* Exit loop if xprt_queuelen <= average queue length */ 448 if (xprt_queuelen * nactive <= xps_queuelen) 449 break; 450 cur = xprt; 451 } 452 return xprt; 453} 454 455static 456struct rpc_xprt *xprt_iter_next_entry_roundrobin(struct rpc_xprt_iter *xpi) 457{ 458 return xprt_iter_next_entry_multiple(xpi, 459 xprt_switch_find_next_entry_roundrobin); 460} 461 462static 463struct rpc_xprt *xprt_switch_find_next_entry_all(struct rpc_xprt_switch *xps, 464 const struct rpc_xprt *cur) 465{ 466 return xprt_switch_find_next_entry(&xps->xps_xprt_list, cur, true); 467} 468 469static 470struct rpc_xprt *xprt_switch_find_next_entry_offline(struct rpc_xprt_switch *xps, 471 const struct rpc_xprt *cur) 472{ 473 return xprt_switch_find_next_entry(&xps->xps_xprt_list, cur, false); 474} 475 476static 477struct rpc_xprt *xprt_iter_next_entry_all(struct rpc_xprt_iter *xpi) 478{ 479 return xprt_iter_next_entry_multiple(xpi, 480 xprt_switch_find_next_entry_all); 481} 482 483static 484struct rpc_xprt *xprt_iter_next_entry_offline(struct rpc_xprt_iter *xpi) 485{ 486 return xprt_iter_next_entry_multiple(xpi, 487 xprt_switch_find_next_entry_offline); 488} 489 490/* 491 * xprt_iter_rewind - Resets the xprt iterator 492 * @xpi: pointer to rpc_xprt_iter 493 * 494 * Resets xpi to ensure that it points to the first entry in the list 495 * of transports. 496 */ 497void xprt_iter_rewind(struct rpc_xprt_iter *xpi) 498{ 499 rcu_read_lock(); 500 xprt_iter_ops(xpi)->xpi_rewind(xpi); 501 rcu_read_unlock(); 502} 503 504static void __xprt_iter_init(struct rpc_xprt_iter *xpi, 505 struct rpc_xprt_switch *xps, 506 const struct rpc_xprt_iter_ops *ops) 507{ 508 rcu_assign_pointer(xpi->xpi_xpswitch, xprt_switch_get(xps)); 509 xpi->xpi_cursor = NULL; 510 xpi->xpi_ops = ops; 511} 512 513/** 514 * xprt_iter_init - Initialise an xprt iterator 515 * @xpi: pointer to rpc_xprt_iter 516 * @xps: pointer to rpc_xprt_switch 517 * 518 * Initialises the iterator to use the default iterator ops 519 * as set in xps. This function is mainly intended for internal 520 * use in the rpc_client. 521 */ 522void xprt_iter_init(struct rpc_xprt_iter *xpi, 523 struct rpc_xprt_switch *xps) 524{ 525 __xprt_iter_init(xpi, xps, NULL); 526} 527 528/** 529 * xprt_iter_init_listall - Initialise an xprt iterator 530 * @xpi: pointer to rpc_xprt_iter 531 * @xps: pointer to rpc_xprt_switch 532 * 533 * Initialises the iterator to iterate once through the entire list 534 * of entries in xps. 535 */ 536void xprt_iter_init_listall(struct rpc_xprt_iter *xpi, 537 struct rpc_xprt_switch *xps) 538{ 539 __xprt_iter_init(xpi, xps, &rpc_xprt_iter_listall); 540} 541 542void xprt_iter_init_listoffline(struct rpc_xprt_iter *xpi, 543 struct rpc_xprt_switch *xps) 544{ 545 __xprt_iter_init(xpi, xps, &rpc_xprt_iter_listoffline); 546} 547 548/** 549 * xprt_iter_xchg_switch - Atomically swap out the rpc_xprt_switch 550 * @xpi: pointer to rpc_xprt_iter 551 * @newswitch: pointer to a new rpc_xprt_switch or NULL 552 * 553 * Swaps out the existing xpi->xpi_xpswitch with a new value. 554 */ 555struct rpc_xprt_switch *xprt_iter_xchg_switch(struct rpc_xprt_iter *xpi, 556 struct rpc_xprt_switch *newswitch) 557{ 558 struct rpc_xprt_switch __rcu *oldswitch; 559 560 /* Atomically swap out the old xpswitch */ 561 oldswitch = xchg(&xpi->xpi_xpswitch, RCU_INITIALIZER(newswitch)); 562 if (newswitch != NULL) 563 xprt_iter_rewind(xpi); 564 return rcu_dereference_protected(oldswitch, true); 565} 566 567/** 568 * xprt_iter_destroy - Destroys the xprt iterator 569 * @xpi: pointer to rpc_xprt_iter 570 */ 571void xprt_iter_destroy(struct rpc_xprt_iter *xpi) 572{ 573 xprt_switch_put(xprt_iter_xchg_switch(xpi, NULL)); 574} 575 576/** 577 * xprt_iter_xprt - Returns the rpc_xprt pointed to by the cursor 578 * @xpi: pointer to rpc_xprt_iter 579 * 580 * Returns a pointer to the struct rpc_xprt that is currently 581 * pointed to by the cursor. 582 * Caller must be holding rcu_read_lock(). 583 */ 584struct rpc_xprt *xprt_iter_xprt(struct rpc_xprt_iter *xpi) 585{ 586 WARN_ON_ONCE(!rcu_read_lock_held()); 587 return xprt_iter_ops(xpi)->xpi_xprt(xpi); 588} 589 590static 591struct rpc_xprt *xprt_iter_get_helper(struct rpc_xprt_iter *xpi, 592 struct rpc_xprt *(*fn)(struct rpc_xprt_iter *)) 593{ 594 struct rpc_xprt *ret; 595 596 do { 597 ret = fn(xpi); 598 if (ret == NULL) 599 break; 600 ret = xprt_get(ret); 601 } while (ret == NULL); 602 return ret; 603} 604 605/** 606 * xprt_iter_get_xprt - Returns the rpc_xprt pointed to by the cursor 607 * @xpi: pointer to rpc_xprt_iter 608 * 609 * Returns a reference to the struct rpc_xprt that is currently 610 * pointed to by the cursor. 611 */ 612struct rpc_xprt *xprt_iter_get_xprt(struct rpc_xprt_iter *xpi) 613{ 614 struct rpc_xprt *xprt; 615 616 rcu_read_lock(); 617 xprt = xprt_iter_get_helper(xpi, xprt_iter_ops(xpi)->xpi_xprt); 618 rcu_read_unlock(); 619 return xprt; 620} 621 622/** 623 * xprt_iter_get_next - Returns the next rpc_xprt following the cursor 624 * @xpi: pointer to rpc_xprt_iter 625 * 626 * Returns a reference to the struct rpc_xprt that immediately follows the 627 * entry pointed to by the cursor. 628 */ 629struct rpc_xprt *xprt_iter_get_next(struct rpc_xprt_iter *xpi) 630{ 631 struct rpc_xprt *xprt; 632 633 rcu_read_lock(); 634 xprt = xprt_iter_get_helper(xpi, xprt_iter_ops(xpi)->xpi_next); 635 rcu_read_unlock(); 636 return xprt; 637} 638 639/* Policy for always returning the first entry in the rpc_xprt_switch */ 640static 641const struct rpc_xprt_iter_ops rpc_xprt_iter_singular = { 642 .xpi_rewind = xprt_iter_no_rewind, 643 .xpi_xprt = xprt_iter_first_entry, 644 .xpi_next = xprt_iter_first_entry, 645}; 646 647/* Policy for round-robin iteration of entries in the rpc_xprt_switch */ 648static 649const struct rpc_xprt_iter_ops rpc_xprt_iter_roundrobin = { 650 .xpi_rewind = xprt_iter_default_rewind, 651 .xpi_xprt = xprt_iter_current_entry, 652 .xpi_next = xprt_iter_next_entry_roundrobin, 653}; 654 655/* Policy for once-through iteration of entries in the rpc_xprt_switch */ 656static 657const struct rpc_xprt_iter_ops rpc_xprt_iter_listall = { 658 .xpi_rewind = xprt_iter_default_rewind, 659 .xpi_xprt = xprt_iter_current_entry, 660 .xpi_next = xprt_iter_next_entry_all, 661}; 662 663static 664const struct rpc_xprt_iter_ops rpc_xprt_iter_listoffline = { 665 .xpi_rewind = xprt_iter_default_rewind, 666 .xpi_xprt = xprt_iter_current_entry_offline, 667 .xpi_next = xprt_iter_next_entry_offline, 668}; 669