1/* AFS volume location management 2 * 3 * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved. 4 * Written by David Howells (dhowells@redhat.com) 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public License 8 * as published by the Free Software Foundation; either version 9 * 2 of the License, or (at your option) any later version. 10 */ 11 12#include <linux/kernel.h> 13#include <linux/module.h> 14#include <linux/init.h> 15#include <linux/sched.h> 16#include "internal.h" 17 18unsigned afs_vlocation_timeout = 10; /* volume location timeout in seconds */ 19unsigned afs_vlocation_update_timeout = 10 * 60; 20 21static void afs_vlocation_reaper(struct work_struct *); 22static void afs_vlocation_updater(struct work_struct *); 23 24static LIST_HEAD(afs_vlocation_updates); 25static LIST_HEAD(afs_vlocation_graveyard); 26static DEFINE_SPINLOCK(afs_vlocation_updates_lock); 27static DEFINE_SPINLOCK(afs_vlocation_graveyard_lock); 28static DECLARE_DELAYED_WORK(afs_vlocation_reap, afs_vlocation_reaper); 29static DECLARE_DELAYED_WORK(afs_vlocation_update, afs_vlocation_updater); 30static struct workqueue_struct *afs_vlocation_update_worker; 31 32/* 33 * iterate through the VL servers in a cell until one of them admits knowing 34 * about the volume in question 35 */ 36static int afs_vlocation_access_vl_by_name(struct afs_vlocation *vl, 37 struct key *key, 38 struct afs_cache_vlocation *vldb) 39{ 40 struct afs_cell *cell = vl->cell; 41 struct in_addr addr; 42 int count, ret; 43 44 _enter("%s,%s", cell->name, vl->vldb.name); 45 46 down_write(&vl->cell->vl_sem); 47 ret = -ENOMEDIUM; 48 for (count = cell->vl_naddrs; count > 0; count--) { 49 addr = cell->vl_addrs[cell->vl_curr_svix]; 50 51 _debug("CellServ[%hu]: %08x", cell->vl_curr_svix, addr.s_addr); 52 53 /* attempt to access the VL server */ 54 ret = afs_vl_get_entry_by_name(&addr, key, vl->vldb.name, vldb, 55 &afs_sync_call); 56 switch (ret) { 57 case 0: 58 goto out; 59 case -ENOMEM: 60 case -ENONET: 61 case -ENETUNREACH: 62 case -EHOSTUNREACH: 63 case -ECONNREFUSED: 64 if (ret == -ENOMEM || ret == -ENONET) 65 goto out; 66 goto rotate; 67 case -ENOMEDIUM: 68 goto out; 69 default: 70 ret = -EIO; 71 goto rotate; 72 } 73 74 /* rotate the server records upon lookup failure */ 75 rotate: 76 cell->vl_curr_svix++; 77 cell->vl_curr_svix %= cell->vl_naddrs; 78 } 79 80out: 81 up_write(&vl->cell->vl_sem); 82 _leave(" = %d", ret); 83 return ret; 84} 85 86/* 87 * iterate through the VL servers in a cell until one of them admits knowing 88 * about the volume in question 89 */ 90static int afs_vlocation_access_vl_by_id(struct afs_vlocation *vl, 91 struct key *key, 92 afs_volid_t volid, 93 afs_voltype_t voltype, 94 struct afs_cache_vlocation *vldb) 95{ 96 struct afs_cell *cell = vl->cell; 97 struct in_addr addr; 98 int count, ret; 99 100 _enter("%s,%x,%d,", cell->name, volid, voltype); 101 102 down_write(&vl->cell->vl_sem); 103 ret = -ENOMEDIUM; 104 for (count = cell->vl_naddrs; count > 0; count--) { 105 addr = cell->vl_addrs[cell->vl_curr_svix]; 106 107 _debug("CellServ[%hu]: %08x", cell->vl_curr_svix, addr.s_addr); 108 109 /* attempt to access the VL server */ 110 ret = afs_vl_get_entry_by_id(&addr, key, volid, voltype, vldb, 111 &afs_sync_call); 112 switch (ret) { 113 case 0: 114 goto out; 115 case -ENOMEM: 116 case -ENONET: 117 case -ENETUNREACH: 118 case -EHOSTUNREACH: 119 case -ECONNREFUSED: 120 if (ret == -ENOMEM || ret == -ENONET) 121 goto out; 122 goto rotate; 123 case -EBUSY: 124 vl->upd_busy_cnt++; 125 if (vl->upd_busy_cnt <= 3) { 126 if (vl->upd_busy_cnt > 1) { 127 /* second+ BUSY - sleep a little bit */ 128 set_current_state(TASK_UNINTERRUPTIBLE); 129 schedule_timeout(1); 130 __set_current_state(TASK_RUNNING); 131 } 132 continue; 133 } 134 break; 135 case -ENOMEDIUM: 136 vl->upd_rej_cnt++; 137 goto rotate; 138 default: 139 ret = -EIO; 140 goto rotate; 141 } 142 143 /* rotate the server records upon lookup failure */ 144 rotate: 145 cell->vl_curr_svix++; 146 cell->vl_curr_svix %= cell->vl_naddrs; 147 vl->upd_busy_cnt = 0; 148 } 149 150out: 151 if (ret < 0 && vl->upd_rej_cnt > 0) { 152 printk(KERN_NOTICE "kAFS:" 153 " Active volume no longer valid '%s'\n", 154 vl->vldb.name); 155 vl->valid = 0; 156 ret = -ENOMEDIUM; 157 } 158 159 up_write(&vl->cell->vl_sem); 160 _leave(" = %d", ret); 161 return ret; 162} 163 164/* 165 * allocate a volume location record 166 */ 167static struct afs_vlocation *afs_vlocation_alloc(struct afs_cell *cell, 168 const char *name, 169 size_t namesz) 170{ 171 struct afs_vlocation *vl; 172 173 vl = kzalloc(sizeof(struct afs_vlocation), GFP_KERNEL); 174 if (vl) { 175 vl->cell = cell; 176 vl->state = AFS_VL_NEW; 177 atomic_set(&vl->usage, 1); 178 INIT_LIST_HEAD(&vl->link); 179 INIT_LIST_HEAD(&vl->grave); 180 INIT_LIST_HEAD(&vl->update); 181 init_waitqueue_head(&vl->waitq); 182 spin_lock_init(&vl->lock); 183 memcpy(vl->vldb.name, name, namesz); 184 } 185 186 _leave(" = %p", vl); 187 return vl; 188} 189 190/* 191 * update record if we found it in the cache 192 */ 193static int afs_vlocation_update_record(struct afs_vlocation *vl, 194 struct key *key, 195 struct afs_cache_vlocation *vldb) 196{ 197 afs_voltype_t voltype; 198 afs_volid_t vid; 199 int ret; 200 201 /* try to look up a cached volume in the cell VL databases by ID */ 202 _debug("Locally Cached: %s %02x { %08x(%x) %08x(%x) %08x(%x) }", 203 vl->vldb.name, 204 vl->vldb.vidmask, 205 ntohl(vl->vldb.servers[0].s_addr), 206 vl->vldb.srvtmask[0], 207 ntohl(vl->vldb.servers[1].s_addr), 208 vl->vldb.srvtmask[1], 209 ntohl(vl->vldb.servers[2].s_addr), 210 vl->vldb.srvtmask[2]); 211 212 _debug("Vids: %08x %08x %08x", 213 vl->vldb.vid[0], 214 vl->vldb.vid[1], 215 vl->vldb.vid[2]); 216 217 if (vl->vldb.vidmask & AFS_VOL_VTM_RW) { 218 vid = vl->vldb.vid[0]; 219 voltype = AFSVL_RWVOL; 220 } else if (vl->vldb.vidmask & AFS_VOL_VTM_RO) { 221 vid = vl->vldb.vid[1]; 222 voltype = AFSVL_ROVOL; 223 } else if (vl->vldb.vidmask & AFS_VOL_VTM_BAK) { 224 vid = vl->vldb.vid[2]; 225 voltype = AFSVL_BACKVOL; 226 } else { 227 BUG(); 228 vid = 0; 229 voltype = 0; 230 } 231 232 /* contact the server to make sure the volume is still available 233 * - TODO: need to handle disconnected operation here 234 */ 235 ret = afs_vlocation_access_vl_by_id(vl, key, vid, voltype, vldb); 236 switch (ret) { 237 /* net error */ 238 default: 239 printk(KERN_WARNING "kAFS:" 240 " failed to update volume '%s' (%x) up in '%s': %d\n", 241 vl->vldb.name, vid, vl->cell->name, ret); 242 _leave(" = %d", ret); 243 return ret; 244 245 /* pulled from local cache into memory */ 246 case 0: 247 _leave(" = 0"); 248 return 0; 249 250 /* uh oh... looks like the volume got deleted */ 251 case -ENOMEDIUM: 252 printk(KERN_ERR "kAFS:" 253 " volume '%s' (%x) does not exist '%s'\n", 254 vl->vldb.name, vid, vl->cell->name); 255 256 /* TODO: make existing record unavailable */ 257 _leave(" = %d", ret); 258 return ret; 259 } 260} 261 262/* 263 * apply the update to a VL record 264 */ 265static void afs_vlocation_apply_update(struct afs_vlocation *vl, 266 struct afs_cache_vlocation *vldb) 267{ 268 _debug("Done VL Lookup: %s %02x { %08x(%x) %08x(%x) %08x(%x) }", 269 vldb->name, vldb->vidmask, 270 ntohl(vldb->servers[0].s_addr), vldb->srvtmask[0], 271 ntohl(vldb->servers[1].s_addr), vldb->srvtmask[1], 272 ntohl(vldb->servers[2].s_addr), vldb->srvtmask[2]); 273 274 _debug("Vids: %08x %08x %08x", 275 vldb->vid[0], vldb->vid[1], vldb->vid[2]); 276 277 if (strcmp(vldb->name, vl->vldb.name) != 0) 278 printk(KERN_NOTICE "kAFS:" 279 " name of volume '%s' changed to '%s' on server\n", 280 vl->vldb.name, vldb->name); 281 282 vl->vldb = *vldb; 283 284#ifdef AFS_CACHING_SUPPORT 285 /* update volume entry in local cache */ 286 cachefs_update_cookie(vl->cache); 287#endif 288} 289 290/* 291 * fill in a volume location record, consulting the cache and the VL server 292 * both 293 */ 294static int afs_vlocation_fill_in_record(struct afs_vlocation *vl, 295 struct key *key) 296{ 297 struct afs_cache_vlocation vldb; 298 int ret; 299 300 _enter(""); 301 302 ASSERTCMP(vl->valid, ==, 0); 303 304 memset(&vldb, 0, sizeof(vldb)); 305 306 /* see if we have an in-cache copy (will set vl->valid if there is) */ 307#ifdef AFS_CACHING_SUPPORT 308 cachefs_acquire_cookie(cell->cache, 309 &afs_volume_cache_index_def, 310 vlocation, 311 &vl->cache); 312#endif 313 314 if (vl->valid) { 315 /* try to update a known volume in the cell VL databases by 316 * ID as the name may have changed */ 317 _debug("found in cache"); 318 ret = afs_vlocation_update_record(vl, key, &vldb); 319 } else { 320 /* try to look up an unknown volume in the cell VL databases by 321 * name */ 322 ret = afs_vlocation_access_vl_by_name(vl, key, &vldb); 323 if (ret < 0) { 324 printk("kAFS: failed to locate '%s' in cell '%s'\n", 325 vl->vldb.name, vl->cell->name); 326 return ret; 327 } 328 } 329 330 afs_vlocation_apply_update(vl, &vldb); 331 _leave(" = 0"); 332 return 0; 333} 334 335/* 336 * queue a vlocation record for updates 337 */ 338void afs_vlocation_queue_for_updates(struct afs_vlocation *vl) 339{ 340 struct afs_vlocation *xvl; 341 342 /* wait at least 10 minutes before updating... */ 343 vl->update_at = get_seconds() + afs_vlocation_update_timeout; 344 345 spin_lock(&afs_vlocation_updates_lock); 346 347 if (!list_empty(&afs_vlocation_updates)) { 348 /* ... but wait at least 1 second more than the newest record 349 * already queued so that we don't spam the VL server suddenly 350 * with lots of requests 351 */ 352 xvl = list_entry(afs_vlocation_updates.prev, 353 struct afs_vlocation, update); 354 if (vl->update_at <= xvl->update_at) 355 vl->update_at = xvl->update_at + 1; 356 } else { 357 queue_delayed_work(afs_vlocation_update_worker, 358 &afs_vlocation_update, 359 afs_vlocation_update_timeout * HZ); 360 } 361 362 list_add_tail(&vl->update, &afs_vlocation_updates); 363 spin_unlock(&afs_vlocation_updates_lock); 364} 365 366/* 367 * lookup volume location 368 * - iterate through the VL servers in a cell until one of them admits knowing 369 * about the volume in question 370 * - lookup in the local cache if not able to find on the VL server 371 * - insert/update in the local cache if did get a VL response 372 */ 373struct afs_vlocation *afs_vlocation_lookup(struct afs_cell *cell, 374 struct key *key, 375 const char *name, 376 size_t namesz) 377{ 378 struct afs_vlocation *vl; 379 int ret; 380 381 _enter("{%s},{%x},%*.*s,%zu", 382 cell->name, key_serial(key), 383 (int) namesz, (int) namesz, name, namesz); 384 385 if (namesz > sizeof(vl->vldb.name)) { 386 _leave(" = -ENAMETOOLONG"); 387 return ERR_PTR(-ENAMETOOLONG); 388 } 389 390 /* see if we have an in-memory copy first */ 391 down_write(&cell->vl_sem); 392 spin_lock(&cell->vl_lock); 393 list_for_each_entry(vl, &cell->vl_list, link) { 394 if (vl->vldb.name[namesz] != '\0') 395 continue; 396 if (memcmp(vl->vldb.name, name, namesz) == 0) 397 goto found_in_memory; 398 } 399 spin_unlock(&cell->vl_lock); 400 401 /* not in the cell's in-memory lists - create a new record */ 402 vl = afs_vlocation_alloc(cell, name, namesz); 403 if (!vl) { 404 up_write(&cell->vl_sem); 405 return ERR_PTR(-ENOMEM); 406 } 407 408 afs_get_cell(cell); 409 410 list_add_tail(&vl->link, &cell->vl_list); 411 vl->state = AFS_VL_CREATING; 412 up_write(&cell->vl_sem); 413 414fill_in_record: 415 ret = afs_vlocation_fill_in_record(vl, key); 416 if (ret < 0) 417 goto error_abandon; 418 spin_lock(&vl->lock); 419 vl->state = AFS_VL_VALID; 420 spin_unlock(&vl->lock); 421 wake_up(&vl->waitq); 422 423 /* schedule for regular updates */ 424 afs_vlocation_queue_for_updates(vl); 425 goto success; 426 427found_in_memory: 428 /* found in memory */ 429 _debug("found in memory"); 430 atomic_inc(&vl->usage); 431 spin_unlock(&cell->vl_lock); 432 if (!list_empty(&vl->grave)) { 433 spin_lock(&afs_vlocation_graveyard_lock); 434 list_del_init(&vl->grave); 435 spin_unlock(&afs_vlocation_graveyard_lock); 436 } 437 up_write(&cell->vl_sem); 438 439 /* see if it was an abandoned record that we might try filling in */ 440 spin_lock(&vl->lock); 441 while (vl->state != AFS_VL_VALID) { 442 afs_vlocation_state_t state = vl->state; 443 444 _debug("invalid [state %d]", state); 445 446 if (state == AFS_VL_NEW || state == AFS_VL_NO_VOLUME) { 447 vl->state = AFS_VL_CREATING; 448 spin_unlock(&vl->lock); 449 goto fill_in_record; 450 } 451 452 /* must now wait for creation or update by someone else to 453 * complete */ 454 _debug("wait"); 455 456 spin_unlock(&vl->lock); 457 ret = wait_event_interruptible(vl->waitq, 458 vl->state == AFS_VL_NEW || 459 vl->state == AFS_VL_VALID || 460 vl->state == AFS_VL_NO_VOLUME); 461 if (ret < 0) 462 goto error; 463 spin_lock(&vl->lock); 464 } 465 spin_unlock(&vl->lock); 466 467success: 468 _leave(" = %p",vl); 469 return vl; 470 471error_abandon: 472 spin_lock(&vl->lock); 473 vl->state = AFS_VL_NEW; 474 spin_unlock(&vl->lock); 475 wake_up(&vl->waitq); 476error: 477 ASSERT(vl != NULL); 478 afs_put_vlocation(vl); 479 _leave(" = %d", ret); 480 return ERR_PTR(ret); 481} 482 483/* 484 * finish using a volume location record 485 */ 486void afs_put_vlocation(struct afs_vlocation *vl) 487{ 488 if (!vl) 489 return; 490 491 _enter("%s", vl->vldb.name); 492 493 ASSERTCMP(atomic_read(&vl->usage), >, 0); 494 495 if (likely(!atomic_dec_and_test(&vl->usage))) { 496 _leave(""); 497 return; 498 } 499 500 spin_lock(&afs_vlocation_graveyard_lock); 501 if (atomic_read(&vl->usage) == 0) { 502 _debug("buried"); 503 list_move_tail(&vl->grave, &afs_vlocation_graveyard); 504 vl->time_of_death = get_seconds(); 505 schedule_delayed_work(&afs_vlocation_reap, 506 afs_vlocation_timeout * HZ); 507 508 /* suspend updates on this record */ 509 if (!list_empty(&vl->update)) { 510 spin_lock(&afs_vlocation_updates_lock); 511 list_del_init(&vl->update); 512 spin_unlock(&afs_vlocation_updates_lock); 513 } 514 } 515 spin_unlock(&afs_vlocation_graveyard_lock); 516 _leave(" [killed?]"); 517} 518 519/* 520 * destroy a dead volume location record 521 */ 522static void afs_vlocation_destroy(struct afs_vlocation *vl) 523{ 524 _enter("%p", vl); 525 526#ifdef AFS_CACHING_SUPPORT 527 cachefs_relinquish_cookie(vl->cache, 0); 528#endif 529 530 afs_put_cell(vl->cell); 531 kfree(vl); 532} 533 534/* 535 * reap dead volume location records 536 */ 537static void afs_vlocation_reaper(struct work_struct *work) 538{ 539 LIST_HEAD(corpses); 540 struct afs_vlocation *vl; 541 unsigned long delay, expiry; 542 time_t now; 543 544 _enter(""); 545 546 now = get_seconds(); 547 spin_lock(&afs_vlocation_graveyard_lock); 548 549 while (!list_empty(&afs_vlocation_graveyard)) { 550 vl = list_entry(afs_vlocation_graveyard.next, 551 struct afs_vlocation, grave); 552 553 _debug("check %p", vl); 554 555 /* the queue is ordered most dead first */ 556 expiry = vl->time_of_death + afs_vlocation_timeout; 557 if (expiry > now) { 558 delay = (expiry - now) * HZ; 559 _debug("delay %lu", delay); 560 if (!schedule_delayed_work(&afs_vlocation_reap, 561 delay)) { 562 cancel_delayed_work(&afs_vlocation_reap); 563 schedule_delayed_work(&afs_vlocation_reap, 564 delay); 565 } 566 break; 567 } 568 569 spin_lock(&vl->cell->vl_lock); 570 if (atomic_read(&vl->usage) > 0) { 571 _debug("no reap"); 572 list_del_init(&vl->grave); 573 } else { 574 _debug("reap"); 575 list_move_tail(&vl->grave, &corpses); 576 list_del_init(&vl->link); 577 } 578 spin_unlock(&vl->cell->vl_lock); 579 } 580 581 spin_unlock(&afs_vlocation_graveyard_lock); 582 583 /* now reap the corpses we've extracted */ 584 while (!list_empty(&corpses)) { 585 vl = list_entry(corpses.next, struct afs_vlocation, grave); 586 list_del(&vl->grave); 587 afs_vlocation_destroy(vl); 588 } 589 590 _leave(""); 591} 592 593/* 594 * initialise the VL update process 595 */ 596int __init afs_vlocation_update_init(void) 597{ 598 afs_vlocation_update_worker = 599 create_singlethread_workqueue("kafs_vlupdated"); 600 return afs_vlocation_update_worker ? 0 : -ENOMEM; 601} 602 603/* 604 * discard all the volume location records for rmmod 605 */ 606void afs_vlocation_purge(void) 607{ 608 afs_vlocation_timeout = 0; 609 610 spin_lock(&afs_vlocation_updates_lock); 611 list_del_init(&afs_vlocation_updates); 612 spin_unlock(&afs_vlocation_updates_lock); 613 cancel_delayed_work(&afs_vlocation_update); 614 queue_delayed_work(afs_vlocation_update_worker, 615 &afs_vlocation_update, 0); 616 destroy_workqueue(afs_vlocation_update_worker); 617 618 cancel_delayed_work(&afs_vlocation_reap); 619 schedule_delayed_work(&afs_vlocation_reap, 0); 620} 621 622/* 623 * update a volume location 624 */ 625static void afs_vlocation_updater(struct work_struct *work) 626{ 627 struct afs_cache_vlocation vldb; 628 struct afs_vlocation *vl, *xvl; 629 time_t now; 630 long timeout; 631 int ret; 632 633 _enter(""); 634 635 now = get_seconds(); 636 637 /* find a record to update */ 638 spin_lock(&afs_vlocation_updates_lock); 639 for (;;) { 640 if (list_empty(&afs_vlocation_updates)) { 641 spin_unlock(&afs_vlocation_updates_lock); 642 _leave(" [nothing]"); 643 return; 644 } 645 646 vl = list_entry(afs_vlocation_updates.next, 647 struct afs_vlocation, update); 648 if (atomic_read(&vl->usage) > 0) 649 break; 650 list_del_init(&vl->update); 651 } 652 653 timeout = vl->update_at - now; 654 if (timeout > 0) { 655 queue_delayed_work(afs_vlocation_update_worker, 656 &afs_vlocation_update, timeout * HZ); 657 spin_unlock(&afs_vlocation_updates_lock); 658 _leave(" [nothing]"); 659 return; 660 } 661 662 list_del_init(&vl->update); 663 atomic_inc(&vl->usage); 664 spin_unlock(&afs_vlocation_updates_lock); 665 666 /* we can now perform the update */ 667 _debug("update %s", vl->vldb.name); 668 vl->state = AFS_VL_UPDATING; 669 vl->upd_rej_cnt = 0; 670 vl->upd_busy_cnt = 0; 671 672 ret = afs_vlocation_update_record(vl, NULL, &vldb); 673 spin_lock(&vl->lock); 674 switch (ret) { 675 case 0: 676 afs_vlocation_apply_update(vl, &vldb); 677 vl->state = AFS_VL_VALID; 678 break; 679 case -ENOMEDIUM: 680 vl->state = AFS_VL_VOLUME_DELETED; 681 break; 682 default: 683 vl->state = AFS_VL_UNCERTAIN; 684 break; 685 } 686 spin_unlock(&vl->lock); 687 wake_up(&vl->waitq); 688 689 /* and then reschedule */ 690 _debug("reschedule"); 691 vl->update_at = get_seconds() + afs_vlocation_update_timeout; 692 693 spin_lock(&afs_vlocation_updates_lock); 694 695 if (!list_empty(&afs_vlocation_updates)) { 696 /* next update in 10 minutes, but wait at least 1 second more 697 * than the newest record already queued so that we don't spam 698 * the VL server suddenly with lots of requests 699 */ 700 xvl = list_entry(afs_vlocation_updates.prev, 701 struct afs_vlocation, update); 702 if (vl->update_at <= xvl->update_at) 703 vl->update_at = xvl->update_at + 1; 704 xvl = list_entry(afs_vlocation_updates.next, 705 struct afs_vlocation, update); 706 timeout = xvl->update_at - now; 707 if (timeout < 0) 708 timeout = 0; 709 } else { 710 timeout = afs_vlocation_update_timeout; 711 } 712 713 ASSERT(list_empty(&vl->update)); 714 715 list_add_tail(&vl->update, &afs_vlocation_updates); 716 717 _debug("timeout %ld", timeout); 718 queue_delayed_work(afs_vlocation_update_worker, 719 &afs_vlocation_update, timeout * HZ); 720 spin_unlock(&afs_vlocation_updates_lock); 721 afs_put_vlocation(vl); 722} 723