1/* AFS volume location management 2 * 3 * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved. 4 * Written by David Howells (dhowells@redhat.com) 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public License 8 * as published by the Free Software Foundation; either version 9 * 2 of the License, or (at your option) any later version. 10 */ 11 12#include <linux/kernel.h> 13#include <linux/module.h> 14#include <linux/slab.h> 15#include <linux/init.h> 16#include <linux/sched.h> 17#include "internal.h" 18 19static unsigned afs_vlocation_timeout = 10; /* volume location timeout in seconds */ 20static unsigned afs_vlocation_update_timeout = 10 * 60; 21 22static void afs_vlocation_reaper(struct work_struct *); 23static void afs_vlocation_updater(struct work_struct *); 24 25static LIST_HEAD(afs_vlocation_updates); 26static LIST_HEAD(afs_vlocation_graveyard); 27static DEFINE_SPINLOCK(afs_vlocation_updates_lock); 28static DEFINE_SPINLOCK(afs_vlocation_graveyard_lock); 29static DECLARE_DELAYED_WORK(afs_vlocation_reap, afs_vlocation_reaper); 30static DECLARE_DELAYED_WORK(afs_vlocation_update, afs_vlocation_updater); 31static struct workqueue_struct *afs_vlocation_update_worker; 32 33/* 34 * iterate through the VL servers in a cell until one of them admits knowing 35 * about the volume in question 36 */ 37static int afs_vlocation_access_vl_by_name(struct afs_vlocation *vl, 38 struct key *key, 39 struct afs_cache_vlocation *vldb) 40{ 41 struct afs_cell *cell = vl->cell; 42 struct in_addr addr; 43 int count, ret; 44 45 _enter("%s,%s", cell->name, vl->vldb.name); 46 47 down_write(&vl->cell->vl_sem); 48 ret = -ENOMEDIUM; 49 for (count = cell->vl_naddrs; count > 0; count--) { 50 addr = cell->vl_addrs[cell->vl_curr_svix]; 51 52 _debug("CellServ[%hu]: %08x", cell->vl_curr_svix, addr.s_addr); 53 54 /* attempt to access the VL server */ 55 ret = afs_vl_get_entry_by_name(&addr, key, vl->vldb.name, vldb, 56 &afs_sync_call); 57 switch (ret) { 58 case 0: 59 goto out; 60 case -ENOMEM: 61 case -ENONET: 62 case -ENETUNREACH: 63 case -EHOSTUNREACH: 64 case -ECONNREFUSED: 65 if (ret == -ENOMEM || ret == -ENONET) 66 goto out; 67 goto rotate; 68 case -ENOMEDIUM: 69 case -EKEYREJECTED: 70 case -EKEYEXPIRED: 71 goto out; 72 default: 73 ret = -EIO; 74 goto rotate; 75 } 76 77 /* rotate the server records upon lookup failure */ 78 rotate: 79 cell->vl_curr_svix++; 80 cell->vl_curr_svix %= cell->vl_naddrs; 81 } 82 83out: 84 up_write(&vl->cell->vl_sem); 85 _leave(" = %d", ret); 86 return ret; 87} 88 89/* 90 * iterate through the VL servers in a cell until one of them admits knowing 91 * about the volume in question 92 */ 93static int afs_vlocation_access_vl_by_id(struct afs_vlocation *vl, 94 struct key *key, 95 afs_volid_t volid, 96 afs_voltype_t voltype, 97 struct afs_cache_vlocation *vldb) 98{ 99 struct afs_cell *cell = vl->cell; 100 struct in_addr addr; 101 int count, ret; 102 103 _enter("%s,%x,%d,", cell->name, volid, voltype); 104 105 down_write(&vl->cell->vl_sem); 106 ret = -ENOMEDIUM; 107 for (count = cell->vl_naddrs; count > 0; count--) { 108 addr = cell->vl_addrs[cell->vl_curr_svix]; 109 110 _debug("CellServ[%hu]: %08x", cell->vl_curr_svix, addr.s_addr); 111 112 /* attempt to access the VL server */ 113 ret = afs_vl_get_entry_by_id(&addr, key, volid, voltype, vldb, 114 &afs_sync_call); 115 switch (ret) { 116 case 0: 117 goto out; 118 case -ENOMEM: 119 case -ENONET: 120 case -ENETUNREACH: 121 case -EHOSTUNREACH: 122 case -ECONNREFUSED: 123 if (ret == -ENOMEM || ret == -ENONET) 124 goto out; 125 goto rotate; 126 case -EBUSY: 127 vl->upd_busy_cnt++; 128 if (vl->upd_busy_cnt <= 3) { 129 if (vl->upd_busy_cnt > 1) { 130 /* second+ BUSY - sleep a little bit */ 131 set_current_state(TASK_UNINTERRUPTIBLE); 132 schedule_timeout(1); 133 __set_current_state(TASK_RUNNING); 134 } 135 continue; 136 } 137 break; 138 case -ENOMEDIUM: 139 vl->upd_rej_cnt++; 140 goto rotate; 141 default: 142 ret = -EIO; 143 goto rotate; 144 } 145 146 /* rotate the server records upon lookup failure */ 147 rotate: 148 cell->vl_curr_svix++; 149 cell->vl_curr_svix %= cell->vl_naddrs; 150 vl->upd_busy_cnt = 0; 151 } 152 153out: 154 if (ret < 0 && vl->upd_rej_cnt > 0) { 155 printk(KERN_NOTICE "kAFS:" 156 " Active volume no longer valid '%s'\n", 157 vl->vldb.name); 158 vl->valid = 0; 159 ret = -ENOMEDIUM; 160 } 161 162 up_write(&vl->cell->vl_sem); 163 _leave(" = %d", ret); 164 return ret; 165} 166 167/* 168 * allocate a volume location record 169 */ 170static struct afs_vlocation *afs_vlocation_alloc(struct afs_cell *cell, 171 const char *name, 172 size_t namesz) 173{ 174 struct afs_vlocation *vl; 175 176 vl = kzalloc(sizeof(struct afs_vlocation), GFP_KERNEL); 177 if (vl) { 178 vl->cell = cell; 179 vl->state = AFS_VL_NEW; 180 atomic_set(&vl->usage, 1); 181 INIT_LIST_HEAD(&vl->link); 182 INIT_LIST_HEAD(&vl->grave); 183 INIT_LIST_HEAD(&vl->update); 184 init_waitqueue_head(&vl->waitq); 185 spin_lock_init(&vl->lock); 186 memcpy(vl->vldb.name, name, namesz); 187 } 188 189 _leave(" = %p", vl); 190 return vl; 191} 192 193/* 194 * update record if we found it in the cache 195 */ 196static int afs_vlocation_update_record(struct afs_vlocation *vl, 197 struct key *key, 198 struct afs_cache_vlocation *vldb) 199{ 200 afs_voltype_t voltype; 201 afs_volid_t vid; 202 int ret; 203 204 /* try to look up a cached volume in the cell VL databases by ID */ 205 _debug("Locally Cached: %s %02x { %08x(%x) %08x(%x) %08x(%x) }", 206 vl->vldb.name, 207 vl->vldb.vidmask, 208 ntohl(vl->vldb.servers[0].s_addr), 209 vl->vldb.srvtmask[0], 210 ntohl(vl->vldb.servers[1].s_addr), 211 vl->vldb.srvtmask[1], 212 ntohl(vl->vldb.servers[2].s_addr), 213 vl->vldb.srvtmask[2]); 214 215 _debug("Vids: %08x %08x %08x", 216 vl->vldb.vid[0], 217 vl->vldb.vid[1], 218 vl->vldb.vid[2]); 219 220 if (vl->vldb.vidmask & AFS_VOL_VTM_RW) { 221 vid = vl->vldb.vid[0]; 222 voltype = AFSVL_RWVOL; 223 } else if (vl->vldb.vidmask & AFS_VOL_VTM_RO) { 224 vid = vl->vldb.vid[1]; 225 voltype = AFSVL_ROVOL; 226 } else if (vl->vldb.vidmask & AFS_VOL_VTM_BAK) { 227 vid = vl->vldb.vid[2]; 228 voltype = AFSVL_BACKVOL; 229 } else { 230 BUG(); 231 vid = 0; 232 voltype = 0; 233 } 234 235 /* contact the server to make sure the volume is still available 236 * - TODO: need to handle disconnected operation here 237 */ 238 ret = afs_vlocation_access_vl_by_id(vl, key, vid, voltype, vldb); 239 switch (ret) { 240 /* net error */ 241 default: 242 printk(KERN_WARNING "kAFS:" 243 " failed to update volume '%s' (%x) up in '%s': %d\n", 244 vl->vldb.name, vid, vl->cell->name, ret); 245 _leave(" = %d", ret); 246 return ret; 247 248 /* pulled from local cache into memory */ 249 case 0: 250 _leave(" = 0"); 251 return 0; 252 253 /* uh oh... looks like the volume got deleted */ 254 case -ENOMEDIUM: 255 printk(KERN_ERR "kAFS:" 256 " volume '%s' (%x) does not exist '%s'\n", 257 vl->vldb.name, vid, vl->cell->name); 258 259 /* TODO: make existing record unavailable */ 260 _leave(" = %d", ret); 261 return ret; 262 } 263} 264 265/* 266 * apply the update to a VL record 267 */ 268static void afs_vlocation_apply_update(struct afs_vlocation *vl, 269 struct afs_cache_vlocation *vldb) 270{ 271 _debug("Done VL Lookup: %s %02x { %08x(%x) %08x(%x) %08x(%x) }", 272 vldb->name, vldb->vidmask, 273 ntohl(vldb->servers[0].s_addr), vldb->srvtmask[0], 274 ntohl(vldb->servers[1].s_addr), vldb->srvtmask[1], 275 ntohl(vldb->servers[2].s_addr), vldb->srvtmask[2]); 276 277 _debug("Vids: %08x %08x %08x", 278 vldb->vid[0], vldb->vid[1], vldb->vid[2]); 279 280 if (strcmp(vldb->name, vl->vldb.name) != 0) 281 printk(KERN_NOTICE "kAFS:" 282 " name of volume '%s' changed to '%s' on server\n", 283 vl->vldb.name, vldb->name); 284 285 vl->vldb = *vldb; 286 287#ifdef CONFIG_AFS_FSCACHE 288 fscache_update_cookie(vl->cache); 289#endif 290} 291 292/* 293 * fill in a volume location record, consulting the cache and the VL server 294 * both 295 */ 296static int afs_vlocation_fill_in_record(struct afs_vlocation *vl, 297 struct key *key) 298{ 299 struct afs_cache_vlocation vldb; 300 int ret; 301 302 _enter(""); 303 304 ASSERTCMP(vl->valid, ==, 0); 305 306 memset(&vldb, 0, sizeof(vldb)); 307 308 /* see if we have an in-cache copy (will set vl->valid if there is) */ 309#ifdef CONFIG_AFS_FSCACHE 310 vl->cache = fscache_acquire_cookie(vl->cell->cache, 311 &afs_vlocation_cache_index_def, vl); 312#endif 313 314 if (vl->valid) { 315 /* try to update a known volume in the cell VL databases by 316 * ID as the name may have changed */ 317 _debug("found in cache"); 318 ret = afs_vlocation_update_record(vl, key, &vldb); 319 } else { 320 /* try to look up an unknown volume in the cell VL databases by 321 * name */ 322 ret = afs_vlocation_access_vl_by_name(vl, key, &vldb); 323 if (ret < 0) { 324 printk("kAFS: failed to locate '%s' in cell '%s'\n", 325 vl->vldb.name, vl->cell->name); 326 return ret; 327 } 328 } 329 330 afs_vlocation_apply_update(vl, &vldb); 331 _leave(" = 0"); 332 return 0; 333} 334 335/* 336 * queue a vlocation record for updates 337 */ 338static void afs_vlocation_queue_for_updates(struct afs_vlocation *vl) 339{ 340 struct afs_vlocation *xvl; 341 342 /* wait at least 10 minutes before updating... */ 343 vl->update_at = get_seconds() + afs_vlocation_update_timeout; 344 345 spin_lock(&afs_vlocation_updates_lock); 346 347 if (!list_empty(&afs_vlocation_updates)) { 348 /* ... but wait at least 1 second more than the newest record 349 * already queued so that we don't spam the VL server suddenly 350 * with lots of requests 351 */ 352 xvl = list_entry(afs_vlocation_updates.prev, 353 struct afs_vlocation, update); 354 if (vl->update_at <= xvl->update_at) 355 vl->update_at = xvl->update_at + 1; 356 } else { 357 queue_delayed_work(afs_vlocation_update_worker, 358 &afs_vlocation_update, 359 afs_vlocation_update_timeout * HZ); 360 } 361 362 list_add_tail(&vl->update, &afs_vlocation_updates); 363 spin_unlock(&afs_vlocation_updates_lock); 364} 365 366/* 367 * lookup volume location 368 * - iterate through the VL servers in a cell until one of them admits knowing 369 * about the volume in question 370 * - lookup in the local cache if not able to find on the VL server 371 * - insert/update in the local cache if did get a VL response 372 */ 373struct afs_vlocation *afs_vlocation_lookup(struct afs_cell *cell, 374 struct key *key, 375 const char *name, 376 size_t namesz) 377{ 378 struct afs_vlocation *vl; 379 int ret; 380 381 _enter("{%s},{%x},%*.*s,%zu", 382 cell->name, key_serial(key), 383 (int) namesz, (int) namesz, name, namesz); 384 385 if (namesz >= sizeof(vl->vldb.name)) { 386 _leave(" = -ENAMETOOLONG"); 387 return ERR_PTR(-ENAMETOOLONG); 388 } 389 390 /* see if we have an in-memory copy first */ 391 down_write(&cell->vl_sem); 392 spin_lock(&cell->vl_lock); 393 list_for_each_entry(vl, &cell->vl_list, link) { 394 if (vl->vldb.name[namesz] != '\0') 395 continue; 396 if (memcmp(vl->vldb.name, name, namesz) == 0) 397 goto found_in_memory; 398 } 399 spin_unlock(&cell->vl_lock); 400 401 /* not in the cell's in-memory lists - create a new record */ 402 vl = afs_vlocation_alloc(cell, name, namesz); 403 if (!vl) { 404 up_write(&cell->vl_sem); 405 return ERR_PTR(-ENOMEM); 406 } 407 408 afs_get_cell(cell); 409 410 list_add_tail(&vl->link, &cell->vl_list); 411 vl->state = AFS_VL_CREATING; 412 up_write(&cell->vl_sem); 413 414fill_in_record: 415 ret = afs_vlocation_fill_in_record(vl, key); 416 if (ret < 0) 417 goto error_abandon; 418 spin_lock(&vl->lock); 419 vl->state = AFS_VL_VALID; 420 spin_unlock(&vl->lock); 421 wake_up(&vl->waitq); 422 423 /* update volume entry in local cache */ 424#ifdef CONFIG_AFS_FSCACHE 425 fscache_update_cookie(vl->cache); 426#endif 427 428 /* schedule for regular updates */ 429 afs_vlocation_queue_for_updates(vl); 430 goto success; 431 432found_in_memory: 433 /* found in memory */ 434 _debug("found in memory"); 435 atomic_inc(&vl->usage); 436 spin_unlock(&cell->vl_lock); 437 if (!list_empty(&vl->grave)) { 438 spin_lock(&afs_vlocation_graveyard_lock); 439 list_del_init(&vl->grave); 440 spin_unlock(&afs_vlocation_graveyard_lock); 441 } 442 up_write(&cell->vl_sem); 443 444 /* see if it was an abandoned record that we might try filling in */ 445 spin_lock(&vl->lock); 446 while (vl->state != AFS_VL_VALID) { 447 afs_vlocation_state_t state = vl->state; 448 449 _debug("invalid [state %d]", state); 450 451 if (state == AFS_VL_NEW || state == AFS_VL_NO_VOLUME) { 452 vl->state = AFS_VL_CREATING; 453 spin_unlock(&vl->lock); 454 goto fill_in_record; 455 } 456 457 /* must now wait for creation or update by someone else to 458 * complete */ 459 _debug("wait"); 460 461 spin_unlock(&vl->lock); 462 ret = wait_event_interruptible(vl->waitq, 463 vl->state == AFS_VL_NEW || 464 vl->state == AFS_VL_VALID || 465 vl->state == AFS_VL_NO_VOLUME); 466 if (ret < 0) 467 goto error; 468 spin_lock(&vl->lock); 469 } 470 spin_unlock(&vl->lock); 471 472success: 473 _leave(" = %p", vl); 474 return vl; 475 476error_abandon: 477 spin_lock(&vl->lock); 478 vl->state = AFS_VL_NEW; 479 spin_unlock(&vl->lock); 480 wake_up(&vl->waitq); 481error: 482 ASSERT(vl != NULL); 483 afs_put_vlocation(vl); 484 _leave(" = %d", ret); 485 return ERR_PTR(ret); 486} 487 488/* 489 * finish using a volume location record 490 */ 491void afs_put_vlocation(struct afs_vlocation *vl) 492{ 493 if (!vl) 494 return; 495 496 _enter("%s", vl->vldb.name); 497 498 ASSERTCMP(atomic_read(&vl->usage), >, 0); 499 500 if (likely(!atomic_dec_and_test(&vl->usage))) { 501 _leave(""); 502 return; 503 } 504 505 spin_lock(&afs_vlocation_graveyard_lock); 506 if (atomic_read(&vl->usage) == 0) { 507 _debug("buried"); 508 list_move_tail(&vl->grave, &afs_vlocation_graveyard); 509 vl->time_of_death = get_seconds(); 510 schedule_delayed_work(&afs_vlocation_reap, 511 afs_vlocation_timeout * HZ); 512 513 /* suspend updates on this record */ 514 if (!list_empty(&vl->update)) { 515 spin_lock(&afs_vlocation_updates_lock); 516 list_del_init(&vl->update); 517 spin_unlock(&afs_vlocation_updates_lock); 518 } 519 } 520 spin_unlock(&afs_vlocation_graveyard_lock); 521 _leave(" [killed?]"); 522} 523 524/* 525 * destroy a dead volume location record 526 */ 527static void afs_vlocation_destroy(struct afs_vlocation *vl) 528{ 529 _enter("%p", vl); 530 531#ifdef CONFIG_AFS_FSCACHE 532 fscache_relinquish_cookie(vl->cache, 0); 533#endif 534 afs_put_cell(vl->cell); 535 kfree(vl); 536} 537 538/* 539 * reap dead volume location records 540 */ 541static void afs_vlocation_reaper(struct work_struct *work) 542{ 543 LIST_HEAD(corpses); 544 struct afs_vlocation *vl; 545 unsigned long delay, expiry; 546 time_t now; 547 548 _enter(""); 549 550 now = get_seconds(); 551 spin_lock(&afs_vlocation_graveyard_lock); 552 553 while (!list_empty(&afs_vlocation_graveyard)) { 554 vl = list_entry(afs_vlocation_graveyard.next, 555 struct afs_vlocation, grave); 556 557 _debug("check %p", vl); 558 559 /* the queue is ordered most dead first */ 560 expiry = vl->time_of_death + afs_vlocation_timeout; 561 if (expiry > now) { 562 delay = (expiry - now) * HZ; 563 _debug("delay %lu", delay); 564 if (!schedule_delayed_work(&afs_vlocation_reap, 565 delay)) { 566 cancel_delayed_work(&afs_vlocation_reap); 567 schedule_delayed_work(&afs_vlocation_reap, 568 delay); 569 } 570 break; 571 } 572 573 spin_lock(&vl->cell->vl_lock); 574 if (atomic_read(&vl->usage) > 0) { 575 _debug("no reap"); 576 list_del_init(&vl->grave); 577 } else { 578 _debug("reap"); 579 list_move_tail(&vl->grave, &corpses); 580 list_del_init(&vl->link); 581 } 582 spin_unlock(&vl->cell->vl_lock); 583 } 584 585 spin_unlock(&afs_vlocation_graveyard_lock); 586 587 /* now reap the corpses we've extracted */ 588 while (!list_empty(&corpses)) { 589 vl = list_entry(corpses.next, struct afs_vlocation, grave); 590 list_del(&vl->grave); 591 afs_vlocation_destroy(vl); 592 } 593 594 _leave(""); 595} 596 597/* 598 * initialise the VL update process 599 */ 600int __init afs_vlocation_update_init(void) 601{ 602 afs_vlocation_update_worker = 603 create_singlethread_workqueue("kafs_vlupdated"); 604 return afs_vlocation_update_worker ? 0 : -ENOMEM; 605} 606 607/* 608 * discard all the volume location records for rmmod 609 */ 610void afs_vlocation_purge(void) 611{ 612 afs_vlocation_timeout = 0; 613 614 spin_lock(&afs_vlocation_updates_lock); 615 list_del_init(&afs_vlocation_updates); 616 spin_unlock(&afs_vlocation_updates_lock); 617 cancel_delayed_work(&afs_vlocation_update); 618 queue_delayed_work(afs_vlocation_update_worker, 619 &afs_vlocation_update, 0); 620 destroy_workqueue(afs_vlocation_update_worker); 621 622 cancel_delayed_work(&afs_vlocation_reap); 623 schedule_delayed_work(&afs_vlocation_reap, 0); 624} 625 626/* 627 * update a volume location 628 */ 629static void afs_vlocation_updater(struct work_struct *work) 630{ 631 struct afs_cache_vlocation vldb; 632 struct afs_vlocation *vl, *xvl; 633 time_t now; 634 long timeout; 635 int ret; 636 637 _enter(""); 638 639 now = get_seconds(); 640 641 /* find a record to update */ 642 spin_lock(&afs_vlocation_updates_lock); 643 for (;;) { 644 if (list_empty(&afs_vlocation_updates)) { 645 spin_unlock(&afs_vlocation_updates_lock); 646 _leave(" [nothing]"); 647 return; 648 } 649 650 vl = list_entry(afs_vlocation_updates.next, 651 struct afs_vlocation, update); 652 if (atomic_read(&vl->usage) > 0) 653 break; 654 list_del_init(&vl->update); 655 } 656 657 timeout = vl->update_at - now; 658 if (timeout > 0) { 659 queue_delayed_work(afs_vlocation_update_worker, 660 &afs_vlocation_update, timeout * HZ); 661 spin_unlock(&afs_vlocation_updates_lock); 662 _leave(" [nothing]"); 663 return; 664 } 665 666 list_del_init(&vl->update); 667 atomic_inc(&vl->usage); 668 spin_unlock(&afs_vlocation_updates_lock); 669 670 /* we can now perform the update */ 671 _debug("update %s", vl->vldb.name); 672 vl->state = AFS_VL_UPDATING; 673 vl->upd_rej_cnt = 0; 674 vl->upd_busy_cnt = 0; 675 676 ret = afs_vlocation_update_record(vl, NULL, &vldb); 677 spin_lock(&vl->lock); 678 switch (ret) { 679 case 0: 680 afs_vlocation_apply_update(vl, &vldb); 681 vl->state = AFS_VL_VALID; 682 break; 683 case -ENOMEDIUM: 684 vl->state = AFS_VL_VOLUME_DELETED; 685 break; 686 default: 687 vl->state = AFS_VL_UNCERTAIN; 688 break; 689 } 690 spin_unlock(&vl->lock); 691 wake_up(&vl->waitq); 692 693 /* and then reschedule */ 694 _debug("reschedule"); 695 vl->update_at = get_seconds() + afs_vlocation_update_timeout; 696 697 spin_lock(&afs_vlocation_updates_lock); 698 699 if (!list_empty(&afs_vlocation_updates)) { 700 /* next update in 10 minutes, but wait at least 1 second more 701 * than the newest record already queued so that we don't spam 702 * the VL server suddenly with lots of requests 703 */ 704 xvl = list_entry(afs_vlocation_updates.prev, 705 struct afs_vlocation, update); 706 if (vl->update_at <= xvl->update_at) 707 vl->update_at = xvl->update_at + 1; 708 xvl = list_entry(afs_vlocation_updates.next, 709 struct afs_vlocation, update); 710 timeout = xvl->update_at - now; 711 if (timeout < 0) 712 timeout = 0; 713 } else { 714 timeout = afs_vlocation_update_timeout; 715 } 716 717 ASSERT(list_empty(&vl->update)); 718 719 list_add_tail(&vl->update, &afs_vlocation_updates); 720 721 _debug("timeout %ld", timeout); 722 queue_delayed_work(afs_vlocation_update_worker, 723 &afs_vlocation_update, timeout * HZ); 724 spin_unlock(&afs_vlocation_updates_lock); 725 afs_put_vlocation(vl); 726} 727