1/* 2 This file contains the global device cache for the BeOS. All 3 file system I/O comes through here. The cache can handle blocks 4 of different sizes for multiple different underlying physical 5 devices. 6 7 The cache is organized as a hash table (for lookups by device 8 and block number) and two doubly-linked lists. The normal 9 list is for "normal" blocks which are either clean or dirty. 10 The locked list is for blocks that are locked in the cache by 11 BFS. The lists are LRU ordered. 12 13 Most of the work happens in the function cache_block_io() which 14 is quite lengthy. The other functions of interest are get_ents() 15 which picks victims to be kicked out of the cache; flush_ents() 16 which does the work of flushing them; and set_blocks_info() which 17 handles cloning blocks and setting callbacks so that the BFS 18 journal will work properly. If you want to modify this code it 19 will take some study but it's not too bad. Do not think about 20 separating the list of clean and dirty blocks into two lists as 21 I did that already and it's slower. 22 23 Originally this cache code was written while listening to the album 24 "Ride the Lightning" by Metallica. The current version was written 25 while listening to "Welcome to SkyValley" by Kyuss as well as an 26 ambient collection on the German label FAX. Music helps a lot when 27 writing code. 28 29 THIS CODE COPYRIGHT DOMINIC GIAMPAOLO. NO WARRANTY IS EXPRESSED 30 OR IMPLIED. YOU MAY USE THIS CODE AND FREELY DISTRIBUTE IT FOR 31 NON-COMMERCIAL USE AS LONG AS THIS NOTICE REMAINS ATTACHED. 32 33 FOR COMMERCIAL USE, CONTACT DOMINIC GIAMPAOLO (dbg@be.com). 
34 35 Dominic Giampaolo 36 dbg@be.com 37*/ 38 39#include "compat.h" 40 41#include <stdio.h> 42#include <stdlib.h> 43#include <memory.h> 44#include <string.h> 45#include <errno.h> 46#include <fcntl.h> 47#include <sys/types.h> 48#include <unistd.h> 49 50 51#if (defined(__BEOS__) || defined(__HAIKU__)) 52#include <OS.h> 53#include <KernelExport.h> 54#endif 55 56#include "lock.h" 57#include "cache.h" 58 59 60 61#ifndef USER 62#define printf dprintf 63#endif 64#ifdef USER 65#define kprintf printf 66#endif 67 68 69/* forward prototypes */ 70static int flush_ents(cache_ent **ents, int n_ents); 71 72//static int do_dump(int argc, char **argv); 73//static int do_find_block(int argc, char **argv); 74//static int do_find_data(int argc, char **argv); 75//static void cache_flusher(void *arg, int phase); 76 77 78int chatty_io = 0; 79 80#define CHUNK (512 * 1024) /* a hack to work around scsi driver bugs */ 81 82size_t 83read_phys_blocks(int fd, fs_off_t bnum, void *data, uint num_blocks, int bsize) 84{ 85 size_t ret = 0; 86 size_t sum; 87 88 if (chatty_io) 89 printf("R: %8Ld : %3d\n", bnum, num_blocks); 90 91 if (num_blocks * bsize < CHUNK) 92 ret = read_pos(fd, bnum * bsize, data, num_blocks * bsize); 93 else { 94 for(sum=0; (sum + CHUNK) <= (num_blocks * bsize); sum += CHUNK) { 95 ret = read_pos(fd, (bnum * bsize) + sum, data, CHUNK); 96 if (ret != CHUNK) 97 break; 98 99 data = (void *)((char *)data + CHUNK); 100 } 101 102 if (ret == CHUNK && ((num_blocks * bsize) - sum) > 0) { 103 ret = read_pos(fd, (bnum * bsize) + sum, data, 104 (num_blocks * bsize) - sum); 105 106 if (ret == (num_blocks * bsize) - sum) 107 ret = num_blocks * bsize; 108 } else if (ret == CHUNK) { 109 ret = num_blocks * bsize; 110 } 111 } 112 113 if (ret == num_blocks * bsize) 114 return 0; 115 else 116 return FS_EBADF; 117} 118 119size_t 120write_phys_blocks(int fd, fs_off_t bnum, void *data, uint num_blocks, int bsize) 121{ 122 size_t ret = 0; 123 size_t sum; 124 125 if (chatty_io) 126 printf("W: %8Ld : 
%3d\n", bnum, num_blocks); 127 128 if (num_blocks * bsize < CHUNK) 129 ret = write_pos(fd, bnum * bsize, data, num_blocks * bsize); 130 else { 131 for(sum=0; (sum + CHUNK) <= (num_blocks * bsize); sum += CHUNK) { 132 ret = write_pos(fd, (bnum * bsize) + sum, data, CHUNK); 133 if (ret != CHUNK) 134 break; 135 136 data = (void *)((char *)data + CHUNK); 137 } 138 139 if (ret == CHUNK && ((num_blocks * bsize) - sum) > 0) { 140 ret = write_pos(fd, (bnum * bsize) + sum, data, 141 (num_blocks * bsize) - sum); 142 143 if (ret == (num_blocks * bsize) - sum) 144 ret = num_blocks * bsize; 145 } else if (ret == CHUNK) { 146 ret = num_blocks * bsize; 147 } 148 } 149 150 151 if (ret == num_blocks * bsize) 152 return 0; 153 else 154 return FS_EBADF; 155} 156 157// #pragma mark - 158 159static int 160init_hash_table(hash_table *ht) 161{ 162 ht->max = HT_DEFAULT_MAX; 163 ht->mask = ht->max - 1; 164 ht->num_elements = 0; 165 166 ht->table = (hash_ent **)calloc(ht->max, sizeof(hash_ent *)); 167 if (ht->table == NULL) 168 return FS_ENOMEM; 169 170 return 0; 171} 172 173 174static void 175shutdown_hash_table(hash_table *ht) 176{ 177 int i, hash_len; 178 hash_ent *he, *next; 179 180 for(i=0; i < ht->max; i++) { 181 he = ht->table[i]; 182 183 for(hash_len=0; he; hash_len++, he=next) { 184 next = he->next; 185 free(he); 186 } 187 } 188 189 if (ht->table) 190 free(ht->table); 191 ht->table = NULL; 192} 193 194#if 0 195static void 196print_hash_stats(hash_table *ht) 197{ 198 int i, hash_len, max = -1, sum = 0; 199 hash_ent *he, *next; 200 201 for(i=0; i < ht->max; i++) { 202 he = ht->table[i]; 203 204 for(hash_len=0; he; hash_len++, he=next) { 205 next = he->next; 206 } 207 if (hash_len) 208 printf("bucket %3d : %3d\n", i, hash_len); 209 210 sum += hash_len; 211 if (hash_len > max) 212 max = hash_len; 213 } 214 215 printf("max # of chains: %d, average chain length %d\n", max,sum/ht->max); 216} 217#endif 218 219#define HASH(d, b) ((((fs_off_t)d) << (sizeof(fs_off_t)*8 - 6)) | (b)) 220 
221static hash_ent * 222new_hash_ent(int dev, fs_off_t bnum, void *data) 223{ 224 hash_ent *he; 225 226 he = (hash_ent *)malloc(sizeof(*he)); 227 if (he == NULL) 228 return NULL; 229 230 he->hash_val = HASH(dev, bnum); 231 he->dev = dev; 232 he->bnum = bnum; 233 he->data = data; 234 he->next = NULL; 235 236 return he; 237} 238 239 240static int 241grow_hash_table(hash_table *ht) 242{ 243 int i, omax, newsize, newmask; 244 fs_off_t hash; 245 hash_ent **new_table, *he, *next; 246 247 if (ht->max & ht->mask) { 248 printf("*** hashtable size %d or mask %d looks weird!\n", ht->max, 249 ht->mask); 250 } 251 252 omax = ht->max; 253 newsize = omax * 2; /* have to grow in powers of two */ 254 newmask = newsize - 1; 255 256 new_table = (hash_ent **)calloc(newsize, sizeof(hash_ent *)); 257 if (new_table == NULL) 258 return FS_ENOMEM; 259 260 for(i=0; i < omax; i++) { 261 for(he=ht->table[i]; he; he=next) { 262 hash = he->hash_val & newmask; 263 next = he->next; 264 265 he->next = new_table[hash]; 266 new_table[hash] = he; 267 } 268 } 269 270 free(ht->table); 271 ht->table = new_table; 272 ht->max = newsize; 273 ht->mask = newmask; 274 275 return 0; 276} 277 278 279 280 281static int 282hash_insert(hash_table *ht, int dev, fs_off_t bnum, void *data) 283{ 284 fs_off_t hash; 285 hash_ent *he, *curr; 286 287 hash = HASH(dev, bnum) & ht->mask; 288 289 curr = ht->table[hash]; 290 for(; curr != NULL; curr=curr->next) 291 if (curr->dev == dev && curr->bnum == bnum) 292 break; 293 294 if (curr && curr->dev == dev && curr->bnum == bnum) { 295 printf("entry %d:%Ld already in the hash table!\n", dev, bnum); 296 return FS_EEXIST; 297 } 298 299 he = new_hash_ent(dev, bnum, data); 300 if (he == NULL) 301 return FS_ENOMEM; 302 303 he->next = ht->table[hash]; 304 ht->table[hash] = he; 305 306 ht->num_elements++; 307 if (ht->num_elements >= ((ht->max * 3) / 4)) { 308 if (grow_hash_table(ht) != 0) 309 return FS_ENOMEM; 310 } 311 312 return 0; 313} 314 315static void * 316hash_lookup(hash_table 
*ht, int dev, fs_off_t bnum) 317{ 318 hash_ent *he; 319 320 he = ht->table[HASH(dev, bnum) & ht->mask]; 321 322 for(; he != NULL; he=he->next) { 323 if (he->dev == dev && he->bnum == bnum) 324 break; 325 } 326 327 if (he) 328 return he->data; 329 else 330 return NULL; 331} 332 333 334static void * 335hash_delete(hash_table *ht, int dev, fs_off_t bnum) 336{ 337 void *data; 338 fs_off_t hash; 339 hash_ent *he, *prev = NULL; 340 341 hash = HASH(dev, bnum) & ht->mask; 342 he = ht->table[hash]; 343 344 for(; he != NULL; prev=he,he=he->next) { 345 if (he->dev == dev && he->bnum == bnum) 346 break; 347 } 348 349 if (he == NULL) { 350 printf("*** hash_delete: tried to delete non-existent block %d:%Ld\n", 351 dev, bnum); 352 return NULL; 353 } 354 355 data = he->data; 356 357 if (ht->table[hash] == he) 358 ht->table[hash] = he->next; 359 else if (prev) 360 prev->next = he->next; 361 else 362 panic("hash table is inconsistent\n"); 363 364 free(he); 365 ht->num_elements--; 366 367 return data; 368} 369 370// #pragma mark - 371 372/* 373 These are the global variables for the cache. 
374*/ 375static block_cache bc; 376 377#define MAX_IOVECS 64 /* # of iovecs for use by cache code */ 378static lock iovec_lock; 379static struct iovec *iovec_pool[MAX_IOVECS]; /* each ptr is to an array of iovecs */ 380static int iovec_used[MAX_IOVECS]; /* non-zero == iovec is in use */ 381 382#define NUM_FLUSH_BLOCKS 64 /* size of the iovec array pointed by each ptr */ 383 384 385#define DEFAULT_READ_AHEAD_SIZE (32 * 1024) 386static int read_ahead_size = DEFAULT_READ_AHEAD_SIZE; 387 388/* this array stores the size of each device so we can error check requests */ 389#define MAX_DEVICES 256 390fs_off_t max_device_blocks[MAX_DEVICES]; 391 392 393/* has the time of the last cache access so cache flushing doesn't interfere */ 394static bigtime_t last_cache_access = 0; 395 396 397int 398init_block_cache(int max_blocks, int flags) 399{ 400 memset(&bc, 0, sizeof(bc)); 401 memset(iovec_pool, 0, sizeof(iovec_pool)); 402 memset(iovec_used, 0, sizeof(iovec_used)); 403 memset(&max_device_blocks, 0, sizeof(max_device_blocks)); 404 405 if (init_hash_table(&bc.ht) != 0) 406 return FS_ENOMEM; 407 408 bc.lock.s = iovec_lock.s = -1; 409 410 bc.max_blocks = max_blocks; 411 bc.flags = flags; 412 if (new_lock(&bc.lock, "bollockcache") != 0) 413 goto err; 414 415 if (new_lock(&iovec_lock, "iovec_lock") != 0) 416 goto err; 417 418 /* allocate two of these up front so vm won't accidently re-enter itself */ 419 iovec_pool[0] = (struct iovec *)malloc(sizeof(struct iovec)*NUM_FLUSH_BLOCKS); 420 iovec_pool[1] = (struct iovec *)malloc(sizeof(struct iovec)*NUM_FLUSH_BLOCKS); 421 422#ifndef USER 423#ifdef DEBUG 424 add_debugger_command("bcache", do_dump, "dump the block cache list"); 425 add_debugger_command("fblock", do_find_block, "find a block in the cache"); 426 add_debugger_command("fdata", do_find_data, "find a data block ptr in the cache"); 427#endif 428 register_kernel_daemon(cache_flusher, NULL, 3); 429#endif 430 431 return 0; 432 433 err: 434 if (bc.lock.s >= 0) 435 
free_lock(&bc.lock); 436 437 if (iovec_lock.s >= 0) 438 free_lock(&iovec_lock); 439 440 shutdown_hash_table(&bc.ht); 441 memset((void *)&bc, 0, sizeof(bc)); 442 return FS_ENOMEM; 443} 444 445 446static struct iovec * 447get_iovec_array(void) 448{ 449 int i; 450 struct iovec *iov; 451 452 LOCK(iovec_lock); 453 454 for(i=0; i < MAX_IOVECS; i++) { 455 if (iovec_used[i] == 0) 456 break; 457 } 458 459 if (i >= MAX_IOVECS) /* uh-oh */ 460 panic("cache: ran out of iovecs (pool 0x%x, used 0x%x)!\n", 461 &iovec_pool[0], &iovec_used[0]); 462 463 if (iovec_pool[i] == NULL) { 464 iovec_pool[i] = (struct iovec *)malloc(sizeof(struct iovec)*NUM_FLUSH_BLOCKS); 465 if (iovec_pool == NULL) 466 panic("can't allocate an iovec!\n"); 467 } 468 469 iov = iovec_pool[i]; 470 iovec_used[i] = 1; 471 472 UNLOCK(iovec_lock); 473 474 return iov; 475} 476 477 478static void 479release_iovec_array(struct iovec *iov) 480{ 481 int i; 482 483 LOCK(iovec_lock); 484 485 for(i=0; i < MAX_IOVECS; i++) { 486 if (iov == iovec_pool[i]) 487 break; 488 } 489 490 if (i < MAX_IOVECS) 491 iovec_used[i] = 0; 492 else /* uh-oh */ 493 printf("cache: released an iovec I don't own (iov %p)\n", iov); 494 495 496 UNLOCK(iovec_lock); 497} 498 499 500 501 502static void 503real_dump_cache_list(cache_ent_list *cel) 504{ 505 cache_ent *ce; 506 507 kprintf("starting from LRU end:\n"); 508 509 for (ce = cel->lru; ce; ce = ce->next) { 510 kprintf("ce %p dev %2d bnum %6Ld lock %d flag %d arg %p " 511 "clone %p\n", ce, ce->dev, ce->block_num, ce->lock, 512 ce->flags, ce->arg, ce->clone); 513 } 514 kprintf("MRU end\n"); 515} 516 517static void 518dump_cache_list(void) 519{ 520 kprintf("NORMAL BLOCKS\n"); 521 real_dump_cache_list(&bc.normal); 522 523 kprintf("LOCKED BLOCKS\n"); 524 real_dump_cache_list(&bc.locked); 525 526 kprintf("cur blocks %d, max blocks %d ht @ %p\n", bc.cur_blocks, 527 bc.max_blocks, &bc.ht); 528} 529 530#if 0 531static void 532check_bcache(char *str) 533{ 534 int count = 0; 535 cache_ent *ce, *prev = 
NULL; 536 537 LOCK(bc.lock); 538 539 for(ce=bc.normal.lru; ce; prev=ce, ce=ce->next) { 540 count++; 541 } 542 543 for(ce=bc.locked.lru; ce; prev=ce, ce=ce->next) { 544 count++; 545 } 546 547 if (count != bc.cur_blocks) { 548 if (count < bc.cur_blocks - 16) 549 panic("%s: count == %d, cur_blocks %d, prev 0x%x\n", 550 str, count, bc.cur_blocks, prev); 551 else 552 printf("%s: count == %d, cur_blocks %d, prev 0x%x\n", 553 str, count, bc.cur_blocks, prev); 554 } 555 556 UNLOCK(bc.lock); 557} 558 559 560static void 561dump_lists(void) 562{ 563 cache_ent *nce; 564 565 printf("LOCKED 0x%x (tail 0x%x, head 0x%x)\n", &bc.locked, 566 bc.locked.lru, bc.locked.mru); 567 for(nce=bc.locked.lru; nce; nce=nce->next) 568 printf("nce @ 0x%x dev %d bnum %ld flags %d lock %d clone 0x%x func 0x%x\n", 569 nce, nce->dev, nce->block_num, nce->flags, nce->lock, nce->clone, 570 nce->func); 571 572 printf("NORMAL 0x%x (tail 0x%x, head 0x%x)\n", &bc.normal, 573 bc.normal.lru, bc.normal.mru); 574 for(nce=bc.normal.lru; nce; nce=nce->next) 575 printf("nce @ 0x%x dev %d bnum %ld flags %d lock %d clone 0x%x func 0x%x\n", 576 nce, nce->dev, nce->block_num, nce->flags, nce->lock, nce->clone, 577 nce->func); 578} 579 580 581static void 582check_lists(void) 583{ 584 cache_ent *ce, *prev, *oce; 585 cache_ent_list *cel; 586 587 cel = &bc.normal; 588 for(ce=cel->lru,prev=NULL; ce; prev=ce, ce=ce->next) { 589 for(oce=bc.locked.lru; oce; oce=oce->next) { 590 if (oce == ce) { 591 dump_lists(); 592 panic("1:ce @ 0x%x is in two lists(cel 0x%x &LOCKED)\n",ce,cel); 593 } 594 } 595 } 596 if (prev && prev != cel->mru) { 597 dump_lists(); 598 panic("*** last element in list != cel mru (ce 0x%x, cel 0x%x)\n", 599 prev, cel); 600 } 601 602 cel = &bc.locked; 603 for(ce=cel->lru,prev=NULL; ce; prev=ce, ce=ce->next) { 604 for(oce=bc.normal.lru; oce; oce=oce->next) { 605 if (oce == ce) { 606 dump_lists(); 607 panic("3:ce @ 0x%x is in two lists(cel 0x%x & DIRTY)\n",ce,cel); 608 } 609 } 610 } 611 if (prev && prev != 
cel->mru) { 612 dump_lists(); 613 panic("*** last element in list != cel mru (ce 0x%x, cel 0x%x)\n", 614 prev, cel); 615 } 616} 617#endif 618 619 620#ifdef DEBUG 621#if 0 622static int 623do_dump(int argc, char **argv) 624{ 625 dump_cache_list(); 626 return 1; 627} 628 629 630static int 631do_find_block(int argc, char **argv) 632{ 633 int i; 634 fs_off_t bnum; 635 cache_ent *ce; 636 637 if (argc < 2) { 638 kprintf("%s: needs a block # argument\n", argv[0]); 639 return 1; 640 } 641 642 for(i=1; i < argc; i++) { 643 bnum = strtoul(argv[i], NULL, 0); 644 645 for(ce=bc.normal.lru; ce; ce=ce->next) { 646 if (ce->block_num == bnum) { 647 kprintf("found clean bnum %ld @ 0x%lx (data @ 0x%lx)\n", 648 bnum, ce, ce->data); 649 } 650 } 651 652 for(ce=bc.locked.lru; ce; ce=ce->next) { 653 if (ce->block_num == bnum) { 654 kprintf("found locked bnum %ld @ 0x%lx (data @ 0x%lx)\n", 655 bnum, ce, ce->data); 656 } 657 } 658 } 659 660 return 0; 661} 662 663static int 664do_find_data(int argc, char **argv) 665{ 666 int i; 667 void *data; 668 cache_ent *ce; 669 670 if (argc < 2) { 671 kprintf("%s: needs a block # argument\n", argv[0]); 672 return 1; 673 } 674 675 for(i=1; i < argc; i++) { 676 data = (void *)strtoul(argv[i], NULL, 0); 677 678 for(ce=bc.normal.lru; ce; ce=ce->next) { 679 if (ce->data == data) { 680 kprintf("found normal data ptr for bnum %ld @ ce 0x%lx\n", 681 ce->block_num, ce); 682 } 683 } 684 685 for(ce=bc.locked.lru; ce; ce=ce->next) { 686 if (ce->data == data) { 687 kprintf("found locked data ptr for bnum %ld @ ce 0x%lx\n", 688 ce->block_num, ce); 689 } 690 } 691 } 692 693 return 0; 694} 695#endif 696#endif /* DEBUG */ 697 698 699 700/* 701 this function detaches the cache_ent from the list. 
702*/ 703static void 704delete_from_list(cache_ent_list *cel, cache_ent *ce) 705{ 706 if (ce->next) 707 ce->next->prev = ce->prev; 708 if (ce->prev) 709 ce->prev->next = ce->next; 710 711 if (cel->lru == ce) 712 cel->lru = ce->next; 713 if (cel->mru == ce) 714 cel->mru = ce->prev; 715 716 ce->next = NULL; 717 ce->prev = NULL; 718} 719 720 721 722/* 723 this function adds the cache_ent ce to the head of the 724 list (i.e. the MRU end). the cache_ent should *not* 725 be in any lists. 726*/ 727static void 728add_to_head(cache_ent_list *cel, cache_ent *ce) 729{ 730if (ce->next != NULL || ce->prev != NULL) { 731 panic("*** ath: ce has non-null next/prev ptr (ce 0x%x nxt 0x%x, prv 0x%x)\n", 732 ce, ce->next, ce->prev); 733} 734 735 ce->next = NULL; 736 ce->prev = cel->mru; 737 738 if (cel->mru) 739 cel->mru->next = ce; 740 cel->mru = ce; 741 742 if (cel->lru == NULL) 743 cel->lru = ce; 744} 745 746 747/* 748 this function adds the cache_ent ce to the tail of the 749 list (i.e. the MRU end). the cache_ent should *not* 750 be in any lists. 751*/ 752static void 753add_to_tail(cache_ent_list *cel, cache_ent *ce) 754{ 755if (ce->next != NULL || ce->prev != NULL) { 756 panic("*** att: ce has non-null next/prev ptr (ce 0x%x nxt 0x%x, prv 0x%x)\n", 757 ce, ce->next, ce->prev); 758} 759 760 ce->next = cel->lru; 761 ce->prev = NULL; 762 763 if (cel->lru) 764 cel->lru->prev = ce; 765 cel->lru = ce; 766 767 if (cel->mru == NULL) 768 cel->mru = ce; 769} 770 771 772static int 773cache_ent_cmp(const void *a, const void *b) 774{ 775 fs_off_t diff; 776 cache_ent *p1 = *(cache_ent **)a, *p2 = *(cache_ent **)b; 777 778 if (p1 == NULL || p2 == NULL) 779 panic("cache_ent pointers are null?!? (a 0x%lx, b 0x%lx\n)\n", a, b); 780 781 if (p1->dev == p2->dev) { 782 diff = p1->block_num - p2->block_num; 783 return (int)diff; 784 } else { 785 return p1->dev - p2->dev; 786 } 787} 788 789#if 0 790// ToDo: add this to the fsh (as a background thread)? 
791static void 792cache_flusher(void *arg, int phase) 793{ 794 int i, num_ents, err; 795 bigtime_t now = system_time(); 796 static cache_ent *ce = NULL; 797 static cache_ent *ents[NUM_FLUSH_BLOCKS]; 798 799 /* 800 if someone else was in the cache recently then just bail out so 801 we don't lock them out unnecessarily 802 */ 803 if ((now - last_cache_access) < 1000000) 804 return; 805 806 LOCK(bc.lock); 807 808 ce = bc.normal.lru; 809 810 for(num_ents=0; ce && num_ents < NUM_FLUSH_BLOCKS; ce=ce->next) { 811 if (ce->flags & CE_BUSY) 812 continue; 813 814 if ((ce->flags & CE_DIRTY) == 0 && ce->clone == NULL) 815 continue; 816 817 ents[num_ents] = ce; 818 ents[num_ents]->flags |= CE_BUSY; 819 num_ents++; 820 } 821 822 /* if we've got some room left over, look for cloned locked blocks */ 823 if (num_ents < NUM_FLUSH_BLOCKS) { 824 ce = bc.locked.lru; 825 826 for(; num_ents < NUM_FLUSH_BLOCKS;) { 827 for(; 828 ce && ((ce->flags & CE_BUSY) || ce->clone == NULL); 829 ce=ce->next) 830 /* skip ents that meet the above criteria */; 831 832 if (ce == NULL) 833 break; 834 835 ents[num_ents] = ce; 836 ents[num_ents]->flags |= CE_BUSY; 837 ce = ce->next; 838 num_ents++; 839 } 840 } 841 842 UNLOCK(bc.lock); 843 844 if (num_ents == 0) 845 return; 846 847 qsort(ents, num_ents, sizeof(cache_ent **), cache_ent_cmp); 848 849 if ((err = flush_ents(ents, num_ents)) != 0) { 850 printf("flush ents failed (ents @ 0x%lx, num_ents %d!\n", 851 (ulong)ents, num_ents); 852 } 853 854 for(i=0; i < num_ents; i++) { /* clear the busy bit on each of ent */ 855 ents[i]->flags &= ~CE_BUSY; 856 } 857} 858#endif 859 860 861static int 862flush_cache_ent(cache_ent *ce) 863{ 864 int ret = 0; 865 void *data; 866 867 /* if true, then there's nothing to flush */ 868 if ((ce->flags & CE_DIRTY) == 0 && ce->clone == NULL) 869 return 0; 870 871 /* same thing here */ 872 if (ce->clone == NULL && ce->lock != 0) 873 return 0; 874 875 restart: 876 if (ce->clone) 877 data = ce->clone; 878 else 879 data = ce->data; 880 
881 if (chatty_io > 2) printf("flush: %7Ld\n", ce->block_num); 882 ret = write_phys_blocks(ce->dev, ce->block_num, data, 1, ce->bsize); 883 884 if (ce->func) { 885 ce->func(ce->logged_bnum, 1, ce->arg); 886 ce->func = NULL; 887 } 888 889 if (ce->clone) { 890 free(ce->clone); 891 ce->clone = NULL; 892 893 if (ce->lock == 0 && (ce->flags & CE_DIRTY)) 894 goto restart; /* also write the real data ptr */ 895 } else { 896 ce->flags &= ~CE_DIRTY; 897 } 898 899 return ret; 900} 901 902 903static int 904flush_ents(cache_ent **ents, int n_ents) 905{ 906 int i, j, k, ret = 0, bsize, iocnt, do_again = 0; 907 fs_off_t start_bnum; 908 struct iovec *iov; 909 910 iov = get_iovec_array(); 911 if (iov == NULL) 912 return FS_ENOMEM; 913 914restart: 915 for(i=0; i < n_ents; i++) { 916 /* if true, then there's nothing to flush */ 917 if ((ents[i]->flags & CE_DIRTY) == 0 && ents[i]->clone == NULL) 918 continue; 919 920 /* if true we can't touch the dirty data yet because it's locked */ 921 if (ents[i]->clone == NULL && ents[i]->lock != 0) 922 continue; 923 924 925 bsize = ents[i]->bsize; 926 start_bnum = ents[i]->block_num; 927 928 for(j=i+1; j < n_ents && (j - i) < NUM_FLUSH_BLOCKS; j++) { 929 if (ents[j]->dev != ents[i]->dev || 930 ents[j]->block_num != start_bnum + (j - i)) 931 break; 932 933 if (ents[j]->clone == NULL && ents[j]->lock != 0) 934 break; 935 } 936 937 if (j == i+1) { /* only one block, just flush it directly */ 938 if ((ret = flush_cache_ent(ents[i])) != 0) 939 break; 940 continue; 941 } 942 943 944 for(k=i,iocnt=0; k < j; k++,iocnt++) { 945 if (ents[k]->clone) 946 iov[iocnt].iov_base = ents[k]->clone; 947 else 948 iov[iocnt].iov_base = ents[k]->data; 949 950 iov[iocnt].iov_len = bsize; 951 } 952 953 if (chatty_io) 954 printf("writev @ %Ld for %d blocks\n", start_bnum, iocnt); 955 956 ret = writev_pos(ents[i]->dev, start_bnum * (fs_off_t)bsize, 957 &iov[0], iocnt); 958 if (ret != iocnt*bsize) { 959 int idx; 960 961 printf("flush_ents: writev failed: iocnt %d start 
bnum %Ld " 962 "bsize %d, ret %d\n", iocnt, start_bnum, bsize, ret); 963 964 for(idx=0; idx < iocnt; idx++) 965 printf("iov[%2d] = %p :: %ld\n", idx, iov[idx].iov_base, 966 (long)iov[idx].iov_len); 967 968 printf("error %s writing blocks %Ld:%d (%d != %d)\n", 969 strerror(errno), start_bnum, iocnt, ret, iocnt*bsize); 970 ret = FS_EINVAL; 971 break; 972 } 973 ret = 0; 974 975 976 for(k=i; k < j; k++) { 977 if (ents[k]->func) { 978 ents[k]->func(ents[k]->logged_bnum, 1, ents[k]->arg); 979 ents[k]->func = NULL; 980 } 981 982 if (ents[k]->clone) { 983 free(ents[k]->clone); 984 ents[k]->clone = NULL; 985 } else { 986 ents[k]->flags &= ~CE_DIRTY; 987 } 988 } 989 990 991 i = j - 1; /* i gets incremented by the outer for loop */ 992 } 993 994 /* 995 here we have to go back through and flush any blocks that are 996 still dirty. with an arched brow you astutely ask, "but how 997 could this happen given the above loop?" Ahhh young grasshopper 998 I say, the path through the cache is long and twisty and fraught 999 with peril. The reason it can happen is that a block can be both 1000 cloned and dirty. The above loop would only flush the cloned half 1001 of the data, not the main dirty block. So we have to go back 1002 through and see if there are any blocks that are still dirty. If 1003 there are we go back to the top of the function and do the whole 1004 thing over. Kind of grody but it is necessary to insure the 1005 correctness of the log for the Be file system. 
1006 */ 1007 if (do_again == 0) { 1008 for(i=0; i < n_ents; i++) { 1009 if ((ents[i]->flags & CE_DIRTY) == 0 || ents[i]->lock) 1010 continue; 1011 1012 do_again = 1; 1013 break; 1014 } 1015 1016 if (do_again) 1017 goto restart; 1018 } 1019 1020 release_iovec_array(iov); 1021 1022 1023 return ret; 1024} 1025 1026static void 1027delete_cache_list(cache_ent_list *cel) 1028{ 1029 void *junk; 1030 cache_ent *ce, *next; 1031 1032 for (ce = cel->lru; ce; ce = next) { 1033 next = ce->next; 1034 if (ce->lock != 0) { 1035 if (ce->func) 1036 printf("*** shutdown_block_cache: block %Ld, lock == %d " 1037 "(arg %p)!\n", ce->block_num, ce->lock, ce->arg); 1038 else 1039 printf("*** shutdown_block_cache: block %Ld, lock == %d!\n", 1040 ce->block_num, ce->lock); 1041 } 1042 1043 if (ce->flags & CE_BUSY) { 1044 printf("* shutdown block cache: bnum %Ld is busy? ce %p\n", 1045 ce->block_num, ce); 1046 } 1047 1048 if ((ce->flags & CE_DIRTY) || ce->clone) { 1049 flush_cache_ent(ce); 1050 } 1051 1052 if (ce->clone) 1053 free(ce->clone); 1054 ce->clone = NULL; 1055 1056 if (ce->data) 1057 free(ce->data); 1058 ce->data = NULL; 1059 1060 if ((junk = hash_delete(&bc.ht, ce->dev, ce->block_num)) != ce) { 1061 printf("*** free_device_cache: bad hash table entry %Ld " 1062 "%p != %p\n", ce->block_num, junk, ce); 1063 } 1064 1065 memset(ce, 0xfd, sizeof(*ce)); 1066 free(ce); 1067 1068 bc.cur_blocks--; 1069 } 1070} 1071 1072 1073void 1074shutdown_block_cache(void) 1075{ 1076 /* print_hash_stats(&bc.ht); */ 1077 1078 if (bc.lock.s > 0) 1079 LOCK(bc.lock); 1080 1081#ifndef USER 1082 unregister_kernel_daemon(cache_flusher, NULL); 1083#endif 1084 1085 delete_cache_list(&bc.normal); 1086 delete_cache_list(&bc.locked); 1087 1088 bc.normal.lru = bc.normal.mru = NULL; 1089 bc.locked.lru = bc.locked.mru = NULL; 1090 1091 shutdown_hash_table(&bc.ht); 1092 1093 if (bc.lock.s > 0) 1094 free_lock(&bc.lock); 1095 bc.lock.s = -1; 1096 1097 if (iovec_lock.s >= 0) 1098 free_lock(&iovec_lock); 1099} 1100 1101 
1102 1103int 1104init_cache_for_device(int fd, fs_off_t max_blocks) 1105{ 1106 int ret = 0; 1107 1108printf("init_cache_for_device(%d, %lld)\n", fd, max_blocks); 1109 1110 if (fd >= MAX_DEVICES) 1111 return -1; 1112 1113 LOCK(bc.lock); 1114 1115 if (max_device_blocks[fd] != 0) { 1116 printf("device %d is already initialized!\n", fd); 1117 ret = -1; 1118 } else { 1119 max_device_blocks[fd] = max_blocks; 1120 } 1121 1122 UNLOCK(bc.lock); 1123 1124 return ret; 1125} 1126 1127 1128 1129/* 1130 this routine assumes that bc.lock has been acquired 1131*/ 1132static cache_ent * 1133block_lookup(int dev, fs_off_t bnum) 1134{ 1135 int count = 0; 1136 cache_ent *ce; 1137 1138 while (1) { 1139 ce = hash_lookup(&bc.ht, dev, bnum); 1140 if (ce == NULL) 1141 return NULL; 1142 1143 if ((ce->flags & CE_BUSY) == 0) /* it's ok, break out and return it */ 1144 break; 1145 1146 /* else, it's busy and we need to retry our lookup */ 1147 UNLOCK(bc.lock); 1148 1149 snooze(5000); 1150 if (count++ == 5000) { /* then a lot of time has elapsed */ 1151 printf("block %Ld isn't coming un-busy (ce @ %p)\n", 1152 ce->block_num, ce); 1153 } 1154 1155 LOCK(bc.lock); 1156 } 1157 1158 if (ce->flags & CE_BUSY) 1159 panic("block lookup: returning a busy block @ 0x%lx?!?\n",(ulong)ce); 1160 1161 return ce; 1162} 1163 1164 1165 1166int 1167set_blocks_info(int dev, fs_off_t *blocks, int nblocks, 1168 void (*func)(fs_off_t bnum, size_t nblocks, void *arg), void *arg) 1169{ 1170 int i, j, cur; 1171 cache_ent *ce; 1172 cache_ent *ents[NUM_FLUSH_BLOCKS]; 1173 1174 LOCK(bc.lock); 1175 1176 1177 for(i=0, cur=0; i < nblocks; i++) { 1178 1179 /* printf("sbi: %ld (arg 0x%x)\n", blocks[i], arg); */ 1180 ce = block_lookup(dev, blocks[i]); 1181 if (ce == NULL) { 1182 panic("*** set_block_info can't find bnum %ld!\n", blocks[i]); 1183 UNLOCK(bc.lock); 1184 return FS_ENOENT; /* hopefully this doesn't happen... 
*/ 1185 } 1186 1187 1188 if (blocks[i] != ce->block_num || dev != ce->dev) { 1189 UNLOCK(bc.lock); 1190 panic("** error1: looked up dev %d block %ld but found dev %d " 1191 "bnum %ld\n", dev, blocks[i], ce->dev, ce->block_num); 1192 return FS_EBADF; 1193 } 1194 1195 if (ce->lock == 0) { 1196 panic("* set_block_info on bnum %ld (%d) but it's not locked!\n", 1197 blocks[i], nblocks); 1198 } 1199 1200 1201 if ((ce->flags & CE_DIRTY) == 0) { 1202 panic("*** set_block_info on non-dirty block bnum %ld (%d)!\n", 1203 blocks[i], nblocks); 1204 } 1205 1206 ce->flags |= CE_BUSY; /* mark all blocks as busy till we're done */ 1207 1208 /* if there is cloned data, it needs to be flushed now */ 1209 if (ce->clone && ce->func) { 1210 ents[cur++] = ce; 1211 1212 if (cur >= NUM_FLUSH_BLOCKS) { 1213 UNLOCK(bc.lock); 1214 1215 qsort(ents, cur, sizeof(cache_ent **), cache_ent_cmp); 1216 1217 flush_ents(ents, cur); 1218 1219 LOCK(bc.lock); 1220 for(j=0; j < cur; j++) 1221 ents[j]->flags &= ~CE_BUSY; 1222 cur = 0; 1223 } 1224 } 1225 } 1226 1227 1228 if (cur != 0) { 1229 UNLOCK(bc.lock); 1230 1231 qsort(ents, cur, sizeof(cache_ent **), cache_ent_cmp); 1232 1233 flush_ents(ents, cur); 1234 1235 LOCK(bc.lock); 1236 for(j=0; j < cur; j++) 1237 ents[j]->flags &= ~CE_BUSY; 1238 cur = 0; 1239 } 1240 1241 1242 /* now go through and set the info that we were asked to */ 1243 for (i = 0; i < nblocks; i++) { 1244 /* we can call hash_lookup() here because we know it's around */ 1245 ce = hash_lookup(&bc.ht, dev, blocks[i]); 1246 if (ce == NULL) { 1247 panic("*** set_block_info can't find bnum %Ld!\n", blocks[i]); 1248 UNLOCK(bc.lock); 1249 return FS_ENOENT; /* hopefully this doesn't happen... 
    */
    }

        /* block is now clean and available; drop dirty/busy state */
        ce->flags &= ~(CE_DIRTY | CE_BUSY);

        /* a block being logged must not already have a journal callback */
        if (ce->func != NULL) {
            panic("*** set_block_info non-null callback on bnum %Ld\n",
                  ce->block_num);
        }

        /* nor may it already have a clone (would leak / double-log) */
        if (ce->clone != NULL) {
            panic("*** ce->clone == %p, not NULL in set_block_info\n",
                  ce->clone);
        }

        /* snapshot the current contents so the journal can flush this
           exact version later even if the live copy is modified again */
        ce->clone = (void *)malloc(ce->bsize);
        if (ce->clone == NULL)
            panic("*** can't clone bnum %Ld (bsize %d)\n",
                  ce->block_num, ce->bsize);


        memcpy(ce->clone, ce->data, ce->bsize);

        /* journal completion callback + argument for this block */
        ce->func = func;
        ce->arg  = arg;

        /* remember which log block this data was written to */
        ce->logged_bnum = blocks[i];

        /* release the reference the caller held on this block */
        ce->lock--;
        if (ce->lock < 0) {
            printf("sbi: whoa nellie! ce @ %p (%Ld) has lock == %d\n",
                   ce, ce->block_num, ce->lock);
        }

        /* no longer locked: move from the locked list to the normal list */
        if (ce->lock == 0) {
            delete_from_list(&bc.locked, ce);
            add_to_head(&bc.normal, ce);
        }
    }

    UNLOCK(bc.lock);

    return 0;
}


/*
   this function is only for use by flush_device().

   Flushes `max` entries from `ents`.  Caller must hold bc.lock: the
   entries are marked CE_BUSY under the lock, the lock is dropped for
   the actual I/O, and re-acquired before the busy bits are cleared.
   Entries are sorted by cache_ent_cmp so the writes go out in block
   order.
*/
static void
do_flush(cache_ent **ents, int max)
{
    int i;

    for(i=0; i < max; i++) {
        ents[i]->flags |= CE_BUSY;
    }

    UNLOCK(bc.lock);

    /* NOTE(review): element size is sizeof(cache_ent **) but the array
       elements are cache_ent * -- both are pointer-sized, so this is
       harmless, just a misleading spelling */
    qsort(ents, max, sizeof(cache_ent **), cache_ent_cmp);
    flush_ents(ents, max);

    LOCK(bc.lock);
    for(i=0; i < max; i++) {
        ents[i]->flags &= ~CE_BUSY;
    }
}

/*
   Write out all dirty/cloned blocks belonging to device `dev`.

   Walks the normal list flushing dirty or cloned blocks, then the
   locked list flushing cloned blocks only, batching up to
   NUM_FLUSH_BLOCKS entries per do_flush() call.  Because do_flush()
   drops bc.lock, the list may change under us, so after each batch
   the scan restarts from the LRU end.  Always returns 0.

   NOTE(review): the `warn_locked` parameter is never used in this body.
*/
int
flush_device(int dev, int warn_locked)
{
    int cur;
    cache_ent *ce;
    cache_ent *ents[NUM_FLUSH_BLOCKS];

    LOCK(bc.lock);

    cur = 0;
    ce = bc.normal.lru;
    while (ce) {
        /* skip blocks for other devices and blocks already being flushed */
        if (ce->dev != dev || (ce->flags & CE_BUSY)) {
            ce = ce->next;
            continue;
        }

        if ((ce->flags & CE_DIRTY) || ce->clone) {
            ents[cur++] = ce;
            if (cur >= NUM_FLUSH_BLOCKS) {
                do_flush(ents, cur);

                /* do_flush() released the lock; restart the scan */
                ce = bc.normal.lru;
                cur = 0;
                continue;
            }
        }

        ce = ce->next;
    }

    if (cur != 0)
        do_flush(ents, cur);

    cur = 0;
    ce = bc.locked.lru;
    while (ce) {
        if (ce->dev != dev || (ce->flags & CE_BUSY)) {
            ce = ce->next;
            continue;
        }

        /* locked blocks are only flushed via their journal clones */
        if (ce->clone) {
            ents[cur++] = ce;
            if (cur >= NUM_FLUSH_BLOCKS) {
                do_flush(ents, cur);

                ce = bc.locked.lru;
                cur = 0;
                continue;
            }
        }

        ce = ce->next;
    }

    if (cur != 0)
        do_flush(ents, cur);

    UNLOCK(bc.lock);

    return 0;
}


/*
   Remove every cache entry for device `dev` from the list `cel`,
   optionally (allow_writes == ALLOW_WRITES) flushing dirty/cloned
   blocks first.  Frees the entry, its data and clone, and deletes it
   from the hash table.  Caller must hold bc.lock.
*/
static void
real_remove_cached_blocks(int dev, int allow_writes, cache_ent_list *cel)
{
    void *junk;
    cache_ent *ce, *next = NULL;

    /* grab `next` up front since ce is freed inside the loop */
    for(ce=cel->lru; ce; ce=next) {
        next = ce->next;

        if (ce->dev != dev) {
            continue;
        }

        /* shouldn't happen at unmount time; warn but proceed anyway */
        if (ce->lock != 0 || (ce->flags & CE_BUSY)) {
            printf("*** remove_cached_dev: block %Ld has lock = %d, flags "
                   "0x%x! ce @ 0x%lx\n", ce->block_num, ce->lock, ce->flags,
                   (ulong)ce);
        }

        if (allow_writes == ALLOW_WRITES &&
            ((ce->flags & CE_DIRTY) || ce->clone)) {
            ce->flags |= CE_BUSY;
            flush_cache_ent(ce);
            ce->flags &= ~CE_BUSY;
        }

        /* unlink this guy */
        if (cel->lru == ce)
            cel->lru = ce->next;

        if (cel->mru == ce)
            cel->mru = ce->prev;

        if (ce->prev)
            ce->prev->next = ce->next;
        if (ce->next)
            ce->next->prev = ce->prev;

        if (ce->clone)
            free(ce->clone);
        ce->clone = NULL;

        if (ce->data)
            free(ce->data);
        ce->data = NULL;

        if ((junk = hash_delete(&bc.ht, ce->dev, ce->block_num)) != ce) {
            panic("*** remove_cached_device: bad hash table entry %ld "
                  "0x%lx != 0x%lx\n", ce->block_num, (ulong)junk, (ulong)ce);
        }

        free(ce);

        bc.cur_blocks--;
    }

}

/*
   Drop all cached blocks for device `dev` (normal and locked lists),
   writing dirty data out first when allow_writes says so, and reset
   the device's size bookkeeping.  Used at unmount time.  Returns 0.
*/
int
remove_cached_device_blocks(int dev, int allow_writes)
{
printf("remove_cached_device_blocks(%d, %d)\n", dev, allow_writes);

    LOCK(bc.lock);

    real_remove_cached_blocks(dev, allow_writes, &bc.normal);
    real_remove_cached_blocks(dev, allow_writes, &bc.locked);

    max_device_blocks[dev] = 0;

    UNLOCK(bc.lock);

    return 0;
}



/*
   Flush the range [bnum, bnum+nblocks) of device `dev` to disk, in
   sorted batches of up to NUM_FLUSH_BLOCKS.  Blocks not present in
   the cache, or present but clean and clone-less, are skipped.
   Returns 0 on success or FS_EBADF on a corrupt hash lookup.
*/
int
flush_blocks(int dev, fs_off_t bnum, int nblocks)
{
    int cur, i;
    cache_ent *ce;
    cache_ent *ents[NUM_FLUSH_BLOCKS];

    if (nblocks == 0)   /* might as well check for this */
        return 0;

    LOCK(bc.lock);

    cur = 0;
    for(; nblocks > 0; nblocks--, bnum++) {
        ce = block_lookup(dev, bnum);
        if (ce == NULL)
            continue;

        /* paranoia: the hash table must hand back the block we asked for */
        if (bnum != ce->block_num || dev != ce->dev) {
            UNLOCK(bc.lock);
            panic("error2: looked up dev %d block %ld but found %d %ld\n",
                  dev, bnum, ce->dev, ce->block_num);
            return FS_EBADF;
        }

        if ((ce->flags & CE_DIRTY) == 0 && ce->clone == NULL)
            continue;

        ce->flags |= CE_BUSY;
        ents[cur++] = ce;

        if (cur >= NUM_FLUSH_BLOCKS) {
            UNLOCK(bc.lock);

            qsort(ents, cur, sizeof(cache_ent **), cache_ent_cmp);
            flush_ents(ents, cur);

            LOCK(bc.lock);
            for(i=0; i < cur; i++) {
                ents[i]->flags &= ~CE_BUSY;
            }
            cur = 0;
        }
    }

    UNLOCK(bc.lock);

    if (cur == 0)     /* nothing more to do */
        return 0;

    /* flush out the last few buggers */
    qsort(ents, cur, sizeof(cache_ent **), cache_ent_cmp);
    flush_ents(ents, cur);

    /* NOTE(review): unlike the in-loop batch above, these CE_BUSY bits
       are cleared without re-acquiring bc.lock -- looks racy; confirm
       whether flush_ents()/CE_BUSY makes this safe */
    for(i=0; i < cur; i++) {
        ents[i]->flags &= ~CE_BUSY;
    }

    return 0;
}


/*
   Mark the range [bnum, bnum+nblocks) of device `dev` dirty so it
   will be written out later.  Returns 0, or FS_ENOENT if any block
   in the range is not in the cache (remaining blocks are skipped).
*/
int
mark_blocks_dirty(int dev, fs_off_t bnum, int nblocks)
{
    int ret = 0;
    cache_ent *ce;

    LOCK(bc.lock);

    while(nblocks > 0) {
        ce = block_lookup(dev, bnum);
        if (ce) {
            ce->flags |= CE_DIRTY;
            bnum      += 1;
            nblocks   -= 1;
        } else {     /* hmmm, that's odd, didn't find it */
            printf("** mark_blocks_diry couldn't find block %Ld (len %d)\n",
                   bnum, nblocks);
            ret = FS_ENOENT;
            break;
        }
    }

    UNLOCK(bc.lock);

    return ret;
}



/*
   Drop one reference on a block acquired via get_block()/
   get_empty_block()/cached_write_locked().  When the lock count hits
   zero the block moves from the locked list back to the normal
   (evictable) list.  Panics if the block is not in the cache.
   Returns 0, or FS_EBADF on a corrupt hash lookup.
*/
int
release_block(int dev, fs_off_t bnum)
{
    cache_ent *ce;

    /* printf("rlsb: %ld\n", bnum); */
    LOCK(bc.lock);

    ce = block_lookup(dev, bnum);
    if (ce) {
        if (bnum != ce->block_num || dev != ce->dev) {
            panic("*** error3: looked up dev %d block %ld but found %d %ld\n",
                  dev, bnum, ce->dev, ce->block_num);
            UNLOCK(bc.lock);
            return FS_EBADF;
        }

        ce->lock--;

        if (ce->lock < 0) {
            printf("rlsb: whoa nellie! ce %Ld has lock == %d\n",
                   ce->block_num, ce->lock);
        }

        if (ce->lock == 0) {
            delete_from_list(&bc.locked, ce);
            add_to_head(&bc.normal, ce);
        }

    } else {     /* hmmm, that's odd, didn't find it */
        panic("** release_block asked to find %ld but it's not here\n",
              bnum);
    }

    UNLOCK(bc.lock);

    return 0;
}


/*
   Allocate and zero a fresh cache_ent with a `bsize`-byte data
   buffer.  dev/block_num are set to -1 to mark the entry as not yet
   belonging to any block.  Returns NULL on allocation failure (after
   panicking, which may not return).
*/
static cache_ent *
new_cache_ent(int bsize)
{
    cache_ent *ce;

    ce = (cache_ent *)calloc(1, sizeof(cache_ent));
    if (ce == NULL) {
        panic("*** error: cache can't allocate memory!\n");
        return NULL;
    }

    ce->data = malloc(bsize);
    if (ce->data == NULL) {
        free(ce);
        panic("** error cache can't allocate data memory\n");
        /* NOTE(review): this UNLOCK is suspicious -- the caller
           (get_ents) holds bc.lock and will unlock it itself, so this
           path would unbalance the lock; confirm it is unreachable
           (panic above presumably does not return) */
        UNLOCK(bc.lock);
        return NULL;        /* not reached */
    }

    ce->dev       = -1;
    ce->block_num = -1;

    return ce;
}


/*
   Obtain `num_needed` usable cache entries (up to `max` total) for
   block size `bsize`, either by allocating new ones while the cache
   is below its limit or by claiming victims from the LRU end of the
   normal list.  Claimed entries are marked CE_BUSY.  Caller must hold
   bc.lock; the lock is dropped/re-acquired while waiting for busy
   blocks to free up.  On return *num_gotten is how many entries were
   actually claimed -- possibly more than num_needed (extra trailing
   dirty blocks picked up so they can be flushed in one batch),
   possibly fewer only if panic() returns.
*/
static void
get_ents(cache_ent **ents, int num_needed, int max, int *num_gotten, int bsize)
{
    int cur, retry_counter = 0, max_retry = num_needed * 256;
    cache_ent *ce;

    if (num_needed > max)
        panic("get_ents: num_needed %d but max %d (doh!)\n", num_needed, max);

    /* if the cache isn't full yet, just allocate the blocks */
    for(cur=0; bc.cur_blocks < bc.max_blocks && cur < num_needed; cur++) {
        ents[cur] = new_cache_ent(bsize);
        if (ents[cur] == NULL)
            break;
        bc.cur_blocks++;
    }

    /* pluck off blocks from the LRU end of the normal list, keep trying too */
    while(cur < num_needed && retry_counter < max_retry) {
        for(ce=bc.normal.lru; ce && cur < num_needed; ce=ce->next) {
            if (ce->lock)
                panic("get_ents: normal list has locked blocks (ce 0x%x)\n",ce);

            if (ce->flags & CE_BUSY)   /* don't touch busy blocks */
                continue;

            ce->flags   |= CE_BUSY;
            ents[cur++]  = ce;
        }

        if (cur < num_needed) {
            /* everything is busy: sleep 10ms and rescan */
            UNLOCK(bc.lock);
            snooze(10000);
            LOCK(bc.lock);
            retry_counter++;
        }
    }

    if (cur < num_needed && retry_counter >= max_retry) {  /* oh shit! */
        dump_cache_list();
        UNLOCK(bc.lock);
        panic("get_ents: waited too long; can't get enough ce's (c %d n %d)\n",
              cur, num_needed);
    }

    /*
       If the last block is a dirty one, try to get more of 'em so
       that we can flush a bunch of blocks at once.
    */
    if (cur && cur < max &&
        ((ents[cur-1]->flags & CE_DIRTY) || ents[cur-1]->clone)) {

        for(ce=ents[cur-1]->next; ce && cur < max; ce=ce->next) {
            if (ce->flags & CE_BUSY)   /* don't touch busy blocks */
                continue;

            if (ce->lock)
                panic("get_ents:2 dirty list has locked blocks (ce 0x%x)\n",ce);

            ce->flags   |= CE_BUSY;
            ents[cur++]  = ce;
        }
    }

    *num_gotten = cur;
}


/*
   Fill the data buffers of `num` cache entries with `num` consecutive
   blocks starting at `bnum` of device `dev`, using one scatter/gather
   readv_pos() call.  Returns 0 on success, FS_EINVAL on a short read.
*/
static int
read_into_ents(int dev, fs_off_t bnum, cache_ent **ents, int num, int bsize)
{
    int i, ret;
    struct iovec *iov;

    iov = get_iovec_array();

    for (i = 0; i < num; i++) {
        iov[i].iov_base = ents[i]->data;
        iov[i].iov_len  = bsize;
    }

    if (chatty_io > 2)
        printf("readv @ %Ld for %d blocks (at %Ld, block_size = %d)\n", bnum, num, bnum*bsize, bsize);
    ret = readv_pos(dev, bnum*bsize, iov, num);

    release_iovec_array(iov);

    if (ret != num*bsize) {
        printf("read_into_ents: asked to read %d bytes but got %d\n",
               num*bsize, ret);
        /* NOTE(review): iov was already released above; printing the
           pointer is fine but do not dereference it here */
        printf("*** iov @ %p (num %d)\n", iov, num);
        return FS_EINVAL;
    } else
        return 0;
}



/* operation flags for cache_block_io() */
#define CACHE_READ           0x0001
#define CACHE_WRITE          0x0002
#define CACHE_NOOP           0x0004   /* for getting empty blocks */
#define CACHE_LOCKED         0x0008
#define CACHE_READ_AHEAD_OK  0x0010   /* it's ok to do read-ahead */


/*
   Render an op flag combination as a human-readable string for debug
   output.  Returns a pointer to a static buffer (not thread-safe).
*/
static char *
op_to_str(int op)
{
    static char buff[128];

    if (op & CACHE_READ)
        strcpy(buff, "READ");
    else if (op & CACHE_WRITE)
        strcpy(buff, "WRITE");
    else if (op & CACHE_NOOP)
        strcpy(buff, "NOP");

    if (op & CACHE_LOCKED)
        strcat(buff, " LOCKED");

    if (op & CACHE_READ_AHEAD_OK)
        strcat(buff, " (AHEAD)");

    return buff;
}

/*
   The heart of the cache: perform a read, write, or no-op (zero-fill)
   of `num_blocks` blocks starting at `bnum` on device `dev`, going
   through the block cache.

   data     -- caller's buffer (may be NULL for single-block get_block()
               style calls, in which case dataptr receives a pointer to
               the cache's own copy)
   op       -- combination of the CACHE_* flags above
   dataptr  -- out-parameter for the cached data pointer (CACHE_LOCKED
               usage)

   I/O of 64k or more bypasses the cache entirely (but still keeps any
   cached copies coherent).  Otherwise each block is either serviced
   from the cache or loaded via get_ents()/read_into_ents(), with
   optional read-ahead.  Returns 0 on success or an FS_E* error code.
*/
static int
cache_block_io(int dev, fs_off_t bnum, void *data, fs_off_t num_blocks, int bsize,
               int op, void **dataptr)
{
    size_t err = 0;
    cache_ent *ce;
    cache_ent_list *cel = NULL;

    if (chatty_io > 1)
        printf("cbio: bnum = %Ld, num_blocks = %Ld, bsize = %d, op = %s\n", bnum, num_blocks,
               bsize, op_to_str(op));

    /* some sanity checks first */
    if (bsize == 0)
        panic("cache_io: block size == 0 for bnum %Ld?!?\n", bnum);

    if (num_blocks == 0)
        panic("cache_io: bnum %Ld has num_blocks == 0!\n", bnum);

    if (data == NULL && dataptr == NULL) {
        printf("major butthead move: null data and dataptr! bnum %Ld:%Ld\n",
               bnum, num_blocks);
        return FS_ENOMEM;
    }

    if (data == NULL) {
        if (num_blocks != 1)    /* get_block() should never do that */
            panic("cache_io: num_blocks %Ld but should be 1\n",
                  num_blocks);

        if (op & CACHE_WRITE)
            panic("cache_io: get_block() asked to write?!?\n");
    }

    if (bnum + num_blocks > max_device_blocks[dev]) {
        printf("dev %d: access to blocks %Ld:%Ld but max_dev_blocks is %Ld\n",
               dev, bnum, num_blocks, max_device_blocks[dev]);

        // let the app crash here
        debugger("Accessed blocks out of device range!");
        //*(int *)0x3100 = 0xc0debabe;

        return FS_EINVAL;
    }

    last_cache_access = system_time();

    /* if the i/o is greater than 64k, do it directly */
    if (num_blocks * bsize >= 64 * 1024) {
        char *ptr;
        fs_off_t tmp;

        if (data == NULL || (op & CACHE_LOCKED)) {
            panic("*** asked to do a large locked io that's too hard!\n");
        }


        if (op & CACHE_READ) {
            if (read_phys_blocks(dev, bnum, data, num_blocks, bsize) != 0) {
                printf("cache read:read_phys_blocks failed (%s on blocks %Ld:%Ld)!\n",
                       strerror(errno), bnum, num_blocks);
                return FS_EINVAL;
            }

            LOCK(bc.lock);

            /* if any of the blocks are in the cache, grab them instead */
            ptr = data;
            for(tmp=bnum; tmp < bnum+num_blocks; tmp++, ptr+=bsize) {
                ce = block_lookup(dev, tmp);
                /*
                   if we find a block in the cache we have to copy its
                   data just in case it is more recent than what we just
                   read from disk (which could happen if someone wrote
                   these blocks after we did the read but before we locked
                   the cache and entered this loop).
                */
                if (ce) {
                    if (tmp != ce->block_num || dev != ce->dev) {
                        UNLOCK(bc.lock);
                        panic("*** error4: looked up dev %d block %Ld but "
                              "found %d %Ld\n", dev, tmp, ce->dev,
                              ce->block_num);
                    }

                    memcpy(ptr, ce->data, bsize);
                }
            }

            UNLOCK(bc.lock);
        } else if (op & CACHE_WRITE) {
            LOCK(bc.lock);

            /* if any of the blocks are in the cache, update them too */
            ptr = data;
            for(tmp=bnum; tmp < bnum+num_blocks; tmp++, ptr+=bsize) {
                ce = block_lookup(dev, tmp);
                if (ce) {
                    if (tmp != ce->block_num || dev != ce->dev) {
                        UNLOCK(bc.lock);
                        panic("*** error5: looked up dev %d block %Ld but "
                              "found %d %Ld\n", dev, tmp, ce->dev,
                              ce->block_num);
                        return FS_EBADF;
                    }

                    /* XXXdbg -- this isn't strictly necessary */
                    if (ce->clone) {
                        printf("over-writing cloned data (ce %p bnum %Ld)...\n", ce, tmp);
                        flush_cache_ent(ce);
                    }

                    /* copy the data into the cache */
                    memcpy(ce->data, ptr, bsize);
                }
            }

            UNLOCK(bc.lock);

            if (write_phys_blocks(dev, bnum, data, num_blocks, bsize) != 0) {
                printf("cache write: write_phys_blocks failed (%s on blocks "
                       "%Ld:%Ld)!\n", strerror(errno), bnum, num_blocks);
                return FS_EINVAL;
            }
        } else {
            printf("bad cache op %d (bnum %Ld nblocks %Ld)\n", op, bnum, num_blocks);
            return FS_EINVAL;
        }

        return 0;
    }


    LOCK(bc.lock);
    while(num_blocks) {

        ce = block_lookup(dev, bnum);
        if (ce) {
            /* ---- cache hit: service this one block from the cache ---- */
            if (bnum != ce->block_num || dev != ce->dev) {
                UNLOCK(bc.lock);
                panic("*** error6: looked up dev %d block %ld but found "
                      "%d %ld\n", dev, bnum, ce->dev, ce->block_num);
                return FS_EBADF;
            }

            if (bsize != ce->bsize) {
                panic("*** requested bsize %d but ce->bsize %d ce @ 0x%x\n",
                      bsize, ce->bsize, ce);
            }

            /* delete this ent from the list it is in because it may change */
            if (ce->lock)
                cel = &bc.locked;
            else
                cel = &bc.normal;

            delete_from_list(cel, ce);

            if (op & CACHE_READ) {
                if (data && data != ce->data) {
                    memcpy(data, ce->data, bsize);
                } else if (dataptr) {
                    *dataptr = ce->data;
                } else {
                    printf("cbio:data %p dptr %p ce @ %p ce->data %p\n",
                           data, dataptr, ce, ce->data);
                }
            } else if (op & CACHE_WRITE) {
                if (data && data != ce->data)
                    memcpy(ce->data, data, bsize);

                ce->flags |= CE_DIRTY;
            } else if (op & CACHE_NOOP) {
                /* zero-fill both the cached copy and the caller's view */
                memset(ce->data, 0, bsize);
                if (data)
                    memset(data, 0, bsize);

                if (dataptr)
                    *dataptr = ce->data;

                ce->flags |= CE_DIRTY;
            } else {
                panic("cached_block_io: bogus op %d\n", op);
            }

            if (op & CACHE_LOCKED)
                ce->lock++;

            if (ce->lock)
                cel = &bc.locked;
            else
                cel = &bc.normal;

            /* now put this ent at the head of the appropriate list */
            add_to_head(cel, ce);

            if (data != NULL)
                data = (void *)((char *)data + bsize);

            bnum       += 1;
            num_blocks -= 1;

            continue;
        } else {                                  /* it's not in the cache */
            int cur, cur_nblocks, num_dirty, real_nblocks, num_needed;
            cache_ent *ents[NUM_FLUSH_BLOCKS];

            /*
               here we find out how many additional blocks in this request
               are not in the cache.  the idea is that then we can do one
               big i/o on that many blocks at once.
            */
            for(cur_nblocks=1;
                cur_nblocks < num_blocks && cur_nblocks < NUM_FLUSH_BLOCKS;
                cur_nblocks++) {

                /* we can call hash_lookup() directly instead of
                   block_lookup() because we don't care about the
                   state of the busy bit of the block at this point
                */
                if (hash_lookup(&bc.ht, dev, bnum + cur_nblocks))
                    break;
            }

            /*
               here we try to figure out how many extra blocks we should read
               for read-ahead.  we want to read as many as possible that are
               not already in the cache and that don't cause us to try and
               read beyond the end of the disk.
            */
            if ((op & CACHE_READ) && (op & CACHE_READ_AHEAD_OK) &&
                (cur_nblocks * bsize) < read_ahead_size) {

                for(num_needed=cur_nblocks;
                    num_needed < (read_ahead_size / bsize);
                    num_needed++) {

                    if ((bnum + num_needed) >= max_device_blocks[dev])
                        break;

                    if (hash_lookup(&bc.ht, dev, bnum + num_needed))
                        break;
                }
            } else {
                num_needed = cur_nblocks;
            }

            /* this will get us pointers to a bunch of cache_ents we can use */
            get_ents(ents, num_needed, NUM_FLUSH_BLOCKS, &real_nblocks, bsize);

            if (real_nblocks < num_needed) {
                panic("don't have enough cache ents (need %d got %d %ld::%d)\n",
                      num_needed, real_nblocks, bnum, num_blocks);
            }

            /*
               There are now three variables used as limits within the ents
               array.  This is how they are related:

                   cur_nblocks <= num_needed <= real_nblocks

               Ents from 0 to cur_nblocks-1 are going to be used to fulfill
               this IO request.  Ents from cur_nblocks to num_needed-1 are
               for read-ahead.  Ents from num_needed to real_nblocks are
               extra blocks that get_ents() asked us to flush.  Often (and
               always on writes) cur_nblocks == num_needed.

               Below, we sort the list of ents so that when we flush them
               they go out in order.
            */

            qsort(ents, real_nblocks, sizeof(cache_ent **), cache_ent_cmp);

            /*
               delete each ent from its list because it will change.  also
               count up how many dirty blocks there are and insert into the
               hash table any new blocks so that no one else will try to
               read them in when we release the cache semaphore to do our I/O.
            */
            for(cur=0,num_dirty=0; cur < real_nblocks; cur++) {
                ce = ents[cur];
                ce->flags |= CE_BUSY;

                /*
                   insert the new block into the hash table with its new block
                   number. note that the block is still in the hash table for
                   its old block number -- and it has to be until we are done
                   flushing it from the cache (to prevent someone else from
                   sneaking in in front of us and trying to read the same
                   block that we're flushing).
                */
                if (cur < num_needed) {
                    if (hash_insert(&bc.ht, dev, bnum + cur, ce) != 0)
                        panic("could not insert cache ent for %d %ld (0x%lx)\n",
                              dev, bnum + cur, (ulong)ents[cur]);
                }

                if (ce->dev == -1)       /* freshly allocated, not in a list */
                    continue;

                if ((ce->flags & CE_DIRTY) || ce->clone)
                    num_dirty++;

                if (ce->lock)
                    panic("cbio: can't use locked blocks here ce @ 0x%x\n",ce);
                else
                    cel = &bc.normal;

                delete_from_list(cel, ce);
            }
            ce = NULL;


            /*
               we release the block cache semaphore here so that we can
               go do all the i/o we need to do (flushing dirty blocks
               that we're kicking out as well as reading any new data).

               because all the blocks we're touching are marked busy
               no one else should mess with them while we're doing this.
            */
            if (num_dirty || (op & CACHE_READ)) {
                UNLOCK(bc.lock);

                /* this flushes any blocks we're kicking out that are dirty */
                if (num_dirty && (err = flush_ents(ents, real_nblocks)) != 0) {
                    printf("flush ents failed (ents @ 0x%lx, nblocks %d!\n",
                           (ulong)ents, cur_nblocks);
                    goto handle_err;
                }

            }

            /*
               now that everything is flushed to disk, go through and
               make sure that the data blocks we're going to use are
               the right block size for this current request (it's
               possible we're kicking out some smaller blocks and need
               to reallocate the data block pointer). We do this in two
               steps, first free'ing everything and then going through
               and doing the malloc's to try and be nice to the memory
               system (i.e. allow it to coalesce stuff, etc).
            */
            err = 0;
            for(cur=0; cur < num_needed; cur++) {
                if (ents[cur]->bsize != bsize) {
                    free(ents[cur]->data);
                    ents[cur]->data = NULL;

                    if (ents[cur]->clone) {
                        free(ents[cur]->clone);
                        ents[cur]->clone = NULL;
                    }
                }
            }

            for(cur=0; cur < num_needed; cur++) {
                if (ents[cur]->data == NULL) {
                    ents[cur]->data = (void *)malloc(bsize);
                    ents[cur]->bsize = bsize;
                }

                if (ents[cur]->data == NULL) {
                    printf("cache: no memory for block (bsize %d)!\n",
                           bsize);
                    err = FS_ENOMEM;
                    break;
                }
            }

            /*
               if this condition is true it's a pretty serious error.
               we'll try and back out gracefully but we're in pretty
               deep at this point and it ain't going to be easy.
            */
  handle_err:
            if (err) {
                for(cur=0; cur < num_needed; cur++) {
                    cache_ent *tmp_ce;

                    /* remove the entry under both its new block number... */
                    tmp_ce = (cache_ent *)hash_delete(&bc.ht,dev,bnum+cur);
                    if (tmp_ce != ents[cur]) {
                        panic("hash_del0: %d %ld got 0x%lx, not 0x%lx\n",
                              dev, bnum+cur, (ulong)tmp_ce,
                              (ulong)ents[cur]);
                    }

                    /* ...and its old one */
                    tmp_ce = (cache_ent *)hash_delete(&bc.ht,ents[cur]->dev,
                                                      ents[cur]->block_num);
                    if (tmp_ce != ents[cur]) {
                        panic("hash_del1: %d %ld got 0x%lx, not 0x%lx\n",
                              ents[cur]->dev, ents[cur]->block_num, (ulong)tmp_ce,
                              (ulong)ents[cur]);
                    }

                    ents[cur]->flags &= ~CE_BUSY;
                    if (ents[cur]->data)
                        free(ents[cur]->data);
                    free(ents[cur]);
                    ents[cur] = NULL;

                    bc.cur_blocks--;
                }

                if (cur < real_nblocks) {
                    LOCK(bc.lock);
                    for(; cur < real_nblocks; cur++) {
                        ents[cur]->flags &= ~CE_BUSY;

                        /* we have to put them back here */
                        add_to_tail(&bc.normal, ents[cur]);
                    }
                    UNLOCK(bc.lock);
                }

                return FS_ENOMEM;
            }


            /*
               If we go into this if statement, the block cache lock
               has *already been released* up above when we flushed the
               dirty entries.  As always, since the blocks we're mucking
               with are marked busy, they shouldn't get messed with.
            */
            err = 0;
            if (num_dirty || (op & CACHE_READ)) {
                /* this section performs the i/o that we need to do */
                if (op & CACHE_READ) {
                    err = read_into_ents(dev, bnum, ents, num_needed, bsize);
                } else {
                    err = 0;
                }

                if (err != 0) {
                    printf("err %s on dev %d block %Ld:%d (%d) "
                           "data %p, ents[0] %p\n",
                           strerror(errno), dev, bnum, cur_nblocks,
                           bsize, data, ents[0]);
                }

                /*
                   acquire the semaphore here so that we can go on mucking
                   with the cache data structures.  We need to delete old
                   block numbers from the hash table and set the new block
                   number's for the blocks we just read in.  We also put the
                   read-ahead blocks at the head of mru list.
                */

                LOCK(bc.lock);
            }

            for(cur=0; cur < num_needed; cur++) {
                cache_ent *tmp_ce;

                ce = ents[cur];
                /* drop recycled entries' old hash table identity */
                if (ce->dev != -1) {
                    tmp_ce = hash_delete(&bc.ht, ce->dev, ce->block_num);
                    if (tmp_ce == NULL || tmp_ce != ce) {
                        panic("*** hash_delete failure (ce 0x%x tce 0x%x)\n",
                              ce, tmp_ce);
                    }
                }

                /* read-ahead blocks go straight onto the normal list */
                if (err == 0 && cur >= cur_nblocks) {
                    ce->dev       = dev;
                    ce->block_num = bnum + cur;
                    ce->flags    &= ~CE_BUSY;
                    add_to_head(&bc.normal, ce);
                }
            }
            ce = NULL;

            /*
               clear the busy bit on the blocks we force-flushed and
               put them on the normal list since they're now clean.
            */
            for(; cur < real_nblocks; cur++) {
                ents[cur]->flags &= ~CE_BUSY;

                if (ents[cur]->lock)
                    panic("should not have locked blocks here (ce 0x%x)\n",
                          ents[cur]);

                add_to_tail(&bc.normal, ents[cur]);
            }

            if (err) {   /* then we have some cleanup to do */
                for(cur=0; cur < num_needed; cur++) {
                    cache_ent *tmp_ce;

                    /* we delete all blocks from the cache so we don't
                       leave partially written blocks in the cache */

                    tmp_ce = (cache_ent *)hash_delete(&bc.ht,dev,bnum+cur);
                    if (tmp_ce != ents[cur]) {
                        panic("hash_del: %d %ld got 0x%lx, not 0x%lx\n",
                              dev, bnum+cur, (ulong)tmp_ce,
                              (ulong)ents[cur]);
                    }

                    ce = ents[cur];
                    ce->flags &= ~CE_BUSY;

                    free(ce->data);
                    ce->data = NULL;

                    free(ce);
                    ents[cur] = NULL;

                    bc.cur_blocks--;
                }
                ce = NULL;

                UNLOCK(bc.lock);
                return err;
            }


            /*
               last step: go through and make sure all the cache_ent
               structures have the right data in them, delete old guys, etc.
            */
            for(cur=0; cur < cur_nblocks; cur++) {
                ce = ents[cur];

                if (ce->dev != -1) {     /* then clean this guy up */
                    if (ce->next || ce->prev)
                        panic("ce @ 0x%x should not be in a list yet!\n", ce);

                    if (ce->clone)
                        free(ce->clone);

                    if (ce->data == NULL)
                        panic("ce @ 0x%lx has a null data ptr\n", (ulong)ce);
                }

                /* stamp the entry with its new identity */
                ce->dev        = dev;
                ce->block_num  = bnum + cur;
                ce->bsize      = bsize;
                ce->flags      = CE_NORMAL;
                ce->lock       = 0;
                ce->clone      = NULL;
                ce->func = ce->arg  = NULL;
                ce->next = ce->prev = NULL;

                if (op & CACHE_READ) {
                    if (data)
                        memcpy(data, ce->data, bsize);
                } else if (op & CACHE_WRITE) {
                    ce->flags  |= CE_DIRTY;
                    memcpy(ce->data, data, bsize);
                } else if (op & CACHE_NOOP) {
                    memset(ce->data, 0, bsize);
                    if (data)
                        memset(data, 0, bsize);

                    ce->flags |= CE_DIRTY;
                }

                if (op & CACHE_LOCKED) {
                    ce->lock++;
                    cel = &bc.locked;
                } else {
                    cel = &bc.normal;
                }

                /* now stick this puppy at the head of the mru list */
                add_to_head(cel, ce);


                if (dataptr) {
                    *dataptr = ce->data;
                }

                if (data != NULL)
                    data = (void *)((char *)data + bsize);
                else if (cur_nblocks != 1)
                    panic("cache can't handle setting data_ptr twice!\n");
            }  /* end of for(cur=0; cur < cur_nblocks; cur++) */

            bnum       += cur_nblocks;
            num_blocks -= cur_nblocks;

        }   /* end of else it's not in the cache */

    }   /* end of while(num_blocks) */

    UNLOCK(bc.lock);

    return 0;
}


/*
   Read block `bnum` of device `dev` into the cache (with read-ahead),
   lock it there, and return a pointer to the cached data.  Must be
   balanced with release_block().  Returns NULL on error.
*/
void *
get_block(int dev, fs_off_t bnum, int bsize)
{
    void *data;

    if (cache_block_io(dev, bnum, NULL, 1, bsize, CACHE_READ|CACHE_LOCKED|CACHE_READ_AHEAD_OK,
                       &data) != 0)
        return NULL;

    return data;
}

/*
   Like get_block() but without reading from disk: the cached block is
   zero-filled and marked dirty.  Must be balanced with
   release_block().  Returns NULL on error.
*/
void *
get_empty_block(int dev, fs_off_t bnum, int bsize)
{
    void *data;

    if (cache_block_io(dev, bnum, NULL, 1, bsize, CACHE_NOOP|CACHE_LOCKED,
                       &data) != 0)
        return NULL;

    return data;
}

/*
   Read `num_blocks` blocks starting at `bnum` into `data` through the
   cache (read-ahead permitted).  Returns 0 or an FS_E* error.
*/
int
cached_read(int dev, fs_off_t bnum, void *data, fs_off_t num_blocks, int bsize)
{
    return cache_block_io(dev, bnum, data, num_blocks, bsize,
                          CACHE_READ | CACHE_READ_AHEAD_OK, NULL);
}


/*
   Write `num_blocks` blocks starting at `bnum` from `data` through
   the cache (blocks are marked dirty, not written immediately unless
   large enough to bypass the cache).  Returns 0 or an FS_E* error.
*/
int
cached_write(int dev, fs_off_t bnum, const void *data, fs_off_t num_blocks,int bsize)
{
    return cache_block_io(dev, bnum, (void *)data, num_blocks, bsize,
                          CACHE_WRITE, NULL);
}

/*
   Like cached_write() but also takes a lock reference on each block
   (used by the journal).  Must be balanced with release_block().
*/
int
cached_write_locked(int dev, fs_off_t bnum, const void *data,
                    fs_off_t num_blocks, int bsize)
{
    return cache_block_io(dev, bnum, (void *)data, num_blocks, bsize,
                          CACHE_WRITE | CACHE_LOCKED, NULL);
}


/*
   Proactively flush up to NUM_FLUSH_BLOCKS dirty/cloned blocks of
   device `dev`.  With prefer_log_blocks != 0, only journalled blocks
   (those with a callback) are taken from the normal list; remaining
   slots are filled with cloned blocks from the locked list.

   NOTE(review): the final CE_BUSY clear happens after UNLOCK, outside
   the lock -- same pattern as the tail of flush_blocks(); confirm
   this is intentional.
*/
void
force_cache_flush(int dev, int prefer_log_blocks)
{
    int i, count = 0;
    cache_ent *ce;
    cache_ent *ents[NUM_FLUSH_BLOCKS];


    LOCK(bc.lock);

    for(ce=bc.normal.lru; ce; ce=ce->next) {
        if ((ce->dev == dev) &&
            (ce->flags & CE_BUSY) == 0 &&
            ((ce->flags & CE_DIRTY) || ce->clone) &&
            ((prefer_log_blocks && ce->func) || (prefer_log_blocks == 0))) {

            ce->flags |= CE_BUSY;
            ents[count++] = ce;

            if (count >= NUM_FLUSH_BLOCKS) {
                break;
            }
        }
    }

    /* if we've got some room left, try and grab any cloned blocks */
    if (count < NUM_FLUSH_BLOCKS) {
        for(ce=bc.locked.lru; ce; ce=ce->next) {
            if ((ce->dev == dev) &&
                (ce->flags & CE_BUSY) == 0 &&
                (ce->clone)) {

                ce->flags |= CE_BUSY;
                ents[count++] = ce;

                if (count >= NUM_FLUSH_BLOCKS) {
                    break;
                }
            }
        }
    }

    UNLOCK(bc.lock);

    if (count != 0) {
        qsort(ents, count, sizeof(cache_ent **), cache_ent_cmp);
        flush_ents(ents, count);

        for(i=0; i < count; i++)
            ents[i]->flags &= ~CE_BUSY;
    }
}