1/* 2 This file contains the global device cache for the BeOS. All 3 file system I/O comes through here. The cache can handle blocks 4 of different sizes for multiple different underlying physical 5 devices. 6 7 The cache is organized as a hash table (for lookups by device 8 and block number) and two doubly-linked lists. The normal 9 list is for "normal" blocks which are either clean or dirty. 10 The locked list is for blocks that are locked in the cache by 11 BFS. The lists are LRU ordered. 12 13 Most of the work happens in the function cache_block_io() which 14 is quite lengthy. The other functions of interest are get_ents() 15 which picks victims to be kicked out of the cache; flush_ents() 16 which does the work of flushing them; and set_blocks_info() which 17 handles cloning blocks and setting callbacks so that the BFS 18 journal will work properly. If you want to modify this code it 19 will take some study but it's not too bad. Do not think about 20 separating the list of clean and dirty blocks into two lists as 21 I did that already and it's slower. 22 23 Originally this cache code was written while listening to the album 24 "Ride the Lightning" by Metallica. The current version was written 25 while listening to "Welcome to SkyValley" by Kyuss as well as an 26 ambient collection on the German label FAX. Music helps a lot when 27 writing code. 28 29 THIS CODE COPYRIGHT DOMINIC GIAMPAOLO. NO WARRANTY IS EXPRESSED 30 OR IMPLIED. YOU MAY USE THIS CODE AND FREELY DISTRIBUTE IT FOR 31 NON-COMMERCIAL USE AS LONG AS THIS NOTICE REMAINS ATTACHED. 32 33 FOR COMMERCIAL USE, CONTACT DOMINIC GIAMPAOLO (dbg@be.com). 
34 35 Dominic Giampaolo 36 dbg@be.com 37*/ 38 39#include <stdio.h> 40#include <stdlib.h> 41#include <memory.h> 42#include <string.h> 43#include <errno.h> 44#include <fcntl.h> 45#include <sys/types.h> 46#include <unistd.h> 47 48 49#if (defined(__BEOS__) || defined(__HAIKU__)) 50#include <OS.h> 51#include <KernelExport.h> 52#endif 53 54#include "compat.h" 55#include "lock.h" 56#include "cache.h" 57 58 59 60#ifndef USER 61#define printf dprintf 62#endif 63#ifdef USER 64#define kprintf printf 65#endif 66 67 68/* forward prototypes */ 69static int flush_ents(cache_ent **ents, int n_ents); 70 71//static int do_dump(int argc, char **argv); 72//static int do_find_block(int argc, char **argv); 73//static int do_find_data(int argc, char **argv); 74//static void cache_flusher(void *arg, int phase); 75 76 77int chatty_io = 0; 78 79#define CHUNK (512 * 1024) /* a hack to work around scsi driver bugs */ 80 81size_t 82read_phys_blocks(int fd, fs_off_t bnum, void *data, uint num_blocks, int bsize) 83{ 84 size_t ret = 0; 85 size_t sum; 86 87 if (chatty_io) 88 printf("R: %8Ld : %3d\n", bnum, num_blocks); 89 90 if (num_blocks * bsize < CHUNK) 91 ret = read_pos(fd, bnum * bsize, data, num_blocks * bsize); 92 else { 93 for(sum=0; (sum + CHUNK) <= (num_blocks * bsize); sum += CHUNK) { 94 ret = read_pos(fd, (bnum * bsize) + sum, data, CHUNK); 95 if (ret != CHUNK) 96 break; 97 98 data = (void *)((char *)data + CHUNK); 99 } 100 101 if (ret == CHUNK && ((num_blocks * bsize) - sum) > 0) { 102 ret = read_pos(fd, (bnum * bsize) + sum, data, 103 (num_blocks * bsize) - sum); 104 105 if (ret == (num_blocks * bsize) - sum) 106 ret = num_blocks * bsize; 107 } else if (ret == CHUNK) { 108 ret = num_blocks * bsize; 109 } 110 } 111 112 if (ret == num_blocks * bsize) 113 return 0; 114 else 115 return EBADF; 116} 117 118size_t 119write_phys_blocks(int fd, fs_off_t bnum, void *data, uint num_blocks, int bsize) 120{ 121 size_t ret = 0; 122 size_t sum; 123 124 if (chatty_io) 125 printf("W: %8Ld : %3d\n", 
bnum, num_blocks); 126 127 if (num_blocks * bsize < CHUNK) 128 ret = write_pos(fd, bnum * bsize, data, num_blocks * bsize); 129 else { 130 for(sum=0; (sum + CHUNK) <= (num_blocks * bsize); sum += CHUNK) { 131 ret = write_pos(fd, (bnum * bsize) + sum, data, CHUNK); 132 if (ret != CHUNK) 133 break; 134 135 data = (void *)((char *)data + CHUNK); 136 } 137 138 if (ret == CHUNK && ((num_blocks * bsize) - sum) > 0) { 139 ret = write_pos(fd, (bnum * bsize) + sum, data, 140 (num_blocks * bsize) - sum); 141 142 if (ret == (num_blocks * bsize) - sum) 143 ret = num_blocks * bsize; 144 } else if (ret == CHUNK) { 145 ret = num_blocks * bsize; 146 } 147 } 148 149 150 if (ret == num_blocks * bsize) 151 return 0; 152 else 153 return EBADF; 154} 155 156// #pragma mark - 157 158static int 159init_hash_table(hash_table *ht) 160{ 161 ht->max = HT_DEFAULT_MAX; 162 ht->mask = ht->max - 1; 163 ht->num_elements = 0; 164 165 ht->table = (hash_ent **)calloc(ht->max, sizeof(hash_ent *)); 166 if (ht->table == NULL) 167 return ENOMEM; 168 169 return 0; 170} 171 172 173static void 174shutdown_hash_table(hash_table *ht) 175{ 176 int i, hash_len; 177 hash_ent *he, *next; 178 179 for(i=0; i < ht->max; i++) { 180 he = ht->table[i]; 181 182 for(hash_len=0; he; hash_len++, he=next) { 183 next = he->next; 184 free(he); 185 } 186 } 187 188 if (ht->table) 189 free(ht->table); 190 ht->table = NULL; 191} 192 193#if 0 194static void 195print_hash_stats(hash_table *ht) 196{ 197 int i, hash_len, max = -1, sum = 0; 198 hash_ent *he, *next; 199 200 for(i=0; i < ht->max; i++) { 201 he = ht->table[i]; 202 203 for(hash_len=0; he; hash_len++, he=next) { 204 next = he->next; 205 } 206 if (hash_len) 207 printf("bucket %3d : %3d\n", i, hash_len); 208 209 sum += hash_len; 210 if (hash_len > max) 211 max = hash_len; 212 } 213 214 printf("max # of chains: %d, average chain length %d\n", max,sum/ht->max); 215} 216#endif 217 218#define HASH(d, b) ((((fs_off_t)d) << (sizeof(fs_off_t)*8 - 6)) | (b)) 219 220static hash_ent * 
221new_hash_ent(int dev, fs_off_t bnum, void *data) 222{ 223 hash_ent *he; 224 225 he = (hash_ent *)malloc(sizeof(*he)); 226 if (he == NULL) 227 return NULL; 228 229 he->hash_val = HASH(dev, bnum); 230 he->dev = dev; 231 he->bnum = bnum; 232 he->data = data; 233 he->next = NULL; 234 235 return he; 236} 237 238 239static int 240grow_hash_table(hash_table *ht) 241{ 242 int i, omax, newsize, newmask; 243 fs_off_t hash; 244 hash_ent **new_table, *he, *next; 245 246 if (ht->max & ht->mask) { 247 printf("*** hashtable size %d or mask %d looks weird!\n", ht->max, 248 ht->mask); 249 } 250 251 omax = ht->max; 252 newsize = omax * 2; /* have to grow in powers of two */ 253 newmask = newsize - 1; 254 255 new_table = (hash_ent **)calloc(newsize, sizeof(hash_ent *)); 256 if (new_table == NULL) 257 return ENOMEM; 258 259 for(i=0; i < omax; i++) { 260 for(he=ht->table[i]; he; he=next) { 261 hash = he->hash_val & newmask; 262 next = he->next; 263 264 he->next = new_table[hash]; 265 new_table[hash] = he; 266 } 267 } 268 269 free(ht->table); 270 ht->table = new_table; 271 ht->max = newsize; 272 ht->mask = newmask; 273 274 return 0; 275} 276 277 278 279 280static int 281hash_insert(hash_table *ht, int dev, fs_off_t bnum, void *data) 282{ 283 fs_off_t hash; 284 hash_ent *he, *curr; 285 286 hash = HASH(dev, bnum) & ht->mask; 287 288 curr = ht->table[hash]; 289 for(; curr != NULL; curr=curr->next) 290 if (curr->dev == dev && curr->bnum == bnum) 291 break; 292 293 if (curr && curr->dev == dev && curr->bnum == bnum) { 294 printf("entry %d:%Ld already in the hash table!\n", dev, bnum); 295 return EEXIST; 296 } 297 298 he = new_hash_ent(dev, bnum, data); 299 if (he == NULL) 300 return ENOMEM; 301 302 he->next = ht->table[hash]; 303 ht->table[hash] = he; 304 305 ht->num_elements++; 306 if (ht->num_elements >= ((ht->max * 3) / 4)) { 307 if (grow_hash_table(ht) != 0) 308 return ENOMEM; 309 } 310 311 return 0; 312} 313 314static void * 315hash_lookup(hash_table *ht, int dev, fs_off_t bnum) 316{ 
317 hash_ent *he; 318 319 he = ht->table[HASH(dev, bnum) & ht->mask]; 320 321 for(; he != NULL; he=he->next) { 322 if (he->dev == dev && he->bnum == bnum) 323 break; 324 } 325 326 if (he) 327 return he->data; 328 else 329 return NULL; 330} 331 332 333static void * 334hash_delete(hash_table *ht, int dev, fs_off_t bnum) 335{ 336 void *data; 337 fs_off_t hash; 338 hash_ent *he, *prev = NULL; 339 340 hash = HASH(dev, bnum) & ht->mask; 341 he = ht->table[hash]; 342 343 for(; he != NULL; prev=he,he=he->next) { 344 if (he->dev == dev && he->bnum == bnum) 345 break; 346 } 347 348 if (he == NULL) { 349 printf("*** hash_delete: tried to delete non-existent block %d:%Ld\n", 350 dev, bnum); 351 return NULL; 352 } 353 354 data = he->data; 355 356 if (ht->table[hash] == he) 357 ht->table[hash] = he->next; 358 else if (prev) 359 prev->next = he->next; 360 else 361 panic("hash table is inconsistent\n"); 362 363 free(he); 364 ht->num_elements--; 365 366 return data; 367} 368 369// #pragma mark - 370 371/* 372 These are the global variables for the cache. 
373*/ 374static block_cache bc; 375 376#define MAX_IOVECS 64 /* # of iovecs for use by cache code */ 377static lock iovec_lock; 378static struct iovec *iovec_pool[MAX_IOVECS]; /* each ptr is to an array of iovecs */ 379static int iovec_used[MAX_IOVECS]; /* non-zero == iovec is in use */ 380 381#define NUM_FLUSH_BLOCKS 64 /* size of the iovec array pointed by each ptr */ 382 383 384#define DEFAULT_READ_AHEAD_SIZE (32 * 1024) 385static int read_ahead_size = DEFAULT_READ_AHEAD_SIZE; 386 387/* this array stores the size of each device so we can error check requests */ 388#define MAX_DEVICES 256 389fs_off_t max_device_blocks[MAX_DEVICES]; 390 391 392/* has the time of the last cache access so cache flushing doesn't interfere */ 393static bigtime_t last_cache_access = 0; 394 395 396int 397init_block_cache(int max_blocks, int flags) 398{ 399 memset(&bc, 0, sizeof(bc)); 400 memset(iovec_pool, 0, sizeof(iovec_pool)); 401 memset(iovec_used, 0, sizeof(iovec_used)); 402 memset(&max_device_blocks, 0, sizeof(max_device_blocks)); 403 404 if (init_hash_table(&bc.ht) != 0) 405 return ENOMEM; 406 407 bc.lock.s = iovec_lock.s = -1; 408 409 bc.max_blocks = max_blocks; 410 bc.flags = flags; 411 if (new_lock(&bc.lock, "bollockcache") != 0) 412 goto err; 413 414 if (new_lock(&iovec_lock, "iovec_lock") != 0) 415 goto err; 416 417 /* allocate two of these up front so vm won't accidently re-enter itself */ 418 iovec_pool[0] = (struct iovec *)malloc(sizeof(struct iovec)*NUM_FLUSH_BLOCKS); 419 iovec_pool[1] = (struct iovec *)malloc(sizeof(struct iovec)*NUM_FLUSH_BLOCKS); 420 421#ifndef USER 422#ifdef DEBUG 423 add_debugger_command("bcache", do_dump, "dump the block cache list"); 424 add_debugger_command("fblock", do_find_block, "find a block in the cache"); 425 add_debugger_command("fdata", do_find_data, "find a data block ptr in the cache"); 426#endif 427 register_kernel_daemon(cache_flusher, NULL, 3); 428#endif 429 430 return 0; 431 432 err: 433 if (bc.lock.s >= 0) 434 free_lock(&bc.lock); 
435 436 if (iovec_lock.s >= 0) 437 free_lock(&iovec_lock); 438 439 shutdown_hash_table(&bc.ht); 440 memset((void *)&bc, 0, sizeof(bc)); 441 return ENOMEM; 442} 443 444 445static struct iovec * 446get_iovec_array(void) 447{ 448 int i; 449 struct iovec *iov; 450 451 LOCK(iovec_lock); 452 453 for(i=0; i < MAX_IOVECS; i++) { 454 if (iovec_used[i] == 0) 455 break; 456 } 457 458 if (i >= MAX_IOVECS) /* uh-oh */ 459 panic("cache: ran out of iovecs (pool 0x%x, used 0x%x)!\n", 460 &iovec_pool[0], &iovec_used[0]); 461 462 if (iovec_pool[i] == NULL) { 463 iovec_pool[i] = (struct iovec *)malloc(sizeof(struct iovec)*NUM_FLUSH_BLOCKS); 464 if (iovec_pool == NULL) 465 panic("can't allocate an iovec!\n"); 466 } 467 468 iov = iovec_pool[i]; 469 iovec_used[i] = 1; 470 471 UNLOCK(iovec_lock); 472 473 return iov; 474} 475 476 477static void 478release_iovec_array(struct iovec *iov) 479{ 480 int i; 481 482 LOCK(iovec_lock); 483 484 for(i=0; i < MAX_IOVECS; i++) { 485 if (iov == iovec_pool[i]) 486 break; 487 } 488 489 if (i < MAX_IOVECS) 490 iovec_used[i] = 0; 491 else /* uh-oh */ 492 printf("cache: released an iovec I don't own (iov %p)\n", iov); 493 494 495 UNLOCK(iovec_lock); 496} 497 498 499 500 501static void 502real_dump_cache_list(cache_ent_list *cel) 503{ 504 cache_ent *ce; 505 506 kprintf("starting from LRU end:\n"); 507 508 for (ce = cel->lru; ce; ce = ce->next) { 509 kprintf("ce %p dev %2d bnum %6Ld lock %d flag %d arg %p " 510 "clone %p\n", ce, ce->dev, ce->block_num, ce->lock, 511 ce->flags, ce->arg, ce->clone); 512 } 513 kprintf("MRU end\n"); 514} 515 516static void 517dump_cache_list(void) 518{ 519 kprintf("NORMAL BLOCKS\n"); 520 real_dump_cache_list(&bc.normal); 521 522 kprintf("LOCKED BLOCKS\n"); 523 real_dump_cache_list(&bc.locked); 524 525 kprintf("cur blocks %d, max blocks %d ht @ %p\n", bc.cur_blocks, 526 bc.max_blocks, &bc.ht); 527} 528 529#if 0 530static void 531check_bcache(char *str) 532{ 533 int count = 0; 534 cache_ent *ce, *prev = NULL; 535 536 LOCK(bc.lock); 
537 538 for(ce=bc.normal.lru; ce; prev=ce, ce=ce->next) { 539 count++; 540 } 541 542 for(ce=bc.locked.lru; ce; prev=ce, ce=ce->next) { 543 count++; 544 } 545 546 if (count != bc.cur_blocks) { 547 if (count < bc.cur_blocks - 16) 548 panic("%s: count == %d, cur_blocks %d, prev 0x%x\n", 549 str, count, bc.cur_blocks, prev); 550 else 551 printf("%s: count == %d, cur_blocks %d, prev 0x%x\n", 552 str, count, bc.cur_blocks, prev); 553 } 554 555 UNLOCK(bc.lock); 556} 557 558 559static void 560dump_lists(void) 561{ 562 cache_ent *nce; 563 564 printf("LOCKED 0x%x (tail 0x%x, head 0x%x)\n", &bc.locked, 565 bc.locked.lru, bc.locked.mru); 566 for(nce=bc.locked.lru; nce; nce=nce->next) 567 printf("nce @ 0x%x dev %d bnum %ld flags %d lock %d clone 0x%x func 0x%x\n", 568 nce, nce->dev, nce->block_num, nce->flags, nce->lock, nce->clone, 569 nce->func); 570 571 printf("NORMAL 0x%x (tail 0x%x, head 0x%x)\n", &bc.normal, 572 bc.normal.lru, bc.normal.mru); 573 for(nce=bc.normal.lru; nce; nce=nce->next) 574 printf("nce @ 0x%x dev %d bnum %ld flags %d lock %d clone 0x%x func 0x%x\n", 575 nce, nce->dev, nce->block_num, nce->flags, nce->lock, nce->clone, 576 nce->func); 577} 578 579 580static void 581check_lists(void) 582{ 583 cache_ent *ce, *prev, *oce; 584 cache_ent_list *cel; 585 586 cel = &bc.normal; 587 for(ce=cel->lru,prev=NULL; ce; prev=ce, ce=ce->next) { 588 for(oce=bc.locked.lru; oce; oce=oce->next) { 589 if (oce == ce) { 590 dump_lists(); 591 panic("1:ce @ 0x%x is in two lists(cel 0x%x &LOCKED)\n",ce,cel); 592 } 593 } 594 } 595 if (prev && prev != cel->mru) { 596 dump_lists(); 597 panic("*** last element in list != cel mru (ce 0x%x, cel 0x%x)\n", 598 prev, cel); 599 } 600 601 cel = &bc.locked; 602 for(ce=cel->lru,prev=NULL; ce; prev=ce, ce=ce->next) { 603 for(oce=bc.normal.lru; oce; oce=oce->next) { 604 if (oce == ce) { 605 dump_lists(); 606 panic("3:ce @ 0x%x is in two lists(cel 0x%x & DIRTY)\n",ce,cel); 607 } 608 } 609 } 610 if (prev && prev != cel->mru) { 611 dump_lists(); 612 
panic("*** last element in list != cel mru (ce 0x%x, cel 0x%x)\n", 613 prev, cel); 614 } 615} 616#endif 617 618 619#ifdef DEBUG 620#if 0 621static int 622do_dump(int argc, char **argv) 623{ 624 dump_cache_list(); 625 return 1; 626} 627 628 629static int 630do_find_block(int argc, char **argv) 631{ 632 int i; 633 fs_off_t bnum; 634 cache_ent *ce; 635 636 if (argc < 2) { 637 kprintf("%s: needs a block # argument\n", argv[0]); 638 return 1; 639 } 640 641 for(i=1; i < argc; i++) { 642 bnum = strtoul(argv[i], NULL, 0); 643 644 for(ce=bc.normal.lru; ce; ce=ce->next) { 645 if (ce->block_num == bnum) { 646 kprintf("found clean bnum %ld @ 0x%lx (data @ 0x%lx)\n", 647 bnum, ce, ce->data); 648 } 649 } 650 651 for(ce=bc.locked.lru; ce; ce=ce->next) { 652 if (ce->block_num == bnum) { 653 kprintf("found locked bnum %ld @ 0x%lx (data @ 0x%lx)\n", 654 bnum, ce, ce->data); 655 } 656 } 657 } 658 659 return 0; 660} 661 662static int 663do_find_data(int argc, char **argv) 664{ 665 int i; 666 void *data; 667 cache_ent *ce; 668 669 if (argc < 2) { 670 kprintf("%s: needs a block # argument\n", argv[0]); 671 return 1; 672 } 673 674 for(i=1; i < argc; i++) { 675 data = (void *)strtoul(argv[i], NULL, 0); 676 677 for(ce=bc.normal.lru; ce; ce=ce->next) { 678 if (ce->data == data) { 679 kprintf("found normal data ptr for bnum %ld @ ce 0x%lx\n", 680 ce->block_num, ce); 681 } 682 } 683 684 for(ce=bc.locked.lru; ce; ce=ce->next) { 685 if (ce->data == data) { 686 kprintf("found locked data ptr for bnum %ld @ ce 0x%lx\n", 687 ce->block_num, ce); 688 } 689 } 690 } 691 692 return 0; 693} 694#endif 695#endif /* DEBUG */ 696 697 698 699/* 700 this function detaches the cache_ent from the list. 
701*/ 702static void 703delete_from_list(cache_ent_list *cel, cache_ent *ce) 704{ 705 if (ce->next) 706 ce->next->prev = ce->prev; 707 if (ce->prev) 708 ce->prev->next = ce->next; 709 710 if (cel->lru == ce) 711 cel->lru = ce->next; 712 if (cel->mru == ce) 713 cel->mru = ce->prev; 714 715 ce->next = NULL; 716 ce->prev = NULL; 717} 718 719 720 721/* 722 this function adds the cache_ent ce to the head of the 723 list (i.e. the MRU end). the cache_ent should *not* 724 be in any lists. 725*/ 726static void 727add_to_head(cache_ent_list *cel, cache_ent *ce) 728{ 729if (ce->next != NULL || ce->prev != NULL) { 730 panic("*** ath: ce has non-null next/prev ptr (ce 0x%x nxt 0x%x, prv 0x%x)\n", 731 ce, ce->next, ce->prev); 732} 733 734 ce->next = NULL; 735 ce->prev = cel->mru; 736 737 if (cel->mru) 738 cel->mru->next = ce; 739 cel->mru = ce; 740 741 if (cel->lru == NULL) 742 cel->lru = ce; 743} 744 745 746/* 747 this function adds the cache_ent ce to the tail of the 748 list (i.e. the MRU end). the cache_ent should *not* 749 be in any lists. 750*/ 751static void 752add_to_tail(cache_ent_list *cel, cache_ent *ce) 753{ 754if (ce->next != NULL || ce->prev != NULL) { 755 panic("*** att: ce has non-null next/prev ptr (ce 0x%x nxt 0x%x, prv 0x%x)\n", 756 ce, ce->next, ce->prev); 757} 758 759 ce->next = cel->lru; 760 ce->prev = NULL; 761 762 if (cel->lru) 763 cel->lru->prev = ce; 764 cel->lru = ce; 765 766 if (cel->mru == NULL) 767 cel->mru = ce; 768} 769 770 771static int 772cache_ent_cmp(const void *a, const void *b) 773{ 774 fs_off_t diff; 775 cache_ent *p1 = *(cache_ent **)a, *p2 = *(cache_ent **)b; 776 777 if (p1 == NULL || p2 == NULL) 778 panic("cache_ent pointers are null?!? (a 0x%lx, b 0x%lx\n)\n", a, b); 779 780 if (p1->dev == p2->dev) { 781 diff = p1->block_num - p2->block_num; 782 return (int)diff; 783 } else { 784 return p1->dev - p2->dev; 785 } 786} 787 788#if 0 789// ToDo: add this to the fsh (as a background thread)? 
790static void 791cache_flusher(void *arg, int phase) 792{ 793 int i, num_ents, err; 794 bigtime_t now = system_time(); 795 static cache_ent *ce = NULL; 796 static cache_ent *ents[NUM_FLUSH_BLOCKS]; 797 798 /* 799 if someone else was in the cache recently then just bail out so 800 we don't lock them out unnecessarily 801 */ 802 if ((now - last_cache_access) < 1000000) 803 return; 804 805 LOCK(bc.lock); 806 807 ce = bc.normal.lru; 808 809 for(num_ents=0; ce && num_ents < NUM_FLUSH_BLOCKS; ce=ce->next) { 810 if (ce->flags & CE_BUSY) 811 continue; 812 813 if ((ce->flags & CE_DIRTY) == 0 && ce->clone == NULL) 814 continue; 815 816 ents[num_ents] = ce; 817 ents[num_ents]->flags |= CE_BUSY; 818 num_ents++; 819 } 820 821 /* if we've got some room left over, look for cloned locked blocks */ 822 if (num_ents < NUM_FLUSH_BLOCKS) { 823 ce = bc.locked.lru; 824 825 for(; num_ents < NUM_FLUSH_BLOCKS;) { 826 for(; 827 ce && ((ce->flags & CE_BUSY) || ce->clone == NULL); 828 ce=ce->next) 829 /* skip ents that meet the above criteria */; 830 831 if (ce == NULL) 832 break; 833 834 ents[num_ents] = ce; 835 ents[num_ents]->flags |= CE_BUSY; 836 ce = ce->next; 837 num_ents++; 838 } 839 } 840 841 UNLOCK(bc.lock); 842 843 if (num_ents == 0) 844 return; 845 846 qsort(ents, num_ents, sizeof(cache_ent **), cache_ent_cmp); 847 848 if ((err = flush_ents(ents, num_ents)) != 0) { 849 printf("flush ents failed (ents @ 0x%lx, num_ents %d!\n", 850 (ulong)ents, num_ents); 851 } 852 853 for(i=0; i < num_ents; i++) { /* clear the busy bit on each of ent */ 854 ents[i]->flags &= ~CE_BUSY; 855 } 856} 857#endif 858 859 860static int 861flush_cache_ent(cache_ent *ce) 862{ 863 int ret = 0; 864 void *data; 865 866 /* if true, then there's nothing to flush */ 867 if ((ce->flags & CE_DIRTY) == 0 && ce->clone == NULL) 868 return 0; 869 870 /* same thing here */ 871 if (ce->clone == NULL && ce->lock != 0) 872 return 0; 873 874 restart: 875 if (ce->clone) 876 data = ce->clone; 877 else 878 data = ce->data; 879 
880 if (chatty_io > 2) printf("flush: %7Ld\n", ce->block_num); 881 ret = write_phys_blocks(ce->dev, ce->block_num, data, 1, ce->bsize); 882 883 if (ce->func) { 884 ce->func(ce->logged_bnum, 1, ce->arg); 885 ce->func = NULL; 886 } 887 888 if (ce->clone) { 889 free(ce->clone); 890 ce->clone = NULL; 891 892 if (ce->lock == 0 && (ce->flags & CE_DIRTY)) 893 goto restart; /* also write the real data ptr */ 894 } else { 895 ce->flags &= ~CE_DIRTY; 896 } 897 898 return ret; 899} 900 901 902static int 903flush_ents(cache_ent **ents, int n_ents) 904{ 905 int i, j, k, ret = 0, bsize, iocnt, do_again = 0; 906 fs_off_t start_bnum; 907 struct iovec *iov; 908 909 iov = get_iovec_array(); 910 if (iov == NULL) 911 return ENOMEM; 912 913restart: 914 for(i=0; i < n_ents; i++) { 915 /* if true, then there's nothing to flush */ 916 if ((ents[i]->flags & CE_DIRTY) == 0 && ents[i]->clone == NULL) 917 continue; 918 919 /* if true we can't touch the dirty data yet because it's locked */ 920 if (ents[i]->clone == NULL && ents[i]->lock != 0) 921 continue; 922 923 924 bsize = ents[i]->bsize; 925 start_bnum = ents[i]->block_num; 926 927 for(j=i+1; j < n_ents && (j - i) < NUM_FLUSH_BLOCKS; j++) { 928 if (ents[j]->dev != ents[i]->dev || 929 ents[j]->block_num != start_bnum + (j - i)) 930 break; 931 932 if (ents[j]->clone == NULL && ents[j]->lock != 0) 933 break; 934 } 935 936 if (j == i+1) { /* only one block, just flush it directly */ 937 if ((ret = flush_cache_ent(ents[i])) != 0) 938 break; 939 continue; 940 } 941 942 943 for(k=i,iocnt=0; k < j; k++,iocnt++) { 944 if (ents[k]->clone) 945 iov[iocnt].iov_base = ents[k]->clone; 946 else 947 iov[iocnt].iov_base = ents[k]->data; 948 949 iov[iocnt].iov_len = bsize; 950 } 951 952 if (chatty_io) 953 printf("writev @ %Ld for %d blocks\n", start_bnum, iocnt); 954 955 ret = writev_pos(ents[i]->dev, start_bnum * (fs_off_t)bsize, 956 &iov[0], iocnt); 957 if (ret != iocnt*bsize) { 958 int idx; 959 960 printf("flush_ents: writev failed: iocnt %d start bnum 
%Ld " 961 "bsize %d, ret %d\n", iocnt, start_bnum, bsize, ret); 962 963 for(idx=0; idx < iocnt; idx++) 964 printf("iov[%2d] = %p :: %ld\n", idx, iov[idx].iov_base, 965 iov[idx].iov_len); 966 967 printf("error %s writing blocks %Ld:%d (%d != %d)\n", 968 strerror(errno), start_bnum, iocnt, ret, iocnt*bsize); 969 ret = EINVAL; 970 break; 971 } 972 ret = 0; 973 974 975 for(k=i; k < j; k++) { 976 if (ents[k]->func) { 977 ents[k]->func(ents[k]->logged_bnum, 1, ents[k]->arg); 978 ents[k]->func = NULL; 979 } 980 981 if (ents[k]->clone) { 982 free(ents[k]->clone); 983 ents[k]->clone = NULL; 984 } else { 985 ents[k]->flags &= ~CE_DIRTY; 986 } 987 } 988 989 990 i = j - 1; /* i gets incremented by the outer for loop */ 991 } 992 993 /* 994 here we have to go back through and flush any blocks that are 995 still dirty. with an arched brow you astutely ask, "but how 996 could this happen given the above loop?" Ahhh young grasshopper 997 I say, the path through the cache is long and twisty and fraught 998 with peril. The reason it can happen is that a block can be both 999 cloned and dirty. The above loop would only flush the cloned half 1000 of the data, not the main dirty block. So we have to go back 1001 through and see if there are any blocks that are still dirty. If 1002 there are we go back to the top of the function and do the whole 1003 thing over. Kind of grody but it is necessary to insure the 1004 correctness of the log for the Be file system. 
1005 */ 1006 if (do_again == 0) { 1007 for(i=0; i < n_ents; i++) { 1008 if ((ents[i]->flags & CE_DIRTY) == 0 || ents[i]->lock) 1009 continue; 1010 1011 do_again = 1; 1012 break; 1013 } 1014 1015 if (do_again) 1016 goto restart; 1017 } 1018 1019 release_iovec_array(iov); 1020 1021 1022 return ret; 1023} 1024 1025static void 1026delete_cache_list(cache_ent_list *cel) 1027{ 1028 void *junk; 1029 cache_ent *ce, *next; 1030 1031 for (ce = cel->lru; ce; ce = next) { 1032 next = ce->next; 1033 if (ce->lock != 0) { 1034 if (ce->func) 1035 printf("*** shutdown_block_cache: block %Ld, lock == %d " 1036 "(arg %p)!\n", ce->block_num, ce->lock, ce->arg); 1037 else 1038 printf("*** shutdown_block_cache: block %Ld, lock == %d!\n", 1039 ce->block_num, ce->lock); 1040 } 1041 1042 if (ce->flags & CE_BUSY) { 1043 printf("* shutdown block cache: bnum %Ld is busy? ce %p\n", 1044 ce->block_num, ce); 1045 } 1046 1047 if ((ce->flags & CE_DIRTY) || ce->clone) { 1048 flush_cache_ent(ce); 1049 } 1050 1051 if (ce->clone) 1052 free(ce->clone); 1053 ce->clone = NULL; 1054 1055 if (ce->data) 1056 free(ce->data); 1057 ce->data = NULL; 1058 1059 if ((junk = hash_delete(&bc.ht, ce->dev, ce->block_num)) != ce) { 1060 printf("*** free_device_cache: bad hash table entry %Ld " 1061 "%p != %p\n", ce->block_num, junk, ce); 1062 } 1063 1064 memset(ce, 0xfd, sizeof(*ce)); 1065 free(ce); 1066 1067 bc.cur_blocks--; 1068 } 1069} 1070 1071 1072void 1073shutdown_block_cache(void) 1074{ 1075 /* print_hash_stats(&bc.ht); */ 1076 1077 if (bc.lock.s > 0) 1078 LOCK(bc.lock); 1079 1080#ifndef USER 1081 unregister_kernel_daemon(cache_flusher, NULL); 1082#endif 1083 1084 delete_cache_list(&bc.normal); 1085 delete_cache_list(&bc.locked); 1086 1087 bc.normal.lru = bc.normal.mru = NULL; 1088 bc.locked.lru = bc.locked.mru = NULL; 1089 1090 shutdown_hash_table(&bc.ht); 1091 1092 if (bc.lock.s > 0) 1093 free_lock(&bc.lock); 1094 bc.lock.s = -1; 1095 1096 if (iovec_lock.s >= 0) 1097 free_lock(&iovec_lock); 1098} 1099 1100 
1101 1102int 1103init_cache_for_device(int fd, fs_off_t max_blocks) 1104{ 1105 int ret = 0; 1106 1107 if (fd >= MAX_DEVICES) 1108 return -1; 1109 1110 LOCK(bc.lock); 1111 1112 if (max_device_blocks[fd] != 0) { 1113 printf("device %d is already initialized!\n", fd); 1114 ret = -1; 1115 } else { 1116 max_device_blocks[fd] = max_blocks; 1117 } 1118 1119 UNLOCK(bc.lock); 1120 1121 return ret; 1122} 1123 1124 1125 1126/* 1127 this routine assumes that bc.lock has been acquired 1128*/ 1129static cache_ent * 1130block_lookup(int dev, fs_off_t bnum) 1131{ 1132 int count = 0; 1133 cache_ent *ce; 1134 1135 while (1) { 1136 ce = hash_lookup(&bc.ht, dev, bnum); 1137 if (ce == NULL) 1138 return NULL; 1139 1140 if ((ce->flags & CE_BUSY) == 0) /* it's ok, break out and return it */ 1141 break; 1142 1143 /* else, it's busy and we need to retry our lookup */ 1144 UNLOCK(bc.lock); 1145 1146 snooze(5000); 1147 if (count++ == 5000) { /* then a lot of time has elapsed */ 1148 printf("block %Ld isn't coming un-busy (ce @ %p)\n", 1149 ce->block_num, ce); 1150 } 1151 1152 LOCK(bc.lock); 1153 } 1154 1155 if (ce->flags & CE_BUSY) 1156 panic("block lookup: returning a busy block @ 0x%lx?!?\n",(ulong)ce); 1157 1158 return ce; 1159} 1160 1161 1162 1163int 1164set_blocks_info(int dev, fs_off_t *blocks, int nblocks, 1165 void (*func)(fs_off_t bnum, size_t nblocks, void *arg), void *arg) 1166{ 1167 int i, j, cur; 1168 cache_ent *ce; 1169 cache_ent *ents[NUM_FLUSH_BLOCKS]; 1170 1171 LOCK(bc.lock); 1172 1173 1174 for(i=0, cur=0; i < nblocks; i++) { 1175 1176 /* printf("sbi: %ld (arg 0x%x)\n", blocks[i], arg); */ 1177 ce = block_lookup(dev, blocks[i]); 1178 if (ce == NULL) { 1179 panic("*** set_block_info can't find bnum %ld!\n", blocks[i]); 1180 UNLOCK(bc.lock); 1181 return ENOENT; /* hopefully this doesn't happen... 
*/ 1182 } 1183 1184 1185 if (blocks[i] != ce->block_num || dev != ce->dev) { 1186 UNLOCK(bc.lock); 1187 panic("** error1: looked up dev %d block %ld but found dev %d " 1188 "bnum %ld\n", dev, blocks[i], ce->dev, ce->block_num); 1189 return EBADF; 1190 } 1191 1192 if (ce->lock == 0) { 1193 panic("* set_block_info on bnum %ld (%d) but it's not locked!\n", 1194 blocks[i], nblocks); 1195 } 1196 1197 1198 if ((ce->flags & CE_DIRTY) == 0) { 1199 panic("*** set_block_info on non-dirty block bnum %ld (%d)!\n", 1200 blocks[i], nblocks); 1201 } 1202 1203 ce->flags |= CE_BUSY; /* mark all blocks as busy till we're done */ 1204 1205 /* if there is cloned data, it needs to be flushed now */ 1206 if (ce->clone && ce->func) { 1207 ents[cur++] = ce; 1208 1209 if (cur >= NUM_FLUSH_BLOCKS) { 1210 UNLOCK(bc.lock); 1211 1212 qsort(ents, cur, sizeof(cache_ent **), cache_ent_cmp); 1213 1214 flush_ents(ents, cur); 1215 1216 LOCK(bc.lock); 1217 for(j=0; j < cur; j++) 1218 ents[j]->flags &= ~CE_BUSY; 1219 cur = 0; 1220 } 1221 } 1222 } 1223 1224 1225 if (cur != 0) { 1226 UNLOCK(bc.lock); 1227 1228 qsort(ents, cur, sizeof(cache_ent **), cache_ent_cmp); 1229 1230 flush_ents(ents, cur); 1231 1232 LOCK(bc.lock); 1233 for(j=0; j < cur; j++) 1234 ents[j]->flags &= ~CE_BUSY; 1235 cur = 0; 1236 } 1237 1238 1239 /* now go through and set the info that we were asked to */ 1240 for (i = 0; i < nblocks; i++) { 1241 /* we can call hash_lookup() here because we know it's around */ 1242 ce = hash_lookup(&bc.ht, dev, blocks[i]); 1243 if (ce == NULL) { 1244 panic("*** set_block_info can't find bnum %Ld!\n", blocks[i]); 1245 UNLOCK(bc.lock); 1246 return ENOENT; /* hopefully this doesn't happen... 
*/ 1247 } 1248 1249 ce->flags &= ~(CE_DIRTY | CE_BUSY); 1250 1251 if (ce->func != NULL) { 1252 panic("*** set_block_info non-null callback on bnum %Ld\n", 1253 ce->block_num); 1254 } 1255 1256 if (ce->clone != NULL) { 1257 panic("*** ce->clone == %p, not NULL in set_block_info\n", 1258 ce->clone); 1259 } 1260 1261 ce->clone = (void *)malloc(ce->bsize); 1262 if (ce->clone == NULL) 1263 panic("*** can't clone bnum %Ld (bsize %d)\n", 1264 ce->block_num, ce->bsize); 1265 1266 1267 memcpy(ce->clone, ce->data, ce->bsize); 1268 1269 ce->func = func; 1270 ce->arg = arg; 1271 1272 ce->logged_bnum = blocks[i]; 1273 1274 ce->lock--; 1275 if (ce->lock < 0) { 1276 printf("sbi: whoa nellie! ce @ %p (%Ld) has lock == %d\n", 1277 ce, ce->block_num, ce->lock); 1278 } 1279 1280 if (ce->lock == 0) { 1281 delete_from_list(&bc.locked, ce); 1282 add_to_head(&bc.normal, ce); 1283 } 1284 } 1285 1286 UNLOCK(bc.lock); 1287 1288 return 0; 1289} 1290 1291 1292/* this function is only for use by flush_device() */ 1293static void 1294do_flush(cache_ent **ents, int max) 1295{ 1296 int i; 1297 1298 for(i=0; i < max; i++) { 1299 ents[i]->flags |= CE_BUSY; 1300 } 1301 1302 UNLOCK(bc.lock); 1303 1304 qsort(ents, max, sizeof(cache_ent **), cache_ent_cmp); 1305 flush_ents(ents, max); 1306 1307 LOCK(bc.lock); 1308 for(i=0; i < max; i++) { 1309 ents[i]->flags &= ~CE_BUSY; 1310 } 1311} 1312 1313int 1314flush_device(int dev, int warn_locked) 1315{ 1316 int cur; 1317 cache_ent *ce; 1318 cache_ent *ents[NUM_FLUSH_BLOCKS]; 1319 1320 LOCK(bc.lock); 1321 1322 cur = 0; 1323 ce = bc.normal.lru; 1324 while (ce) { 1325 if (ce->dev != dev || (ce->flags & CE_BUSY)) { 1326 ce = ce->next; 1327 continue; 1328 } 1329 1330 if ((ce->flags & CE_DIRTY) || ce->clone) { 1331 ents[cur++] = ce; 1332 if (cur >= NUM_FLUSH_BLOCKS) { 1333 do_flush(ents, cur); 1334 1335 ce = bc.normal.lru; 1336 cur = 0; 1337 continue; 1338 } 1339 } 1340 1341 ce = ce->next; 1342 } 1343 1344 if (cur != 0) 1345 do_flush(ents, cur); 1346 1347 cur = 0; 
1348 ce = bc.locked.lru; 1349 while (ce) { 1350 if (ce->dev != dev || (ce->flags & CE_BUSY)) { 1351 ce = ce->next; 1352 continue; 1353 } 1354 1355 if (ce->clone) { 1356 ents[cur++] = ce; 1357 if (cur >= NUM_FLUSH_BLOCKS) { 1358 do_flush(ents, cur); 1359 1360 ce = bc.locked.lru; 1361 cur = 0; 1362 continue; 1363 } 1364 } 1365 1366 ce = ce->next; 1367 } 1368 1369 if (cur != 0) 1370 do_flush(ents, cur); 1371 1372 UNLOCK(bc.lock); 1373 1374 return 0; 1375} 1376 1377 1378static void 1379real_remove_cached_blocks(int dev, int allow_writes, cache_ent_list *cel) 1380{ 1381 void *junk; 1382 cache_ent *ce, *next = NULL; 1383 1384 for(ce=cel->lru; ce; ce=next) { 1385 next = ce->next; 1386 1387 if (ce->dev != dev) { 1388 continue; 1389 } 1390 1391 if (ce->lock != 0 || (ce->flags & CE_BUSY)) { 1392 printf("*** remove_cached_dev: block %Ld has lock = %d, flags " 1393 "0x%x! ce @ 0x%lx\n", ce->block_num, ce->lock, ce->flags, 1394 (ulong)ce); 1395 } 1396 1397 if (allow_writes == ALLOW_WRITES && 1398 ((ce->flags & CE_DIRTY) || ce->clone)) { 1399 ce->flags |= CE_BUSY; 1400 flush_cache_ent(ce); 1401 ce->flags &= ~CE_BUSY; 1402 } 1403 1404 /* unlink this guy */ 1405 if (cel->lru == ce) 1406 cel->lru = ce->next; 1407 1408 if (cel->mru == ce) 1409 cel->mru = ce->prev; 1410 1411 if (ce->prev) 1412 ce->prev->next = ce->next; 1413 if (ce->next) 1414 ce->next->prev = ce->prev; 1415 1416 if (ce->clone) 1417 free(ce->clone); 1418 ce->clone = NULL; 1419 1420 if (ce->data) 1421 free(ce->data); 1422 ce->data = NULL; 1423 1424 if ((junk = hash_delete(&bc.ht, ce->dev, ce->block_num)) != ce) { 1425 panic("*** remove_cached_device: bad hash table entry %ld " 1426 "0x%lx != 0x%lx\n", ce->block_num, (ulong)junk, (ulong)ce); 1427 } 1428 1429 free(ce); 1430 1431 bc.cur_blocks--; 1432 } 1433 1434} 1435 1436int 1437remove_cached_device_blocks(int dev, int allow_writes) 1438{ 1439 LOCK(bc.lock); 1440 1441 real_remove_cached_blocks(dev, allow_writes, &bc.normal); 1442 real_remove_cached_blocks(dev, 
allow_writes, &bc.locked);
    /* (continuation of remove_cached_device_blocks(): second call handles
       the locked list) */

    /* device is gone: make future range checks reject any access */
    max_device_blocks[dev] = 0;

    UNLOCK(bc.lock);

    return 0;
}



/*
   Flush the specific range [bnum, bnum+nblocks) of `dev' to disk.
   Blocks not present in the cache, or already clean with no clone,
   are skipped.  Flushing is batched NUM_FLUSH_BLOCKS at a time with
   the cache lock dropped around the actual I/O (entries are marked
   CE_BUSY to protect them meanwhile).  Returns 0, or EBADF on a
   corrupted lookup.
*/
int
flush_blocks(int dev, fs_off_t bnum, int nblocks)
{
    int cur, i;
    cache_ent *ce;
    cache_ent *ents[NUM_FLUSH_BLOCKS];

    if (nblocks == 0)   /* might as well check for this */
        return 0;

    LOCK(bc.lock);

    cur = 0;
    for(; nblocks > 0; nblocks--, bnum++) {
        ce = block_lookup(dev, bnum);
        if (ce == NULL)
            continue;

        /* sanity: the hash must return exactly the block we asked for */
        if (bnum != ce->block_num || dev != ce->dev) {
            UNLOCK(bc.lock);
            panic("error2: looked up dev %d block %ld but found %d %ld\n",
                  dev, bnum, ce->dev, ce->block_num);
            return EBADF;
        }

        /* nothing to write for clean, un-cloned blocks */
        if ((ce->flags & CE_DIRTY) == 0 && ce->clone == NULL)
            continue;

        ce->flags |= CE_BUSY;
        ents[cur++] = ce;

        if (cur >= NUM_FLUSH_BLOCKS) {
            UNLOCK(bc.lock);

            /* sort into disk order before writing.  NOTE(review):
               sizeof(cache_ent **) is pointer-sized like
               sizeof(cache_ent *), so this works as intended. */
            qsort(ents, cur, sizeof(cache_ent **), cache_ent_cmp);
            flush_ents(ents, cur);

            LOCK(bc.lock);
            for(i=0; i < cur; i++) {
                ents[i]->flags &= ~CE_BUSY;
            }
            cur = 0;
        }
    }

    UNLOCK(bc.lock);

    if (cur == 0)     /* nothing more to do */
        return 0;

    /* flush out the last few buggers */
    qsort(ents, cur, sizeof(cache_ent **), cache_ent_cmp);
    flush_ents(ents, cur);

    /* NOTE(review): these busy bits are cleared without holding bc.lock;
       flag updates appear to rely on CE_BUSY ownership semantics here */
    for(i=0; i < cur; i++) {
        ents[i]->flags &= ~CE_BUSY;
    }

    return 0;
}


/*
   Mark the range [bnum, bnum+nblocks) of `dev' dirty in the cache.
   Every block in the range must already be cached; if one is missing
   the walk stops and ENOENT is returned (blocks before it stay marked).
*/
int
mark_blocks_dirty(int dev, fs_off_t bnum, int nblocks)
{
    int ret = 0;
    cache_ent *ce;

    LOCK(bc.lock);

    while(nblocks > 0) {
        ce = block_lookup(dev, bnum);
        if (ce) {
            ce->flags |= CE_DIRTY;
            bnum      += 1;
            nblocks   -= 1;
        } else {     /* hmmm, that's odd, didn't find it */
            printf("** mark_blocks_diry couldn't find block %Ld (len %d)\n",
                   bnum, nblocks);
            ret = ENOENT;
            break;
        }
    }

    UNLOCK(bc.lock);

    return ret;
}



/*
   Drop one lock reference on the cached block (dev, bnum), the inverse
   of get_block().  When the count reaches zero the block migrates from
   the locked list back onto the normal LRU list.  The block must be
   present and locked; a missing block is a fatal caller bug (panic).
   Returns 0, or EBADF on a corrupted lookup.
*/
int
release_block(int dev, fs_off_t bnum)
{
    cache_ent *ce;

    /* printf("rlsb: %ld\n", bnum); */
    LOCK(bc.lock);

    ce = block_lookup(dev, bnum);
    if (ce) {
        /* NOTE(review): panic() here is called before UNLOCK; if panic
           returns in some builds, the lock is then released below */
        if (bnum != ce->block_num || dev != ce->dev) {
            panic("*** error3: looked up dev %d block %ld but found %d %ld\n",
                  dev, bnum, ce->dev, ce->block_num);
            UNLOCK(bc.lock);
            return EBADF;
        }

        ce->lock--;

        /* more releases than acquires -- complain but keep going */
        if (ce->lock < 0) {
            printf("rlsb: whoa nellie! ce %Ld has lock == %d\n",
                   ce->block_num, ce->lock);
        }

        if (ce->lock == 0) {
            delete_from_list(&bc.locked, ce);
            add_to_head(&bc.normal, ce);
        }

    } else {    /* hmmm, that's odd, didn't find it */
        panic("** release_block asked to find %ld but it's not here\n",
              bnum);
    }

    UNLOCK(bc.lock);

    return 0;
}


/*
   Allocate a fresh, zeroed cache_ent plus a `bsize'-byte data buffer.
   dev/block_num are set to -1 so callers can tell the entry has never
   held real data.  Returns NULL on allocation failure (after panic).
*/
static cache_ent *
new_cache_ent(int bsize)
{
    cache_ent *ce;

    ce = (cache_ent *)calloc(1, sizeof(cache_ent));
    if (ce == NULL) {
        panic("*** error: cache can't allocate memory!\n");
        return NULL;
    }

    ce->data = malloc(bsize);
    if (ce->data == NULL) {
        free(ce);
        panic("** error cache can't allocate data memory\n");
        /* NOTE(review): this UNLOCK looks out of place -- this function
           does not take bc.lock itself; presumably it compensates for
           the caller (get_ents) holding the lock.  Verify before reuse. */
        UNLOCK(bc.lock);
        return NULL;
    }

    /* -1 marks "never used": see the ce->dev == -1 checks in the
       cache_block_io() miss path */
    ce->dev       = -1;
    ce->block_num = -1;

    return ce;
}


/*
   Pick `num_needed' victim cache entries (up to `max'), returning them
   busy-marked in ents[] and the actual count in *num_gotten.  Caller
   must hold bc.lock.  Strategy: allocate brand-new entries while the
   cache is below its block budget, then recycle from the LRU end of
   the normal list, sleeping and retrying (bounded by max_retry) when
   everything is busy.
*/
static void
get_ents(cache_ent **ents, int num_needed, int max, int *num_gotten, int bsize)
{
    int cur, retry_counter = 0, max_retry = num_needed * 256;
    cache_ent *ce;

    if (num_needed > max)
        panic("get_ents: num_needed %d but max %d (doh!)\n", num_needed, max);

    /* if the cache isn't full yet, just allocate the blocks */
    for(cur=0; bc.cur_blocks < bc.max_blocks && cur < num_needed; cur++) {
        ents[cur] = new_cache_ent(bsize);
        if (ents[cur] == NULL)
            break;
        bc.cur_blocks++;
    }

    /* pluck off blocks from
the LRU end of the normal list, keep trying too */
    while(cur < num_needed && retry_counter < max_retry) {
        for(ce=bc.normal.lru; ce && cur < num_needed; ce=ce->next) {
            /* locked blocks live on bc.locked, never here */
            if (ce->lock)
                panic("get_ents: normal list has locked blocks (ce 0x%x)\n",ce);

            if (ce->flags & CE_BUSY)   /* don't touch busy blocks */
                continue;

            ce->flags |= CE_BUSY;
            ents[cur++] = ce;
        }

        /* everything was busy: give other threads a chance to finish
           their I/O, then rescan */
        if (cur < num_needed) {
            UNLOCK(bc.lock);
            snooze(10000);
            LOCK(bc.lock);
            retry_counter++;
        }
    }

    if (cur < num_needed && retry_counter >= max_retry) {  /* oh shit! */
        dump_cache_list();
        UNLOCK(bc.lock);
        panic("get_ents: waited too long; can't get enough ce's (c %d n %d)\n",
              cur, num_needed);
    }

    /*
      If the last block is a dirty one, try to get more of 'em so
      that we can flush a bunch of blocks at once.
    */
    if (cur && cur < max &&
        ((ents[cur-1]->flags & CE_DIRTY) || ents[cur-1]->clone)) {

        for(ce=ents[cur-1]->next; ce && cur < max; ce=ce->next) {
            if (ce->flags & CE_BUSY)   /* don't touch busy blocks */
                continue;

            if (ce->lock)
                panic("get_ents:2 dirty list has locked blocks (ce 0x%x)\n",ce);

            ce->flags |= CE_BUSY;
            ents[cur++] = ce;
        }
    }

    *num_gotten = cur;
}


/*
   Read `num' consecutive blocks starting at `bnum' straight into the
   data buffers of ents[0..num-1] with a single scatter readv.
   Returns 0 on success, EINVAL on a short or failed read.
   NOTE(review): the second printf dereferences `iov' after
   release_iovec_array(iov) -- it only prints the pointer value, but
   confirm that is intentional.
*/
static int
read_into_ents(int dev, fs_off_t bnum, cache_ent **ents, int num, int bsize)
{
    int    i, ret;
    struct iovec *iov;

    iov = get_iovec_array();

    for (i = 0; i < num; i++) {
        iov[i].iov_base = ents[i]->data;
        iov[i].iov_len  = bsize;
    }

    if (chatty_io > 2)
        printf("readv @ %Ld for %d blocks (at %Ld, block_size = %d)\n", bnum, num, bnum*bsize, bsize);
    ret = readv_pos(dev, bnum*bsize, iov, num);

    release_iovec_array(iov);

    if (ret != num*bsize) {
        printf("read_into_ents: asked to read %d bytes but got %d\n",
               num*bsize, ret);
        printf("*** iov @ %p (num %d)\n", iov, num);
        return EINVAL;
    } else
        return 0;
}



/* operation flags for cache_block_io() */
#define CACHE_READ          0x0001
#define CACHE_WRITE         0x0002
#define CACHE_NOOP          0x0004     /* for getting empty blocks */
#define CACHE_LOCKED        0x0008
#define CACHE_READ_AHEAD_OK 0x0010     /* it's ok to do read-ahead */


/*
   Render an op bitmask as a human-readable string for debug output.
   NOTE(review): returns a static buffer (not thread-safe), and the
   buffer keeps its previous contents if none of READ/WRITE/NOOP is
   set -- acceptable for the debug-only call sites visible here.
*/
static char *
op_to_str(int op)
{
    static char buff[128];

    if (op & CACHE_READ)
        strcpy(buff, "READ");
    else if (op & CACHE_WRITE)
        strcpy(buff, "WRITE");
    else if (op & CACHE_NOOP)
        strcpy(buff, "NOP");

    if (op & CACHE_LOCKED)
        strcat(buff, " LOCKED");

    if (op & CACHE_READ_AHEAD_OK)
        strcat(buff, " (AHEAD)");

    return buff;
}

/*
   The heart of the cache: perform `op' (CACHE_READ / CACHE_WRITE /
   CACHE_NOOP, optionally | CACHE_LOCKED | CACHE_READ_AHEAD_OK) on
   `num_blocks' blocks of size `bsize' starting at `bnum' on `dev'.

   data     -- caller buffer to copy to/from, or NULL when the caller
               wants a pointer into the cache instead (get_block()).
   dataptr  -- where to store that in-cache pointer; only meaningful
               for single-block locked requests.

   Returns 0 on success or an errno-style code.  I/O of 64K or more
   bypasses the cache entirely (with fixups against any overlapping
   cached blocks).
*/
static int
cache_block_io(int dev, fs_off_t bnum, void *data, fs_off_t num_blocks, int bsize,
               int op, void **dataptr)
{
    size_t          err = 0;
    cache_ent      *ce;
    cache_ent_list *cel;

    if (chatty_io > 1)
        printf("cbio: bnum = %Ld, num_blocks = %Ld, bsize = %d, op = %s\n", bnum, num_blocks,
               bsize, op_to_str(op));

    /* some sanity checks first */
    if (bsize == 0)
        panic("cache_io: block size == 0 for bnum %Ld?!?\n", bnum);

    if (num_blocks == 0)
        panic("cache_io: bnum %Ld has num_blocks == 0!\n", bnum);

    if (data == NULL && dataptr == NULL) {
        printf("major butthead move: null data and dataptr! bnum %Ld:%Ld\n",
               bnum, num_blocks);
        return ENOMEM;
    }

    /* data == NULL means "give me a cache pointer" (get_block() style):
       only valid for a single block and never for writes */
    if (data == NULL) {
        if (num_blocks != 1)    /* get_block() should never do that */
            panic("cache_io: num_blocks %Ld but should be 1\n",
                  num_blocks);

        if (op & CACHE_WRITE)
            panic("cache_io: get_block() asked to write?!?\n");
    }

    if (bnum + num_blocks > max_device_blocks[dev]) {
        printf("dev %d: access to blocks %Ld:%Ld but max_dev_blocks is %Ld\n",
               dev, bnum, num_blocks, max_device_blocks[dev]);

        /* deliberate store to a bogus address so the offending caller
           dies right here with a recognizable value */
        // let the app crash here
        *(int *)0x3100 = 0xc0debabe;
        return EINVAL;
    }

    /* timestamp used by the background flusher (defined elsewhere) */
    last_cache_access = system_time();

    /* if the i/o is greater than 64k, do it directly */
    if (num_blocks * bsize >= 64 * 1024) {
        char *ptr;
        fs_off_t tmp;

        if (data == NULL || (op & CACHE_LOCKED)) {
            panic("*** asked to do a large locked io that's too hard!\n");
        }


        if (op & CACHE_READ) {
            if (read_phys_blocks(dev, bnum, data, num_blocks, bsize) != 0) {
                printf("cache read:read_phys_blocks failed (%s on blocks %Ld:%Ld)!\n",
                       strerror(errno), bnum, num_blocks);
                return EINVAL;
            }

            LOCK(bc.lock);

            /* if any of the blocks are in the cache, grab them instead */
            ptr = data;
            for(tmp=bnum; tmp < bnum+num_blocks; tmp++, ptr+=bsize) {
                ce = block_lookup(dev, tmp);
                /*
                  if we find a block in the cache we have to copy its
                  data just in case it is more recent than what we just
                  read from disk (which could happen if someone wrote
                  these blocks after we did the read but before we locked
                  the cache and entered this loop).
                */
                if (ce) {
                    /* hash consistency check: must match what we asked for */
                    if (tmp != ce->block_num || dev != ce->dev) {
                        UNLOCK(bc.lock);
                        panic("*** error4: looked up dev %d block %Ld but "
                              "found %d %Ld\n", dev, tmp, ce->dev,
                              ce->block_num);
                    }

                    /* cached copy wins over what we just read from disk */
                    memcpy(ptr, ce->data, bsize);
                }
            }

            UNLOCK(bc.lock);
        } else if (op & CACHE_WRITE) {
            LOCK(bc.lock);

            /* if any of the blocks are in the cache, update them too */
            ptr = data;
            for(tmp=bnum; tmp < bnum+num_blocks; tmp++, ptr+=bsize) {
                ce = block_lookup(dev, tmp);
                if (ce) {
                    if (tmp != ce->block_num || dev != ce->dev) {
                        UNLOCK(bc.lock);
                        panic("*** error5: looked up dev %d block %Ld but "
                              "found %d %Ld\n", dev, tmp, ce->dev,
                              ce->block_num);
                        return EBADF;
                    }

                    /* push any journal clone out before overwriting the
                       live copy, so the log keeps its snapshot */
                    /* XXXdbg -- this isn't strictly necessary */
                    if (ce->clone) {
                        printf("over-writing cloned data (ce %p bnum %Ld)...\n", ce, tmp);
                        flush_cache_ent(ce);
                    }

                    /* copy the data into the cache */
                    memcpy(ce->data, ptr, bsize);
                }
            }

            UNLOCK(bc.lock);

            if (write_phys_blocks(dev, bnum, data, num_blocks, bsize) != 0) {
                printf("cache write: write_phys_blocks failed (%s on blocks "
                       "%Ld:%Ld)!\n", strerror(errno), bnum, num_blocks);
                return EINVAL;
            }
        } else {
            printf("bad cache op %d (bnum %Ld nblocks %Ld)\n", op, bnum, num_blocks);
            return EINVAL;
        }

        return 0;
    }


    /* normal (cached) path: process the request block by block, hitting
       the cache when possible and batching the misses */
    LOCK(bc.lock);
    while(num_blocks) {

        ce = block_lookup(dev, bnum);
        if (ce) {
            /* hit: sanity-check the entry before using it */
            if (bnum != ce->block_num || dev != ce->dev) {
                UNLOCK(bc.lock);
                panic("*** error6: looked up dev %d block %ld but found "
                      "%d %ld\n", dev, bnum, ce->dev, ce->block_num);
                return EBADF;
            }

            /* a device may only be cached at one block size at a time */
            if (bsize != ce->bsize) {
                panic("*** requested bsize %d but ce->bsize %d ce @ 0x%x\n",
                      bsize, ce->bsize, ce);
            }

            /* delete this ent from the list it is in because it may change */
            /* it lives on the locked list iff its lock count is non-zero */
            if (ce->lock)
                cel = &bc.locked;
            else
                cel = &bc.normal;

            delete_from_list(cel, ce);

            if (op & CACHE_READ) {
                /* copy out to the caller's buffer, or hand back a
                   pointer straight into the cache (get_block() case) */
                if (data && data != ce->data) {
                    memcpy(data, ce->data, bsize);
                } else if (dataptr) {
                    *dataptr = ce->data;
                } else {
                    printf("cbio:data %p dptr %p ce @ %p ce->data %p\n",
                           data, dataptr, ce, ce->data);
                }
            } else if (op & CACHE_WRITE) {
                if (data && data != ce->data)
                    memcpy(ce->data, data, bsize);

                ce->flags |= CE_DIRTY;
            } else if (op & CACHE_NOOP) {
                /* caller just wants a zeroed block (get_empty_block()) */
                memset(ce->data, 0, bsize);
                if (data)
                    memset(data, 0, bsize);

                if (dataptr)
                    *dataptr = ce->data;

                ce->flags |= CE_DIRTY;
            } else {
                panic("cached_block_io: bogus op %d\n", op);
            }

            if (op & CACHE_LOCKED)
                ce->lock++;

            /* re-file on whichever list matches the (possibly new) state */
            if (ce->lock)
                cel = &bc.locked;
            else
                cel = &bc.normal;

            /* now put this ent at the head of the appropriate list */
            add_to_head(cel, ce);

            if (data != NULL)
                data = (void *)((char *)data + bsize);

            bnum       += 1;
            num_blocks -= 1;

            continue;
        } else {                  /* it's not in the cache */
            int cur, cur_nblocks, num_dirty, real_nblocks, num_needed;
            cache_ent *ents[NUM_FLUSH_BLOCKS];

            /*
              here we find out how many additional blocks in this request
              are not in the cache.  the idea is that then we can do one
              big i/o on that many blocks at once.
            */
            for(cur_nblocks=1;
                cur_nblocks < num_blocks && cur_nblocks < NUM_FLUSH_BLOCKS;
                cur_nblocks++) {

                /* we can call hash_lookup() directly instead of
                   block_lookup() because we don't care about the
                   state of the busy bit of the block at this point
                */
                if (hash_lookup(&bc.ht, dev, bnum + cur_nblocks))
                    break;
            }

            /*
              here we try to figure out how many extra blocks we should read
              for read-ahead.
              we want to read as many as possible that are
              not already in the cache and that don't cause us to try and
              read beyond the end of the disk.
            */
            if ((op & CACHE_READ) && (op & CACHE_READ_AHEAD_OK) &&
                (cur_nblocks * bsize) < read_ahead_size) {

                /* extend num_needed past the request up to the
                   read-ahead budget, stopping at end-of-device or the
                   first already-cached block */
                for(num_needed=cur_nblocks;
                    num_needed < (read_ahead_size / bsize);
                    num_needed++) {

                    if ((bnum + num_needed) >= max_device_blocks[dev])
                        break;

                    if (hash_lookup(&bc.ht, dev, bnum + num_needed))
                        break;
                }
            } else {
                num_needed = cur_nblocks;
            }

            /* this will get us pointers to a bunch of cache_ents we can use */
            get_ents(ents, num_needed, NUM_FLUSH_BLOCKS, &real_nblocks, bsize);

            if (real_nblocks < num_needed) {
                panic("don't have enough cache ents (need %d got %d %ld::%d)\n",
                      num_needed, real_nblocks, bnum, num_blocks);
            }

            /*
              There are now three variables used as limits within the ents
              array.  This is how they are related:

                  cur_nblocks <= num_needed <= real_nblocks

              Ents from 0 to cur_nblocks-1 are going to be used to fulfill
              this IO request.  Ents from cur_nblocks to num_needed-1 are
              for read-ahead.  Ents from num_needed to real_nblocks are
              extra blocks that get_ents() asked us to flush.  Often (and
              always on writes) cur_nblocks == num_needed.

              Below, we sort the list of ents so that when we flush them
              they go out in order.
            */

            /* NOTE(review): sizeof(cache_ent **) is pointer-sized like
               sizeof(cache_ent *), so the element size is correct */
            qsort(ents, real_nblocks, sizeof(cache_ent **), cache_ent_cmp);

            /*
              delete each ent from its list because it will change.  also
              count up how many dirty blocks there are and insert into the
              hash table any new blocks so that no one else will try to
              read them in when we release the cache semaphore to do our I/O.
            */
            for(cur=0,num_dirty=0; cur < real_nblocks; cur++) {
                ce = ents[cur];
                ce->flags |= CE_BUSY;

                /*
                  insert the new block into the hash table with its new block
                  number. note that the block is still in the hash table for
                  its old block number -- and it has to be until we are done
                  flushing it from the cache (to prevent someone else from
                  sneaking in in front of us and trying to read the same
                  block that we're flushing).
                */
                if (cur < num_needed) {
                    if (hash_insert(&bc.ht, dev, bnum + cur, ce) != 0)
                        panic("could not insert cache ent for %d %ld (0x%lx)\n",
                              dev, bnum + cur, (ulong)ents[cur]);
                }

                /* brand-new entries (dev == -1) are on no list and have
                   no old identity to clean up */
                if (ce->dev == -1)
                    continue;

                if ((ce->flags & CE_DIRTY) || ce->clone)
                    num_dirty++;

                /* victims always come off the normal list; a locked one
                   here means get_ents() misbehaved */
                if (ce->lock)
                    panic("cbio: can't use locked blocks here ce @ 0x%x\n",ce);
                else
                    cel = &bc.normal;

                delete_from_list(cel, ce);
            }
            ce = NULL;


            /*
              we release the block cache semaphore here so that we can
              go do all the i/o we need to do (flushing dirty blocks
              that we're kicking out as well as reading any new data).

              because all the blocks we're touching are marked busy
              no one else should mess with them while we're doing this.
            */
            if (num_dirty || (op & CACHE_READ)) {
                UNLOCK(bc.lock);

                /* this flushes any blocks we're kicking out that are dirty */
                if (num_dirty && (err = flush_ents(ents, real_nblocks)) != 0) {
                    printf("flush ents failed (ents @ 0x%lx, nblocks %d!\n",
                           (ulong)ents, cur_nblocks);
                    goto handle_err;
                }

            }

            /*
              now that everything is flushed to disk, go through and
              make sure that the data blocks we're going to use are
              the right block size for this current request (it's
              possible we're kicking out some smaller blocks and need
              to reallocate the data block pointer).
              We do this in two
              steps, first free'ing everything and then going through
              and doing the malloc's to try and be nice to the memory
              system (i.e. allow it to coalesce stuff, etc).
            */
            err = 0;
            for(cur=0; cur < num_needed; cur++) {
                if (ents[cur]->bsize != bsize) {
                    free(ents[cur]->data);
                    ents[cur]->data = NULL;

                    if (ents[cur]->clone) {
                        free(ents[cur]->clone);
                        ents[cur]->clone = NULL;
                    }
                }
            }

            for(cur=0; cur < num_needed; cur++) {
                if (ents[cur]->data == NULL) {
                    ents[cur]->data  = (void *)malloc(bsize);
                    ents[cur]->bsize = bsize;
                }

                if (ents[cur]->data == NULL) {
                    printf("cache: no memory for block (bsize %d)!\n",
                           bsize);
                    err = ENOMEM;
                    break;
                }
            }

            /*
              if this condition is true it's a pretty serious error.
              we'll try and back out gracefully but we're in pretty
              deep at this point and it ain't going to be easy.
            */
  handle_err:
            if (err) {
                /* tear down the entries we claimed for this request:
                   remove both the new (dev, bnum+cur) identity and the
                   old identity from the hash table, then free them */
                for(cur=0; cur < num_needed; cur++) {
                    cache_ent *tmp_ce;

                    tmp_ce = (cache_ent *)hash_delete(&bc.ht,dev,bnum+cur);
                    if (tmp_ce != ents[cur]) {
                        panic("hash_del0: %d %ld got 0x%lx, not 0x%lx\n",
                              dev, bnum+cur, (ulong)tmp_ce,
                              (ulong)ents[cur]);
                    }

                    tmp_ce = (cache_ent *)hash_delete(&bc.ht,ents[cur]->dev,
                                                      ents[cur]->block_num);
                    if (tmp_ce != ents[cur]) {
                        panic("hash_del1: %d %ld got 0x%lx, not 0x%lx\n",
                              ents[cur]->dev, ents[cur]->block_num, (ulong)tmp_ce,
                              (ulong)ents[cur]);
                    }

                    ents[cur]->flags &= ~CE_BUSY;
                    if (ents[cur]->data)
                        free(ents[cur]->data);
                    free(ents[cur]);
                    ents[cur] = NULL;

                    bc.cur_blocks--;
                }

                /* the extra flush-only entries (num_needed..real_nblocks)
                   were never renumbered; just un-busy them and put them
                   back on the normal list */
                if (cur < real_nblocks) {
                    LOCK(bc.lock);
                    for(; cur < real_nblocks; cur++) {
                        ents[cur]->flags &= ~CE_BUSY;

                        /* we have to put them back here */
                        add_to_tail(&bc.normal, ents[cur]);
                    }
                    UNLOCK(bc.lock);
                }

                return ENOMEM;
            }


            /*
              If we go into this if statement, the block cache lock
              has *already been released* up above when we flushed the
              dirty entries.  As always, since the blocks we're mucking
              with are marked busy, they shouldn't get messed with.
            */
            err = 0;
            if (num_dirty || (op & CACHE_READ)) {
                /* this section performs the i/o that we need to do */
                if (op & CACHE_READ) {
                    /* one scatter read fills the request + read-ahead */
                    err = read_into_ents(dev, bnum, ents, num_needed, bsize);
                } else {
                    err = 0;
                }

                if (err != 0) {
                    printf("err %s on dev %d block %Ld:%d (%d) "
                           "data %p, ents[0] %p\n",
                           strerror(errno), dev, bnum, cur_nblocks,
                           bsize, data, ents[0]);
                }

                /*
                  acquire the semaphore here so that we can go on mucking
                  with the cache data structures.  We need to delete old
                  block numbers from the hash table and set the new block
                  number's for the blocks we just read in.  We also put the
                  read-ahead blocks at the head of mru list.
                */

                LOCK(bc.lock);
            }

            for(cur=0; cur < num_needed; cur++) {
                cache_ent *tmp_ce;

                ce = ents[cur];
                /* drop the recycled entry's old identity from the hash
                   table (its new identity was inserted earlier) */
                if (ce->dev != -1) {
                    tmp_ce = hash_delete(&bc.ht, ce->dev, ce->block_num);
                    if (tmp_ce == NULL || tmp_ce != ce) {
                        panic("*** hash_delete failure (ce 0x%x tce 0x%x)\n",
                              ce, tmp_ce);
                    }
                }

                /* read-ahead blocks (cur >= cur_nblocks) are finished
                   now: adopt the new identity and file them on the
                   normal list.  Request blocks are finalized later. */
                if (err == 0 && cur >= cur_nblocks) {
                    ce->dev       = dev;
                    ce->block_num = bnum + cur;
                    ce->flags    &= ~CE_BUSY;
                    add_to_head(&bc.normal, ce);
                }
            }
            ce = NULL;

            /*
              clear the busy bit on the blocks we force-flushed and
              put them on the normal list since they're now clean.
            */
            /* (cur continues from num_needed after the loop above) */
            for(; cur < real_nblocks; cur++) {
                ents[cur]->flags &= ~CE_BUSY;

                if (ents[cur]->lock)
                    panic("should not have locked blocks here (ce 0x%x)\n",
                          ents[cur]);

                add_to_tail(&bc.normal, ents[cur]);
            }

            if (err) {   /* then we have some cleanup to do */
                for(cur=0; cur < num_needed; cur++) {
                    cache_ent *tmp_ce;

                    /* we delete all blocks from the cache so we don't
                       leave partially written blocks in the cache */

                    tmp_ce = (cache_ent *)hash_delete(&bc.ht,dev,bnum+cur);
                    if (tmp_ce != ents[cur]) {
                        panic("hash_del: %d %ld got 0x%lx, not 0x%lx\n",
                              dev, bnum+cur, (ulong)tmp_ce,
                              (ulong)ents[cur]);
                    }

                    ce = ents[cur];
                    ce->flags &= ~CE_BUSY;

                    free(ce->data);
                    ce->data = NULL;

                    free(ce);
                    ents[cur] = NULL;

                    bc.cur_blocks--;
                }
                ce = NULL;

                UNLOCK(bc.lock);
                return err;
            }


            /*
              last step: go through and make sure all the cache_ent
              structures have the right data in them, delete old guys, etc.
2256 */ 2257 for(cur=0; cur < cur_nblocks; cur++) { 2258 ce = ents[cur]; 2259 2260 if (ce->dev != -1) { /* then clean this guy up */ 2261 if (ce->next || ce->prev) 2262 panic("ce @ 0x%x should not be in a list yet!\n", ce); 2263 2264 if (ce->clone) 2265 free(ce->clone); 2266 2267 if (ce->data == NULL) 2268 panic("ce @ 0x%lx has a null data ptr\n", (ulong)ce); 2269 } 2270 2271 ce->dev = dev; 2272 ce->block_num = bnum + cur; 2273 ce->bsize = bsize; 2274 ce->flags = CE_NORMAL; 2275 ce->lock = 0; 2276 ce->clone = NULL; 2277 ce->func = ce->arg = NULL; 2278 ce->next = ce->prev = NULL; 2279 2280 if (op & CACHE_READ) { 2281 if (data) 2282 memcpy(data, ce->data, bsize); 2283 } else if (op & CACHE_WRITE) { 2284 ce->flags |= CE_DIRTY; 2285 memcpy(ce->data, data, bsize); 2286 } else if (op & CACHE_NOOP) { 2287 memset(ce->data, 0, bsize); 2288 if (data) 2289 memset(data, 0, bsize); 2290 2291 ce->flags |= CE_DIRTY; 2292 } 2293 2294 if (op & CACHE_LOCKED) { 2295 ce->lock++; 2296 cel = &bc.locked; 2297 } else { 2298 cel = &bc.normal; 2299 } 2300 2301 /* now stick this puppy at the head of the mru list */ 2302 add_to_head(cel, ce); 2303 2304 2305 if (dataptr) { 2306 *dataptr = ce->data; 2307 } 2308 2309 if (data != NULL) 2310 data = (void *)((char *)data + bsize); 2311 else if (cur_nblocks != 1) 2312 panic("cache can't handle setting data_ptr twice!\n"); 2313 } /* end of for(cur=0; cur < cur_nblocks; cur++) */ 2314 2315 bnum += cur_nblocks; 2316 num_blocks -= cur_nblocks; 2317 2318 } /* end of else it's not in the cache */ 2319 2320 } /* end of while(num_blocks) */ 2321 2322 UNLOCK(bc.lock); 2323 2324 return 0; 2325} 2326 2327 2328void * 2329get_block(int dev, fs_off_t bnum, int bsize) 2330{ 2331 void *data; 2332 2333 if (cache_block_io(dev, bnum, NULL, 1, bsize, CACHE_READ|CACHE_LOCKED|CACHE_READ_AHEAD_OK, 2334 &data) != 0) 2335 return NULL; 2336 2337 return data; 2338} 2339 2340void * 2341get_empty_block(int dev, fs_off_t bnum, int bsize) 2342{ 2343 void *data; 2344 2345 if 
       (cache_block_io(dev, bnum, NULL, 1, bsize, CACHE_NOOP|CACHE_LOCKED,
                       &data) != 0)
        return NULL;

    return data;
}

/*
   Read `num_blocks' blocks into the caller's buffer through the cache;
   read-ahead is permitted.  Returns 0 or an errno-style code.
*/
int
cached_read(int dev, fs_off_t bnum, void *data, fs_off_t num_blocks, int bsize)
{
    return cache_block_io(dev, bnum, data, num_blocks, bsize,
                          CACHE_READ | CACHE_READ_AHEAD_OK, NULL);
}


/*
   Write `num_blocks' blocks through the cache (blocks are marked dirty;
   actual disk I/O may be deferred).  Returns 0 or an errno-style code.
*/
int
cached_write(int dev, fs_off_t bnum, const void *data, fs_off_t num_blocks,int bsize)
{
    return cache_block_io(dev, bnum, (void *)data, num_blocks, bsize,
                          CACHE_WRITE, NULL);
}

/*
   Same as cached_write() but also locks each block in the cache
   (used by the journal; pair with release_block()/set_blocks_info()).
*/
int
cached_write_locked(int dev, fs_off_t bnum, const void *data,
                    fs_off_t num_blocks, int bsize)
{
    return cache_block_io(dev, bnum, (void *)data, num_blocks, bsize,
                          CACHE_WRITE | CACHE_LOCKED, NULL);
}


/*
   Opportunistically write back up to NUM_FLUSH_BLOCKS blocks for `dev'.
   With prefer_log_blocks != 0 only dirty blocks that have a journal
   callback (ce->func) are taken from the normal list; any remaining
   room is filled with cloned blocks from the locked list.  The actual
   I/O happens after bc.lock is dropped, relying on CE_BUSY to keep the
   chosen entries stable.
*/
void
force_cache_flush(int dev, int prefer_log_blocks)
{
    int        i, count = 0;
    cache_ent *ce;
    cache_ent *ents[NUM_FLUSH_BLOCKS];


    LOCK(bc.lock);

    for(ce=bc.normal.lru; ce; ce=ce->next) {
        if ((ce->dev == dev) &&
            (ce->flags & CE_BUSY) == 0 &&
            ((ce->flags & CE_DIRTY) || ce->clone) &&
            ((prefer_log_blocks && ce->func) || (prefer_log_blocks == 0))) {

            ce->flags |= CE_BUSY;
            ents[count++] = ce;

            if (count >= NUM_FLUSH_BLOCKS) {
                break;
            }
        }
    }

    /* if we've got some room left, try and grab any cloned blocks */
    if (count < NUM_FLUSH_BLOCKS) {
        for(ce=bc.locked.lru; ce; ce=ce->next) {
            if ((ce->dev == dev) &&
                (ce->flags & CE_BUSY) == 0 &&
                (ce->clone)) {

                ce->flags |= CE_BUSY;
                ents[count++] = ce;

                if (count >= NUM_FLUSH_BLOCKS) {
                    break;
                }
            }
        }
    }

    UNLOCK(bc.lock);

    if (count != 0) {
        /* disk-order sort; see the sizeof note at the other qsort calls */
        qsort(ents, count, sizeof(cache_ent **), cache_ent_cmp);
        flush_ents(ents, count);

        /* NOTE(review): busy bits cleared without re-taking bc.lock,
           matching the pattern in flush_blocks() */
        for(i=0; i < count; i++)
            ents[i]->flags &= ~CE_BUSY;
    }
}