1/*
2   This file contains the global device cache for the BeOS.  All
3   file system I/O comes through here.  The cache can handle blocks
4   of different sizes for multiple different underlying physical
5   devices.
6
7   The cache is organized as a hash table (for lookups by device
8   and block number) and two doubly-linked lists.  The normal
9   list is for "normal" blocks which are either clean or dirty.
10   The locked list is for blocks that are locked in the cache by
11   BFS.  The lists are LRU ordered.
12
13   Most of the work happens in the function cache_block_io() which
14   is quite lengthy.  The other functions of interest are get_ents()
15   which picks victims to be kicked out of the cache; flush_ents()
16   which does the work of flushing them; and set_blocks_info() which
17   handles cloning blocks and setting callbacks so that the BFS
18   journal will work properly.  If you want to modify this code it
19   will take some study but it's not too bad.  Do not think about
20   separating the list of clean and dirty blocks into two lists as
21   I did that already and it's slower.
22
23   Originally this cache code was written while listening to the album
24   "Ride the Lightning" by Metallica.  The current version was written
25   while listening to "Welcome to SkyValley" by Kyuss as well as an
26   ambient collection on the German label FAX.  Music helps a lot when
27   writing code.
28
29   THIS CODE COPYRIGHT DOMINIC GIAMPAOLO.  NO WARRANTY IS EXPRESSED
30   OR IMPLIED.  YOU MAY USE THIS CODE AND FREELY DISTRIBUTE IT FOR
31   NON-COMMERCIAL USE AS LONG AS THIS NOTICE REMAINS ATTACHED.
32
33   FOR COMMERCIAL USE, CONTACT DOMINIC GIAMPAOLO (dbg@be.com).
34
35   Dominic Giampaolo
36   dbg@be.com
37*/
38
#include <errno.h>
#include <fcntl.h>
#include <memory.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/uio.h>
#include <unistd.h>
48
49
50#include <OS.h>
51#include <KernelExport.h>
52
53#include "fs_cache.h"
54#include "fs_cache_priv.h"
55#include "lock.h"
56
57
58
59#ifndef USER
60#define printf dprintf
61#endif
62#ifdef USER
63#define kprintf printf
64#endif
65
66
67typedef off_t fs_off_t;
68
69/* forward prototypes */
70static int   flush_ents(cache_ent **ents, int n_ents);
71
72//static int   do_dump(int argc, char **argv);
73//static int   do_find_block(int argc, char **argv);
74//static int   do_find_data(int argc, char **argv);
75//static void  cache_flusher(void *arg, int phase);
76
77
78int chatty_io = 0;
79
80#define CHUNK (512 * 1024)   /* a hack to work around scsi driver bugs */
81
82static void
83beos_panic(const char *format, ...)
84{
85    va_list     ap;
86
87    va_start(ap, format);
88    vfprintf(stderr, format, ap);
89    va_end(ap);
90
91    while (TRUE)
92        ;
93}
94
95size_t
96beos_read_phys_blocks(int fd, fs_off_t bnum, void *data, uint num_blocks, int bsize)
97{
98    size_t ret = 0;
99    size_t sum;
100
101    if (chatty_io)
102        printf("R: %8Ld : %3d\n", bnum, num_blocks);
103
104    if (num_blocks * bsize < CHUNK)
105        ret = read_pos(fd, bnum * bsize, data, num_blocks * bsize);
106    else {
107        for(sum=0; (sum + CHUNK) <= (num_blocks * bsize); sum += CHUNK) {
108            ret = read_pos(fd, (bnum * bsize) + sum, data, CHUNK);
109            if (ret != CHUNK)
110                break;
111
112            data = (void *)((char *)data + CHUNK);
113        }
114
115        if (ret == CHUNK && ((num_blocks * bsize) - sum) > 0) {
116            ret = read_pos(fd, (bnum * bsize) + sum, data,
117                           (num_blocks * bsize) - sum);
118
119            if (ret == (num_blocks * bsize) - sum)
120                ret = num_blocks * bsize;
121        } else if (ret == CHUNK) {
122            ret = num_blocks * bsize;
123        }
124    }
125
126    if (ret == num_blocks * bsize)
127        return 0;
128    else
129        return EBADF;
130}
131
132size_t
133beos_write_phys_blocks(int fd, fs_off_t bnum, void *data, uint num_blocks, int bsize)
134{
135    size_t ret = 0;
136    size_t sum;
137
138    if (chatty_io)
139        printf("W: %8Ld : %3d\n", bnum, num_blocks);
140
141    if (num_blocks * bsize < CHUNK)
142        ret = write_pos(fd, bnum * bsize, data, num_blocks * bsize);
143    else {
144        for(sum=0; (sum + CHUNK) <= (num_blocks * bsize); sum += CHUNK) {
145            ret = write_pos(fd, (bnum * bsize) + sum, data, CHUNK);
146            if (ret != CHUNK)
147                break;
148
149            data = (void *)((char *)data + CHUNK);
150        }
151
152        if (ret == CHUNK && ((num_blocks * bsize) - sum) > 0) {
153            ret = write_pos(fd, (bnum * bsize) + sum, data,
154                            (num_blocks * bsize) - sum);
155
156            if (ret == (num_blocks * bsize) - sum)
157                ret = num_blocks * bsize;
158        } else if (ret == CHUNK) {
159            ret = num_blocks * bsize;
160        }
161    }
162
163
164    if (ret == num_blocks * bsize)
165        return 0;
166    else
167        return EBADF;
168}
169
170//	#pragma mark -
171
172static int
173init_hash_table(hash_table *ht)
174{
175    ht->max   = HT_DEFAULT_MAX;
176    ht->mask  = ht->max - 1;
177    ht->num_elements = 0;
178
179    ht->table = (hash_ent **)calloc(ht->max, sizeof(hash_ent *));
180    if (ht->table == NULL)
181        return ENOMEM;
182
183    return 0;
184}
185
186
187static void
188shutdown_hash_table(hash_table *ht)
189{
190    int       i, hash_len;
191    hash_ent *he, *next;
192
193    for(i=0; i < ht->max; i++) {
194        he = ht->table[i];
195
196        for(hash_len=0; he; hash_len++, he=next) {
197            next = he->next;
198            free(he);
199        }
200    }
201
202    if (ht->table)
203        free(ht->table);
204    ht->table = NULL;
205}
206
#if 0
/*
  Debugging aid (currently compiled out): walks every bucket, prints
  each non-empty chain's length, then the longest chain and the
  average chain length across all buckets.
*/
static void
print_hash_stats(hash_table *ht)
{
    int       i, hash_len, max = -1, sum = 0;
    hash_ent *he, *next;

    for(i=0; i < ht->max; i++) {
        he = ht->table[i];

        /* count this bucket's chain length */
        for(hash_len=0; he; hash_len++, he=next) {
            next = he->next;
        }
        if (hash_len)
            printf("bucket %3d : %3d\n", i, hash_len);

        sum += hash_len;
        if (hash_len > max)
            max = hash_len;
    }

    printf("max # of chains: %d,  average chain length %d\n", max,sum/ht->max);
}
#endif
231
232#define HASH(d, b)   ((((fs_off_t)d) << (sizeof(fs_off_t)*8 - 6)) | (b))
233
234static hash_ent *
235new_hash_ent(int dev, fs_off_t bnum, void *data)
236{
237    hash_ent *he;
238
239    he = (hash_ent *)malloc(sizeof(*he));
240    if (he == NULL)
241        return NULL;
242
243    he->hash_val = HASH(dev, bnum);
244    he->dev      = dev;
245    he->bnum     = bnum;
246    he->data     = data;
247    he->next     = NULL;
248
249    return he;
250}
251
252
253static int
254grow_hash_table(hash_table *ht)
255{
256    int        i, omax, newsize, newmask;
257    fs_off_t      hash;
258    hash_ent **new_table, *he, *next;
259
260    if (ht->max & ht->mask) {
261        printf("*** hashtable size %d or mask %d looks weird!\n", ht->max,
262               ht->mask);
263    }
264
265    omax    = ht->max;
266    newsize = omax * 2;        /* have to grow in powers of two */
267    newmask = newsize - 1;
268
269    new_table = (hash_ent **)calloc(newsize, sizeof(hash_ent *));
270    if (new_table == NULL)
271        return ENOMEM;
272
273    for(i=0; i < omax; i++) {
274        for(he=ht->table[i]; he; he=next) {
275            hash = he->hash_val & newmask;
276            next = he->next;
277
278            he->next        = new_table[hash];
279            new_table[hash] = he;
280        }
281    }
282
283    free(ht->table);
284    ht->table = new_table;
285    ht->max   = newsize;
286    ht->mask  = newmask;
287
288    return 0;
289}
290
291
292
293
294static int
295hash_insert(hash_table *ht, int dev, fs_off_t bnum, void *data)
296{
297    fs_off_t    hash;
298    hash_ent *he, *curr;
299
300    hash = HASH(dev, bnum) & ht->mask;
301
302    curr = ht->table[hash];
303    for(; curr != NULL; curr=curr->next)
304        if (curr->dev == dev && curr->bnum == bnum)
305            break;
306
307    if (curr && curr->dev == dev && curr->bnum == bnum) {
308        printf("entry %d:%Ld already in the hash table!\n", dev, bnum);
309        return EEXIST;
310    }
311
312    he = new_hash_ent(dev, bnum, data);
313    if (he == NULL)
314        return ENOMEM;
315
316    he->next        = ht->table[hash];
317    ht->table[hash] = he;
318
319    ht->num_elements++;
320    if (ht->num_elements >= ((ht->max * 3) / 4)) {
321        if (grow_hash_table(ht) != 0)
322            return ENOMEM;
323    }
324
325    return 0;
326}
327
328static void *
329hash_lookup(hash_table *ht, int dev, fs_off_t bnum)
330{
331    hash_ent *he;
332
333    he = ht->table[HASH(dev, bnum) & ht->mask];
334
335    for(; he != NULL; he=he->next) {
336        if (he->dev == dev && he->bnum == bnum)
337            break;
338    }
339
340    if (he)
341        return he->data;
342    else
343        return NULL;
344}
345
346
347static void *
348hash_delete(hash_table *ht, int dev, fs_off_t bnum)
349{
350    void     *data;
351    fs_off_t     hash;
352    hash_ent *he, *prev = NULL;
353
354    hash = HASH(dev, bnum) & ht->mask;
355    he = ht->table[hash];
356
357    for(; he != NULL; prev=he,he=he->next) {
358        if (he->dev == dev && he->bnum == bnum)
359            break;
360    }
361
362    if (he == NULL) {
363        printf("*** hash_delete: tried to delete non-existent block %d:%Ld\n",
364               dev, bnum);
365        return NULL;
366    }
367
368    data = he->data;
369
370    if (ht->table[hash] == he)
371        ht->table[hash] = he->next;
372    else if (prev)
373        prev->next = he->next;
374    else
375        beos_panic("hash table is inconsistent\n");
376
377    free(he);
378    ht->num_elements--;
379
380    return data;
381}
382
383//	#pragma mark -
384
385/*
386  These are the global variables for the cache.
387*/
388static block_cache  bc;
389
390#define       MAX_IOVECS  64           /* # of iovecs for use by cache code */
391static beos_lock   iovec_lock;
392static struct iovec *iovec_pool[MAX_IOVECS];  /* each ptr is to an array of iovecs */
393static int    iovec_used[MAX_IOVECS];  /* non-zero == iovec is in use */
394
395#define NUM_FLUSH_BLOCKS 64    /* size of the iovec array pointed by each ptr */
396
397
398#define DEFAULT_READ_AHEAD_SIZE  (32 * 1024)
399static int read_ahead_size = DEFAULT_READ_AHEAD_SIZE;
400
401/* this array stores the size of each device so we can error check requests */
402#define MAX_DEVICES  256
403fs_off_t max_device_blocks[MAX_DEVICES];
404
405
406/* has the time of the last cache access so cache flushing doesn't interfere */
407static bigtime_t last_cache_access = 0;
408
409
/*
  Initialize the global block cache: zero all global state, set up the
  hash table, create the cache and iovec locks, and pre-allocate two
  iovec arrays.  Returns 0 on success, ENOMEM otherwise; on failure
  every partially created resource is torn down again.
*/
int
beos_init_block_cache(int max_blocks, int flags)
{
    memset(&bc, 0, sizeof(bc));
    memset(iovec_pool, 0, sizeof(iovec_pool));
    memset(iovec_used, 0, sizeof(iovec_used));
    memset(&max_device_blocks, 0, sizeof(max_device_blocks));

    if (init_hash_table(&bc.ht) != 0)
        return ENOMEM;

    /* mark both semaphores invalid so the error path knows what to free */
    bc.lock.s = iovec_lock.s = -1;

    bc.max_blocks = max_blocks;
    bc.flags      = flags;
    if (beos_new_lock(&bc.lock, "bollockcache") != 0)
        goto err;

    if (beos_new_lock(&iovec_lock, "iovec_lock") != 0)
        goto err;

    /* allocate two of these up front so vm won't accidently re-enter itself */
    iovec_pool[0] = (struct iovec *)malloc(sizeof(struct iovec)*NUM_FLUSH_BLOCKS);
    iovec_pool[1] = (struct iovec *)malloc(sizeof(struct iovec)*NUM_FLUSH_BLOCKS);

#ifndef USER
#ifdef DEBUG
    /* NOTE(review): do_dump/do_find_block/do_find_data are currently
       compiled out (#if 0 above), so a DEBUG kernel build would fail
       here -- confirm before enabling DEBUG */
    add_debugger_command("bcache", do_dump, "dump the block cache list");
    add_debugger_command("fblock", do_find_block, "find a block in the cache");
    add_debugger_command("fdata",  do_find_data, "find a data block ptr in the cache");
#endif
    /* NOTE(review): cache_flusher is also #if 0'd -- see above */
    register_kernel_daemon(cache_flusher, NULL, 3);
#endif

    return 0;

 err:
    if (bc.lock.s >= 0)
        beos_free_lock(&bc.lock);

    if (iovec_lock.s >= 0)
        beos_free_lock(&iovec_lock);

    shutdown_hash_table(&bc.ht);
    memset((void *)&bc, 0, sizeof(bc));
    return ENOMEM;
}
457
458
459static struct iovec *
460get_iovec_array(void)
461{
462    int i;
463    struct iovec *iov;
464
465    LOCK(iovec_lock);
466
467    for(i=0; i < MAX_IOVECS; i++) {
468        if (iovec_used[i] == 0)
469            break;
470    }
471
472    if (i >= MAX_IOVECS)       /* uh-oh */
473        beos_panic("cache: ran out of iovecs (pool 0x%x, used 0x%x)!\n",
474              &iovec_pool[0], &iovec_used[0]);
475
476    if (iovec_pool[i] == NULL) {
477        iovec_pool[i] = (struct iovec *)malloc(sizeof(struct iovec)*NUM_FLUSH_BLOCKS);
478        if (iovec_pool == NULL)
479            beos_panic("can't allocate an iovec!\n");
480    }
481
482    iov = iovec_pool[i];
483    iovec_used[i] = 1;
484
485    UNLOCK(iovec_lock);
486
487    return iov;
488}
489
490
491static void
492release_iovec_array(struct iovec *iov)
493{
494    int i;
495
496    LOCK(iovec_lock);
497
498    for(i=0; i < MAX_IOVECS; i++) {
499        if (iov == iovec_pool[i])
500            break;
501    }
502
503    if (i < MAX_IOVECS)
504        iovec_used[i] = 0;
505    else                     /* uh-oh */
506        printf("cache: released an iovec I don't own (iov %p)\n", iov);
507
508
509    UNLOCK(iovec_lock);
510}
511
512
513
514
515static void
516real_dump_cache_list(cache_ent_list *cel)
517{
518    cache_ent *ce;
519
520    kprintf("starting from LRU end:\n");
521
522    for (ce = cel->lru; ce; ce = ce->next) {
523        kprintf("ce %p dev %2d bnum %6Ld lock %d flag %d arg %p "
524               "clone %p\n", ce, ce->dev, ce->block_num, ce->lock,
525               ce->flags, ce->arg, ce->clone);
526    }
527    kprintf("MRU end\n");
528}
529
/*
  Debugging helper: dump both cache lists (normal first, then locked)
  along with the current/maximum block counts and the hash table
  address.
*/
static void
dump_cache_list(void)
{
    kprintf("NORMAL BLOCKS\n");
    real_dump_cache_list(&bc.normal);

    kprintf("LOCKED BLOCKS\n");
    real_dump_cache_list(&bc.locked);

    kprintf("cur blocks %d, max blocks %d ht @ %p\n", bc.cur_blocks,
           bc.max_blocks, &bc.ht);
}
542
543#if 0
544static void
545check_bcache(char *str)
546{
547    int count = 0;
548    cache_ent *ce, *prev = NULL;
549
550    LOCK(bc.lock);
551
552    for(ce=bc.normal.lru; ce; prev=ce, ce=ce->next) {
553        count++;
554    }
555
556    for(ce=bc.locked.lru; ce; prev=ce, ce=ce->next) {
557        count++;
558    }
559
560    if (count != bc.cur_blocks) {
561        if (count < bc.cur_blocks - 16)
562            beos_panic("%s: count == %d, cur_blocks %d, prev 0x%x\n",
563                    str, count, bc.cur_blocks, prev);
564        else
565            printf("%s: count == %d, cur_blocks %d, prev 0x%x\n",
566                    str, count, bc.cur_blocks, prev);
567    }
568
569    UNLOCK(bc.lock);
570}
571
572
573static void
574dump_lists(void)
575{
576    cache_ent *nce;
577
578    printf("LOCKED 0x%x  (tail 0x%x, head 0x%x)\n", &bc.locked,
579           bc.locked.lru, bc.locked.mru);
580    for(nce=bc.locked.lru; nce; nce=nce->next)
581        printf("nce @ 0x%x dev %d bnum %ld flags %d lock %d clone 0x%x func 0x%x\n",
582               nce, nce->dev, nce->block_num, nce->flags, nce->lock, nce->clone,
583               nce->func);
584
585    printf("NORMAL 0x%x  (tail 0x%x, head 0x%x)\n", &bc.normal,
586           bc.normal.lru, bc.normal.mru);
587    for(nce=bc.normal.lru; nce; nce=nce->next)
588        printf("nce @ 0x%x dev %d bnum %ld flags %d lock %d clone 0x%x func 0x%x\n",
589               nce, nce->dev, nce->block_num, nce->flags, nce->lock, nce->clone,
590               nce->func);
591}
592
593
594static void
595check_lists(void)
596{
597    cache_ent *ce, *prev, *oce;
598    cache_ent_list *cel;
599
600    cel = &bc.normal;
601    for(ce=cel->lru,prev=NULL; ce; prev=ce, ce=ce->next) {
602        for(oce=bc.locked.lru; oce; oce=oce->next) {
603            if (oce == ce) {
604                dump_lists();
605                beos_panic("1:ce @ 0x%x is in two lists(cel 0x%x &LOCKED)\n",ce,cel);
606            }
607        }
608    }
609    if (prev && prev != cel->mru) {
610        dump_lists();
611        beos_panic("*** last element in list != cel mru (ce 0x%x, cel 0x%x)\n",
612              prev, cel);
613    }
614
615    cel = &bc.locked;
616    for(ce=cel->lru,prev=NULL; ce; prev=ce, ce=ce->next) {
617        for(oce=bc.normal.lru; oce; oce=oce->next) {
618            if (oce == ce) {
619                dump_lists();
620                beos_panic("3:ce @ 0x%x is in two lists(cel 0x%x & DIRTY)\n",ce,cel);
621            }
622        }
623    }
624    if (prev && prev != cel->mru) {
625        dump_lists();
626        beos_panic("*** last element in list != cel mru (ce 0x%x, cel 0x%x)\n",
627              prev, cel);
628    }
629}
630#endif
631
632
633#ifdef DEBUG
634#if 0
635static int
636do_dump(int argc, char **argv)
637{
638    dump_cache_list();
639    return 1;
640}
641
642
643static int
644do_find_block(int argc, char **argv)
645{
646    int        i;
647    fs_off_t  bnum;
648    cache_ent *ce;
649
650    if (argc < 2) {
651        kprintf("%s: needs a block # argument\n", argv[0]);
652        return 1;
653    }
654
655    for(i=1; i < argc; i++) {
656        bnum = strtoul(argv[i], NULL, 0);
657
658        for(ce=bc.normal.lru; ce; ce=ce->next) {
659            if (ce->block_num == bnum) {
660                kprintf("found clean bnum %ld @ 0x%lx (data @ 0x%lx)\n",
661                        bnum, ce, ce->data);
662            }
663        }
664
665        for(ce=bc.locked.lru; ce; ce=ce->next) {
666            if (ce->block_num == bnum) {
667                kprintf("found locked bnum %ld @ 0x%lx (data @ 0x%lx)\n",
668                        bnum, ce, ce->data);
669            }
670        }
671    }
672
673    return 0;
674}
675
676static int
677do_find_data(int argc, char **argv)
678{
679    int        i;
680    void      *data;
681    cache_ent *ce;
682
683    if (argc < 2) {
684        kprintf("%s: needs a block # argument\n", argv[0]);
685        return 1;
686    }
687
688    for(i=1; i < argc; i++) {
689        data = (void *)strtoul(argv[i], NULL, 0);
690
691        for(ce=bc.normal.lru; ce; ce=ce->next) {
692            if (ce->data == data) {
693                kprintf("found normal data ptr for bnum %ld @ ce 0x%lx\n",
694                        ce->block_num, ce);
695            }
696        }
697
698        for(ce=bc.locked.lru; ce; ce=ce->next) {
699            if (ce->data == data) {
700                kprintf("found locked data ptr for bnum %ld @ ce 0x%lx\n",
701                        ce->block_num, ce);
702            }
703        }
704    }
705
706    return 0;
707}
708#endif
709#endif /* DEBUG */
710
711
712
713/*
714  this function detaches the cache_ent from the list.
715*/
716static void
717delete_from_list(cache_ent_list *cel, cache_ent *ce)
718{
719    if (ce->next)
720        ce->next->prev = ce->prev;
721    if (ce->prev)
722        ce->prev->next = ce->next;
723
724    if (cel->lru == ce)
725        cel->lru = ce->next;
726    if (cel->mru == ce)
727        cel->mru = ce->prev;
728
729    ce->next = NULL;
730    ce->prev = NULL;
731}
732
733
734
735/*
736  this function adds the cache_ent ce to the head of the
737  list (i.e. the MRU end).  the cache_ent should *not*
738  be in any lists.
739*/
740static void
741add_to_head(cache_ent_list *cel, cache_ent *ce)
742{
743if (ce->next != NULL || ce->prev != NULL) {
744    beos_panic("*** ath: ce has non-null next/prev ptr (ce 0x%x nxt 0x%x, prv 0x%x)\n",
745           ce, ce->next, ce->prev);
746}
747
748    ce->next = NULL;
749    ce->prev = cel->mru;
750
751    if (cel->mru)
752        cel->mru->next = ce;
753    cel->mru = ce;
754
755    if (cel->lru == NULL)
756        cel->lru = ce;
757}
758
759
760/*
761  this function adds the cache_ent ce to the tail of the
762  list (i.e. the MRU end).  the cache_ent should *not*
763  be in any lists.
764*/
765static void
766add_to_tail(cache_ent_list *cel, cache_ent *ce)
767{
768if (ce->next != NULL || ce->prev != NULL) {
769    beos_panic("*** att: ce has non-null next/prev ptr (ce 0x%x nxt 0x%x, prv 0x%x)\n",
770           ce, ce->next, ce->prev);
771}
772
773    ce->next = cel->lru;
774    ce->prev = NULL;
775
776    if (cel->lru)
777        cel->lru->prev = ce;
778    cel->lru = ce;
779
780    if (cel->mru == NULL)
781        cel->mru = ce;
782}
783
784
785static int
786cache_ent_cmp(const void *a, const void *b)
787{
788    fs_off_t  diff;
789    cache_ent *p1 = *(cache_ent **)a, *p2 = *(cache_ent **)b;
790
791    if (p1 == NULL || p2 == NULL)
792        beos_panic("cache_ent pointers are null?!? (a 0x%lx, b 0x%lx\n)\n", a, b);
793
794    if (p1->dev == p2->dev) {
795        diff = p1->block_num - p2->block_num;
796        return (int)diff;
797    } else {
798        return p1->dev - p2->dev;
799    }
800}
801
802#if 0
803// ToDo: add this to the fsh (as a background thread)?
804static void
805cache_flusher(void *arg, int phase)
806{
807    int    i, num_ents, err;
808    bigtime_t now = system_time();
809    static cache_ent *ce = NULL;
810    static cache_ent *ents[NUM_FLUSH_BLOCKS];
811
812    /*
813       if someone else was in the cache recently then just bail out so
814       we don't lock them out unnecessarily
815    */
816    if ((now - last_cache_access) < 1000000)
817        return;
818
819    LOCK(bc.lock);
820
821    ce = bc.normal.lru;
822
823    for(num_ents=0; ce && num_ents < NUM_FLUSH_BLOCKS; ce=ce->next) {
824        if (ce->flags & CE_BUSY)
825            continue;
826
827        if ((ce->flags & CE_DIRTY) == 0 && ce->clone == NULL)
828            continue;
829
830        ents[num_ents] = ce;
831        ents[num_ents]->flags |= CE_BUSY;
832        num_ents++;
833    }
834
835    /* if we've got some room left over, look for cloned locked blocks */
836    if (num_ents < NUM_FLUSH_BLOCKS) {
837        ce = bc.locked.lru;
838
839        for(; num_ents < NUM_FLUSH_BLOCKS;) {
840            for(;
841                ce && ((ce->flags & CE_BUSY) || ce->clone == NULL);
842                ce=ce->next)
843                /* skip ents that meet the above criteria */;
844
845            if (ce == NULL)
846                break;
847
848            ents[num_ents] = ce;
849            ents[num_ents]->flags |= CE_BUSY;
850            ce = ce->next;
851            num_ents++;
852        }
853    }
854
855    UNLOCK(bc.lock);
856
857    if (num_ents == 0)
858        return;
859
860    qsort(ents, num_ents, sizeof(cache_ent **), cache_ent_cmp);
861
862    if ((err = flush_ents(ents, num_ents)) != 0) {
863        printf("flush ents failed (ents @ 0x%lx, num_ents %d!\n",
864               (ulong)ents, num_ents);
865    }
866
867    for(i=0; i < num_ents; i++) {       /* clear the busy bit on each of ent */
868        ents[i]->flags &= ~CE_BUSY;
869    }
870}
871#endif
872
873
/*
  Write out a single cache_ent.  If the block has a clone (the copy
  kept so the BFS journal stays consistent) the clone is written
  first; once the clone has gone out we may also need to write the
  real data if the block is dirty and unlocked -- hence the goto
  restart.  The journal callback (ce->func), if set, fires after the
  first write.  Returns the result of the last physical write.
*/
static int
flush_cache_ent(cache_ent *ce)
{
    int   ret = 0;
    void *data;

    /* if true, then there's nothing to flush */
    if ((ce->flags & CE_DIRTY) == 0 && ce->clone == NULL)
        return 0;

    /* same thing here */
    if (ce->clone == NULL && ce->lock != 0)
        return 0;

 restart:
    /* the cloned (journal) copy takes priority over the live data */
    if (ce->clone)
        data = ce->clone;
    else
        data = ce->data;

	if (chatty_io > 2) printf("flush: %7Ld\n", ce->block_num);
	ret = beos_write_phys_blocks(ce->dev, ce->block_num, data, 1, ce->bsize);

    if (ce->func) {
        ce->func(ce->logged_bnum, 1, ce->arg);
        ce->func = NULL;
    }

    if (ce->clone) {
        free(ce->clone);
        ce->clone = NULL;

        if (ce->lock == 0 && (ce->flags & CE_DIRTY))
            goto restart;     /* also write the real data ptr */
    } else {
        ce->flags &= ~CE_DIRTY;
    }

    return ret;
}
914
915
/*
  Flush a (sorted) batch of cache_ents to disk.  Runs of consecutive
  blocks on the same device are gathered into a single writev_pos()
  call (up to NUM_FLUSH_BLOCKS per run); isolated blocks go through
  flush_cache_ent().  Because a block can be both cloned and dirty,
  the first pass may leave dirty data behind, in which case a second
  pass is made (see the long comment near the bottom).  Returns 0 on
  success or an error code from the underlying writes.
*/
static int
flush_ents(cache_ent **ents, int n_ents)
{
    int    i, j, k, ret = 0, bsize, iocnt, do_again = 0;
    fs_off_t  start_bnum;
    struct iovec *iov;

    iov = get_iovec_array();
    if (iov == NULL)
        return ENOMEM;

restart:
    for(i=0; i < n_ents; i++) {
        /* if true, then there's nothing to flush */
        if ((ents[i]->flags & CE_DIRTY) == 0 && ents[i]->clone == NULL)
            continue;

        /* if true we can't touch the dirty data yet because it's locked */
        if (ents[i]->clone == NULL && ents[i]->lock != 0)
            continue;


        bsize      = ents[i]->bsize;
        start_bnum = ents[i]->block_num;

        /* see how many of the following ents are contiguous with this
           one (same device, consecutive block numbers, flushable) */
        for(j=i+1; j < n_ents && (j - i) < NUM_FLUSH_BLOCKS; j++) {
            if (ents[j]->dev != ents[i]->dev ||
                ents[j]->block_num != start_bnum + (j - i))
                break;

            if (ents[j]->clone == NULL && ents[j]->lock != 0)
                break;
        }

        if (j == i+1) {           /* only one block, just flush it directly */
            if ((ret = flush_cache_ent(ents[i])) != 0)
                break;
            continue;
        }


        /* build the iovec array; a clone takes priority over the data */
        for(k=i,iocnt=0; k < j; k++,iocnt++) {
            if (ents[k]->clone)
                iov[iocnt].iov_base = ents[k]->clone;
            else
                iov[iocnt].iov_base = ents[k]->data;

            iov[iocnt].iov_len = bsize;
        }

		if (chatty_io)
	        printf("writev @ %Ld for %d blocks\n", start_bnum, iocnt);

        ret = writev_pos(ents[i]->dev, start_bnum * (fs_off_t)bsize,
                         &iov[0], iocnt);
        if (ret != iocnt*bsize) {
            int idx;

            printf("flush_ents: writev failed: iocnt %d start bnum %Ld "
                   "bsize %d, ret %d\n", iocnt, start_bnum, bsize, ret);

            for(idx=0; idx < iocnt; idx++)
                printf("iov[%2d] = %p :: %ld\n", idx, iov[idx].iov_base,
                       iov[idx].iov_len);

            printf("error %s writing blocks %Ld:%d (%d != %d)\n",
                   strerror(errno), start_bnum, iocnt, ret, iocnt*bsize);
            ret = EINVAL;
            break;
        }
        ret = 0;


        /* the writes went out: fire journal callbacks and clear state */
        for(k=i; k < j; k++) {
            if (ents[k]->func) {
                ents[k]->func(ents[k]->logged_bnum, 1, ents[k]->arg);
                ents[k]->func = NULL;
            }

            if (ents[k]->clone) {
                free(ents[k]->clone);
                ents[k]->clone = NULL;
            } else {
                ents[k]->flags &= ~CE_DIRTY;
            }
        }


        i = j - 1;  /* i gets incremented by the outer for loop */
    }

    /*
       here we have to go back through and flush any blocks that are
       still dirty.  with an arched brow you astutely ask, "but how
       could this happen given the above loop?"  Ahhh young grasshopper
       I say, the path through the cache is long and twisty and fraught
       with peril.  The reason it can happen is that a block can be both
       cloned and dirty.  The above loop would only flush the cloned half
       of the data, not the main dirty block.  So we have to go back
       through and see if there are any blocks that are still dirty.  If
       there are we go back to the top of the function and do the whole
       thing over.  Kind of grody but it is necessary to insure the
       correctness of the log for the Be file system.
    */
    if (do_again == 0) {
        for(i=0; i < n_ents; i++) {
            if ((ents[i]->flags & CE_DIRTY) == 0 || ents[i]->lock)
                continue;

            do_again = 1;
            break;
        }

        if (do_again)
            goto restart;
    }

    release_iovec_array(iov);


    return ret;
}
1038
/*
  Flush and free every cache_ent on the given list, removing each from
  the hash table as it goes.  Blocks that are still locked or busy at
  shutdown indicate a bug in the caller and are reported (but freed
  anyway).
*/
static void
delete_cache_list(cache_ent_list *cel)
{
    void      *junk;
    cache_ent *ce, *next;

    for (ce = cel->lru; ce; ce = next) {
        next = ce->next;
        if (ce->lock != 0) {
            if (ce->func)
                printf("*** shutdown_block_cache: block %Ld, lock == %d "
                       "(arg %p)!\n", ce->block_num, ce->lock, ce->arg);
            else
                printf("*** shutdown_block_cache: block %Ld, lock == %d!\n",
                       ce->block_num, ce->lock);
        }

        if (ce->flags & CE_BUSY) {
            printf("* shutdown block cache: bnum %Ld is busy? ce %p\n",
                   ce->block_num, ce);
        }

        /* push any dirty/cloned data to disk before tearing it down */
        if ((ce->flags & CE_DIRTY) || ce->clone) {
            flush_cache_ent(ce);
        }

        if (ce->clone)
            free(ce->clone);
        ce->clone = NULL;

        if (ce->data)
            free(ce->data);
        ce->data = NULL;

        if ((junk = hash_delete(&bc.ht, ce->dev, ce->block_num)) != ce) {
            printf("*** free_device_cache: bad hash table entry %Ld "
                   "%p != %p\n", ce->block_num, junk, ce);
        }

        /* poison the memory to help catch dangling references */
        memset(ce, 0xfd, sizeof(*ce));
        free(ce);

        bc.cur_blocks--;
    }
}
1084
1085
/*
  Tear down the whole block cache: flush and free both lists, destroy
  the hash table, and release the locks.  bc.lock is deliberately
  freed while held -- nobody may touch the cache after this point.
*/
void
beos_shutdown_block_cache(void)
{
    /* print_hash_stats(&bc.ht); */

    if (bc.lock.s > 0)
        LOCK(bc.lock);

#ifndef USER
    /* NOTE(review): cache_flusher is currently compiled out (#if 0
       above), so a non-USER build would fail to link here -- confirm */
    unregister_kernel_daemon(cache_flusher, NULL);
#endif

    delete_cache_list(&bc.normal);
    delete_cache_list(&bc.locked);

    bc.normal.lru = bc.normal.mru = NULL;
    bc.locked.lru = bc.locked.mru = NULL;

    shutdown_hash_table(&bc.ht);

    if (bc.lock.s > 0)
        beos_free_lock(&bc.lock);
    bc.lock.s = -1;

    if (iovec_lock.s >= 0)
        beos_free_lock(&iovec_lock);
}
1113
1114
1115
1116int
1117beos_init_cache_for_device(int fd, fs_off_t max_blocks)
1118{
1119    int ret = 0;
1120
1121    if (fd >= MAX_DEVICES)
1122        return -1;
1123
1124    LOCK(bc.lock);
1125
1126    if (max_device_blocks[fd] != 0) {
1127        printf("device %d is already initialized!\n", fd);
1128        ret = -1;
1129    } else {
1130        max_device_blocks[fd] = max_blocks;
1131    }
1132
1133    UNLOCK(bc.lock);
1134
1135    return ret;
1136}
1137
1138
1139
1140/*
1141   this routine assumes that bc.lock has been acquired
1142*/
1143static cache_ent *
1144block_lookup(int dev, fs_off_t bnum)
1145{
1146    int        count = 0;
1147    cache_ent *ce;
1148
1149    while (1) {
1150        ce = hash_lookup(&bc.ht, dev, bnum);
1151        if (ce == NULL)
1152            return NULL;
1153
1154        if ((ce->flags & CE_BUSY) == 0) /* it's ok, break out and return it */
1155            break;
1156
1157        /* else, it's busy and we need to retry our lookup */
1158        UNLOCK(bc.lock);
1159
1160        snooze(5000);
1161        if (count++ == 5000) {  /* then a lot of time has elapsed */
1162            printf("block %Ld isn't coming un-busy (ce @ %p)\n",
1163                   ce->block_num, ce);
1164        }
1165
1166        LOCK(bc.lock);
1167    }
1168
1169    if (ce->flags & CE_BUSY)
1170        beos_panic("block lookup: returning a busy block @ 0x%lx?!?\n",(ulong)ce);
1171
1172    return ce;
1173}
1174
1175
1176
/*
   Transition a set of locked, dirty blocks into "logged" state for the
   journal.  For each block in blocks[]:

     - any clone data left over from a previous transaction is flushed
       to disk first (batched NUM_FLUSH_BLOCKS at a time);
     - the current contents are snapshotted into ce->clone so the journal
       can later write exactly the logged data even if the block is
       modified again in the meantime;
     - the journal callback (func/arg) and the log block number are
       recorded on the entry;
     - one lock reference is dropped; if the count hits zero the block
       moves from the locked list back to the head of the normal list.

   Returns 0 on success or an errno-style code.  Assumes the blocks were
   previously obtained locked and dirty (panics otherwise).
*/
int
beos_set_blocks_info(int dev, fs_off_t *blocks, int nblocks,
               void (*func)(fs_off_t bnum, size_t nblocks, void *arg), void *arg)
{
    int        i, j, cur;
    cache_ent *ce;
    cache_ent *ents[NUM_FLUSH_BLOCKS];

    LOCK(bc.lock);


    /* first pass: validate each block and flush any pre-existing clones */
    for(i=0, cur=0; i < nblocks; i++) {

        /* printf("sbi:   %ld (arg 0x%x)\n", blocks[i], arg); */
        ce = block_lookup(dev, blocks[i]);
        if (ce == NULL) {
            beos_panic("*** set_block_info can't find bnum %ld!\n", blocks[i]);
            UNLOCK(bc.lock);
            return ENOENT;   /* hopefully this doesn't happen... */
        }


        if (blocks[i] != ce->block_num || dev != ce->dev) {
            UNLOCK(bc.lock);
            beos_panic("** error1: looked up dev %d block %ld but found dev %d "
                    "bnum %ld\n", dev, blocks[i], ce->dev, ce->block_num);
            return EBADF;
        }

        if (ce->lock == 0) {
            beos_panic("* set_block_info on bnum %ld (%d) but it's not locked!\n",
                    blocks[i], nblocks);
        }


        if ((ce->flags & CE_DIRTY) == 0) {
            beos_panic("*** set_block_info on non-dirty block bnum %ld (%d)!\n",
                    blocks[i], nblocks);
        }

        ce->flags |= CE_BUSY;     /* mark all blocks as busy till we're done */

        /* if there is cloned data, it needs to be flushed now */
        if (ce->clone && ce->func) {
            ents[cur++] = ce;

            if (cur >= NUM_FLUSH_BLOCKS) {
                /* drop the lock for the I/O; the entries are protected
                   by their CE_BUSY flags while it is released */
                UNLOCK(bc.lock);

                qsort(ents, cur, sizeof(cache_ent **), cache_ent_cmp);

                flush_ents(ents, cur);

                LOCK(bc.lock);
                for(j=0; j < cur; j++)
                    ents[j]->flags &= ~CE_BUSY;
                cur = 0;
            }
        }
    }


    /* flush whatever partial batch of clones is left over */
    if (cur != 0) {
        UNLOCK(bc.lock);

        qsort(ents, cur, sizeof(cache_ent **), cache_ent_cmp);

        flush_ents(ents, cur);

        LOCK(bc.lock);
        for(j=0; j < cur; j++)
            ents[j]->flags &= ~CE_BUSY;
        cur = 0;
    }


    /* now go through and set the info that we were asked to */
    for (i = 0; i < nblocks; i++) {
        /* we can call hash_lookup() here because we know it's around */
        ce = hash_lookup(&bc.ht, dev, blocks[i]);
        if (ce == NULL) {
            beos_panic("*** set_block_info can't find bnum %Ld!\n", blocks[i]);
            UNLOCK(bc.lock);
            return ENOENT;   /* hopefully this doesn't happen... */
        }

        ce->flags &= ~(CE_DIRTY | CE_BUSY);

        /* the first pass flushed any old clone/callback, so both must
           be clear by the time we get here */
        if (ce->func != NULL) {
            beos_panic("*** set_block_info non-null callback on bnum %Ld\n",
                    ce->block_num);
        }

        if (ce->clone != NULL) {
            beos_panic("*** ce->clone == %p, not NULL in set_block_info\n",
                    ce->clone);
        }

        /* snapshot the block's current contents for the journal */
        ce->clone = (void *)malloc(ce->bsize);
        if (ce->clone == NULL)
            beos_panic("*** can't clone bnum %Ld (bsize %d)\n",
                    ce->block_num, ce->bsize);


        memcpy(ce->clone, ce->data, ce->bsize);

        ce->func   = func;
        ce->arg    = arg;

        ce->logged_bnum = blocks[i];

        /* drop the lock reference the journal held on this block */
        ce->lock--;
        if (ce->lock < 0) {
            printf("sbi: whoa nellie! ce @ %p (%Ld) has lock == %d\n",
                   ce, ce->block_num, ce->lock);
        }

        if (ce->lock == 0) {
            delete_from_list(&bc.locked, ce);
            add_to_head(&bc.normal, ce);
        }
    }

    UNLOCK(bc.lock);

    return 0;
}
1304
1305
1306/* this function is only for use by flush_device() */
1307static void
1308do_flush(cache_ent **ents, int max)
1309{
1310    int i;
1311
1312    for(i=0; i < max; i++) {
1313        ents[i]->flags |= CE_BUSY;
1314    }
1315
1316    UNLOCK(bc.lock);
1317
1318    qsort(ents, max, sizeof(cache_ent **), cache_ent_cmp);
1319    flush_ents(ents, max);
1320
1321    LOCK(bc.lock);
1322    for(i=0; i < max; i++) {
1323        ents[i]->flags &= ~CE_BUSY;
1324    }
1325}
1326
/*
   Write out all dirty data belonging to dev: dirty blocks and clones on
   the normal list, and clone data only on the locked list (the locked
   blocks themselves are still pinned and stay dirty).

   Blocks are collected NUM_FLUSH_BLOCKS at a time and handed to
   do_flush(), which drops bc.lock during the I/O.  Because the lists can
   change while the lock is down, each scan restarts from the LRU end
   after a full batch is flushed; already-flushed entries are skipped on
   the rescan because they are no longer dirty/cloned.

   warn_locked is currently unused.  Always returns 0.
*/
int
beos_flush_device(int dev, int warn_locked)
{
    int cur;
    cache_ent *ce;
    cache_ent *ents[NUM_FLUSH_BLOCKS];

    LOCK(bc.lock);

    /* pass 1: the normal list (dirty blocks and clones) */
    cur = 0;
    ce = bc.normal.lru;
    while (ce) {
        if (ce->dev != dev || (ce->flags & CE_BUSY)) {
            ce = ce->next;
            continue;
        }

        if ((ce->flags & CE_DIRTY) || ce->clone) {
            ents[cur++] = ce;
            if (cur >= NUM_FLUSH_BLOCKS) {
                do_flush(ents, cur);

                /* the list may have changed while the lock was dropped
                   in do_flush(); restart from the LRU end */
                ce = bc.normal.lru;
                cur = 0;
                continue;
            }
        }

        ce = ce->next;
    }

    if (cur != 0)
        do_flush(ents, cur);

    /* pass 2: the locked list (only clone data can be written here) */
    cur = 0;
    ce = bc.locked.lru;
    while (ce) {
        if (ce->dev != dev || (ce->flags & CE_BUSY)) {
            ce = ce->next;
            continue;
        }

        if (ce->clone) {
            ents[cur++] = ce;
            if (cur >= NUM_FLUSH_BLOCKS) {
                do_flush(ents, cur);

                /* restart after the lock was dropped, as above */
                ce = bc.locked.lru;
                cur = 0;
                continue;
            }
        }

        ce = ce->next;
    }

    if (cur != 0)
        do_flush(ents, cur);

    UNLOCK(bc.lock);

    return 0;
}
1390
1391
/*
   Remove every cache entry for dev from the given list (normal or
   locked), freeing the entries, their data and clone buffers, and their
   hash table slots.  If allow_writes == ALLOW_WRITES, dirty/cloned
   entries are flushed to disk first; otherwise their contents are simply
   discarded.

   Assumes bc.lock is held by the caller.  Entries that are still locked
   or busy are complained about but removed anyway.
*/
static void
real_remove_cached_blocks(int dev, int allow_writes, cache_ent_list *cel)
{
    void      *junk;
    cache_ent *ce, *next = NULL;

    /* grab next before we free ce */
    for(ce=cel->lru; ce; ce=next) {
        next = ce->next;

        if (ce->dev != dev) {
            continue;
        }

        if (ce->lock != 0 || (ce->flags & CE_BUSY)) {
            printf("*** remove_cached_dev: block %Ld has lock = %d, flags "
                   "0x%x! ce @ 0x%lx\n", ce->block_num, ce->lock, ce->flags,
                   (ulong)ce);
        }

        if (allow_writes == ALLOW_WRITES &&
            ((ce->flags & CE_DIRTY) || ce->clone)) {
            ce->flags |= CE_BUSY;
            flush_cache_ent(ce);
            ce->flags &= ~CE_BUSY;
        }

        /* unlink this guy */
        if (cel->lru == ce)
            cel->lru = ce->next;

        if (cel->mru == ce)
            cel->mru = ce->prev;

        if (ce->prev)
            ce->prev->next = ce->next;
        if (ce->next)
            ce->next->prev = ce->prev;

        if (ce->clone)
            free(ce->clone);
        ce->clone = NULL;

        if (ce->data)
            free(ce->data);
        ce->data = NULL;

        /* hash_delete() must hand back exactly this entry or the table
           is corrupt */
        if ((junk = hash_delete(&bc.ht, ce->dev, ce->block_num)) != ce) {
            beos_panic("*** remove_cached_device: bad hash table entry %ld "
                   "0x%lx != 0x%lx\n", ce->block_num, (ulong)junk, (ulong)ce);
        }

        free(ce);

        bc.cur_blocks--;
    }

}
1449
1450int
1451beos_remove_cached_device_blocks(int dev, int allow_writes)
1452{
1453    LOCK(bc.lock);
1454
1455    real_remove_cached_blocks(dev, allow_writes, &bc.normal);
1456    real_remove_cached_blocks(dev, allow_writes, &bc.locked);
1457
1458    max_device_blocks[dev] = 0;
1459
1460    UNLOCK(bc.lock);
1461
1462    return 0;
1463}
1464
1465
1466
1467int
1468beos_flush_blocks(int dev, fs_off_t bnum, int nblocks)
1469{
1470    int        cur, i;
1471    cache_ent *ce;
1472    cache_ent *ents[NUM_FLUSH_BLOCKS];
1473
1474    if (nblocks == 0)   /* might as well check for this */
1475        return 0;
1476
1477    LOCK(bc.lock);
1478
1479    cur = 0;
1480    for(; nblocks > 0; nblocks--, bnum++) {
1481        ce = block_lookup(dev, bnum);
1482        if (ce == NULL)
1483            continue;
1484
1485        if (bnum != ce->block_num || dev != ce->dev) {
1486            UNLOCK(bc.lock);
1487            beos_panic("error2: looked up dev %d block %ld but found %d %ld\n",
1488                  dev, bnum, ce->dev, ce->block_num);
1489            return EBADF;
1490        }
1491
1492        if ((ce->flags & CE_DIRTY) == 0 && ce->clone == NULL)
1493            continue;
1494
1495        ce->flags |= CE_BUSY;
1496        ents[cur++] = ce;
1497
1498        if (cur >= NUM_FLUSH_BLOCKS) {
1499            UNLOCK(bc.lock);
1500
1501            qsort(ents, cur, sizeof(cache_ent **), cache_ent_cmp);
1502            flush_ents(ents, cur);
1503
1504            LOCK(bc.lock);
1505            for(i=0; i < cur; i++) {
1506                ents[i]->flags &= ~CE_BUSY;
1507            }
1508            cur = 0;
1509        }
1510    }
1511
1512    UNLOCK(bc.lock);
1513
1514    if (cur == 0)     /* nothing more to do */
1515        return 0;
1516
1517    /* flush out the last few buggers */
1518    qsort(ents, cur, sizeof(cache_ent **), cache_ent_cmp);
1519    flush_ents(ents, cur);
1520
1521    for(i=0; i < cur; i++) {
1522        ents[i]->flags &= ~CE_BUSY;
1523    }
1524
1525    return 0;
1526}
1527
1528
1529int
1530beos_mark_blocks_dirty(int dev, fs_off_t bnum, int nblocks)
1531{
1532    int        ret = 0;
1533    cache_ent *ce;
1534
1535    LOCK(bc.lock);
1536
1537    while(nblocks > 0) {
1538        ce = block_lookup(dev, bnum);
1539        if (ce) {
1540            ce->flags |= CE_DIRTY;
1541            bnum      += 1;
1542            nblocks   -= 1;
1543        } else {     /* hmmm, that's odd, didn't find it */
1544            printf("** mark_blocks_diry couldn't find block %Ld (len %d)\n",
1545                   bnum, nblocks);
1546            ret = ENOENT;
1547            break;
1548        }
1549    }
1550
1551    UNLOCK(bc.lock);
1552
1553    return ret;
1554}
1555
1556
1557
1558int
1559beos_release_block(int dev, fs_off_t bnum)
1560{
1561    cache_ent *ce;
1562
1563    /* printf("rlsb: %ld\n", bnum); */
1564    LOCK(bc.lock);
1565
1566    ce = block_lookup(dev, bnum);
1567    if (ce) {
1568        if (bnum != ce->block_num || dev != ce->dev) {
1569            beos_panic("*** error3: looked up dev %d block %ld but found %d %ld\n",
1570                    dev, bnum, ce->dev, ce->block_num);
1571            UNLOCK(bc.lock);
1572            return EBADF;
1573        }
1574
1575        ce->lock--;
1576
1577        if (ce->lock < 0) {
1578            printf("rlsb: whoa nellie! ce %Ld has lock == %d\n",
1579                   ce->block_num, ce->lock);
1580        }
1581
1582        if (ce->lock == 0) {
1583            delete_from_list(&bc.locked, ce);
1584            add_to_head(&bc.normal, ce);
1585        }
1586
1587    } else {     /* hmmm, that's odd, didn't find it */
1588        beos_panic("** release_block asked to find %ld but it's not here\n",
1589               bnum);
1590    }
1591
1592    UNLOCK(bc.lock);
1593
1594    return 0;
1595}
1596
1597
1598static cache_ent *
1599new_cache_ent(int bsize)
1600{
1601    cache_ent *ce;
1602
1603    ce = (cache_ent *)calloc(1, sizeof(cache_ent));
1604    if (ce == NULL) {
1605        beos_panic("*** error: cache can't allocate memory!\n");
1606        return NULL;
1607    }
1608
1609    ce->data = malloc(bsize);
1610    if (ce->data == NULL) {
1611        free(ce);
1612        beos_panic("** error cache can't allocate data memory\n");
1613        UNLOCK(bc.lock);
1614        return NULL;
1615    }
1616
1617    ce->dev       = -1;
1618    ce->block_num = -1;
1619
1620    return ce;
1621}
1622
1623
/*
   Collect cache entries for the caller to (re)use: first by allocating
   new ones while the cache is below its size limit, then by claiming
   victims from the LRU end of the normal list.  Every returned entry is
   marked CE_BUSY.  On return, *num_gotten is between num_needed and max;
   entries past num_needed are dirty/cloned extras picked up so the
   caller can flush a larger batch in one go.

   Assumes bc.lock is held; the lock is dropped and re-taken while
   waiting for busy blocks to become available.  Panics if it cannot
   gather num_needed entries after many retries.
*/
static void
get_ents(cache_ent **ents, int num_needed, int max, int *num_gotten, int bsize)
{
    int        cur, retry_counter = 0, max_retry = num_needed * 256;
    cache_ent *ce;

    if (num_needed > max)
        beos_panic("get_ents: num_needed %d but max %d (doh!)\n", num_needed, max);

    /* if the cache isn't full yet, just allocate the blocks */
    for(cur=0; bc.cur_blocks < bc.max_blocks && cur < num_needed; cur++) {
        ents[cur] = new_cache_ent(bsize);
        if (ents[cur] == NULL)
            break;
        bc.cur_blocks++;
    }

    /* pluck off blocks from the LRU end of the normal list, keep trying too */
    while(cur < num_needed && retry_counter < max_retry) {
        for(ce=bc.normal.lru; ce && cur < num_needed; ce=ce->next) {
            if (ce->lock)
                beos_panic("get_ents: normal list has locked blocks (ce 0x%x)\n",ce);

            if (ce->flags & CE_BUSY)   /* don't touch busy blocks */
                continue;

            ce->flags   |= CE_BUSY;
            ents[cur++]  = ce;
        }

        /* not enough yet: let other threads make progress (they may
           un-busy some blocks) and then rescan */
        if (cur < num_needed) {
            UNLOCK(bc.lock);
            snooze(10000);
            LOCK(bc.lock);
            retry_counter++;
        }
    }

    if (cur < num_needed && retry_counter >= max_retry) {  /* oh shit! */
        dump_cache_list();
        UNLOCK(bc.lock);
        beos_panic("get_ents: waited too long; can't get enough ce's (c %d n %d)\n",
              cur, num_needed);
    }

    /*
      If the last block is a dirty one, try to get more of 'em so
      that we can flush a bunch of blocks at once.
    */
    if (cur && cur < max &&
        ((ents[cur-1]->flags & CE_DIRTY) || ents[cur-1]->clone)) {

        for(ce=ents[cur-1]->next; ce && cur < max; ce=ce->next) {
            if (ce->flags & CE_BUSY)   /* don't touch busy blocks */
                continue;

            if (ce->lock)
                beos_panic("get_ents:2 dirty list has locked blocks (ce 0x%x)\n",ce);

            ce->flags   |= CE_BUSY;
            ents[cur++]  = ce;
        }
    }

    *num_gotten = cur;
}
1690
1691
1692static int
1693read_into_ents(int dev, fs_off_t bnum, cache_ent **ents, int num, int bsize)
1694{
1695    int    i, ret;
1696    struct iovec *iov;
1697
1698    iov = get_iovec_array();
1699
1700    for (i = 0; i < num; i++) {
1701        iov[i].iov_base = ents[i]->data;
1702        iov[i].iov_len  = bsize;
1703    }
1704
1705	if (chatty_io > 2)
1706		printf("readv @ %Ld for %d blocks (at %Ld, block_size = %d)\n", bnum, num, bnum*bsize, bsize);
1707    ret = readv_pos(dev, bnum*bsize, iov, num);
1708
1709    release_iovec_array(iov);
1710
1711    if (ret != num*bsize) {
1712        printf("read_into_ents: asked to read %d bytes but got %d\n",
1713               num*bsize, ret);
1714        printf("*** iov @ %p (num %d)\n", iov, num);
1715        return EINVAL;
1716    } else
1717        return 0;
1718}
1719
1720
1721
#define CACHE_READ          0x0001
#define CACHE_WRITE         0x0002
#define CACHE_NOOP          0x0004     /* for getting empty blocks */
#define CACHE_LOCKED        0x0008
#define CACHE_READ_AHEAD_OK 0x0010     /* it's ok to do read-ahead */


/*
   Render a cache op bitmask as a human-readable string for debug
   output.  Returns a pointer to a static buffer, so the result is
   overwritten by the next call and the function is not thread safe.
*/
static char *
op_to_str(int op)
{
    static char buff[128];

    /* start from a known-empty string: the original left buff with
       stale (or, on the first call, indeterminate) contents when op
       carried none of the READ/WRITE/NOOP bits, making the strcat's
       below undefined behavior */
    buff[0] = '\0';

    if (op & CACHE_READ)
        strcpy(buff, "READ");
    else if (op & CACHE_WRITE)
        strcpy(buff, "WRITE");
    else if (op & CACHE_NOOP)
        strcpy(buff, "NOP");

    if (op & CACHE_LOCKED)
        strcat(buff, " LOCKED");

    if (op & CACHE_READ_AHEAD_OK)
        strcat(buff, " (AHEAD)");

    return buff;
}
1749
1750static int
1751cache_block_io(int dev, fs_off_t bnum, void *data, fs_off_t num_blocks, int bsize,
1752               int op, void **dataptr)
1753{
1754    size_t          err = 0;
1755    cache_ent      *ce;
1756    cache_ent_list *cel;
1757
1758    if (chatty_io > 1)
1759        printf("cbio: bnum = %Ld, num_blocks = %Ld, bsize = %d, op = %s\n", bnum, num_blocks,
1760               bsize, op_to_str(op));
1761
1762    /* some sanity checks first */
1763    if (bsize == 0)
1764        beos_panic("cache_io: block size == 0 for bnum %Ld?!?\n", bnum);
1765
1766    if (num_blocks == 0)
1767        beos_panic("cache_io: bnum %Ld has num_blocks == 0!\n", bnum);
1768
1769    if (data == NULL && dataptr == NULL) {
1770        printf("major butthead move: null data and dataptr! bnum %Ld:%Ld\n",
1771                bnum, num_blocks);
1772        return ENOMEM;
1773    }
1774
1775    if (data == NULL) {
1776        if (num_blocks != 1)    /* get_block() should never do that */
1777            beos_panic("cache_io: num_blocks %Ld but should be 1\n",
1778                  num_blocks);
1779
1780        if (op & CACHE_WRITE)
1781            beos_panic("cache_io: get_block() asked to write?!?\n");
1782    }
1783
1784    if (bnum + num_blocks > max_device_blocks[dev]) {
1785        printf("dev %d: access to blocks %Ld:%Ld but max_dev_blocks is %Ld\n",
1786               dev, bnum, num_blocks, max_device_blocks[dev]);
1787
1788		// let the app crash here
1789		*(int *)0x3100 = 0xc0debabe;
1790        return EINVAL;
1791    }
1792
1793    last_cache_access = system_time();
1794
1795    /* if the i/o is greater than 64k, do it directly */
1796    if (num_blocks * bsize >= 64 * 1024) {
1797        char  *ptr;
1798        fs_off_t  tmp;
1799
1800        if (data == NULL || (op & CACHE_LOCKED)) {
1801            beos_panic("*** asked to do a large locked io that's too hard!\n");
1802        }
1803
1804
1805        if (op & CACHE_READ) {
1806            if (beos_read_phys_blocks(dev, bnum, data, num_blocks, bsize) != 0) {
1807                printf("cache read:read_phys_blocks failed (%s on blocks %Ld:%Ld)!\n",
1808                        strerror(errno), bnum, num_blocks);
1809                return EINVAL;
1810            }
1811
1812            LOCK(bc.lock);
1813
1814            /* if any of the blocks are in the cache, grab them instead */
1815            ptr = data;
1816            for(tmp=bnum; tmp < bnum+num_blocks; tmp++, ptr+=bsize) {
1817                ce = block_lookup(dev, tmp);
1818                /*
1819                    if we find a block in the cache we have to copy its
1820                    data just in case it is more recent than what we just
1821                    read from disk (which could happen if someone wrote
1822                    these blocks after we did the read but before we locked
1823                    the cache and entered this loop).
1824                */
1825                if (ce) {
1826                    if (tmp != ce->block_num || dev != ce->dev) {
1827                        UNLOCK(bc.lock);
1828                        beos_panic("*** error4: looked up dev %d block %Ld but "
1829                                "found %d %Ld\n", dev, tmp, ce->dev,
1830                                ce->block_num);
1831                    }
1832
1833                    memcpy(ptr, ce->data, bsize);
1834                }
1835            }
1836
1837            UNLOCK(bc.lock);
1838        } else if (op & CACHE_WRITE) {
1839            LOCK(bc.lock);
1840
1841            /* if any of the blocks are in the cache, update them too */
1842            ptr = data;
1843            for(tmp=bnum; tmp < bnum+num_blocks; tmp++, ptr+=bsize) {
1844                ce = block_lookup(dev, tmp);
1845                if (ce) {
1846                    if (tmp != ce->block_num || dev != ce->dev) {
1847                        UNLOCK(bc.lock);
1848                        beos_panic("*** error5: looked up dev %d block %Ld but "
1849                                "found %d %Ld\n", dev, tmp, ce->dev,
1850                                ce->block_num);
1851                        return EBADF;
1852                    }
1853
1854                    /* XXXdbg -- this isn't strictly necessary */
1855                    if (ce->clone) {
1856                        printf("over-writing cloned data (ce %p bnum %Ld)...\n", ce, tmp);
1857                        flush_cache_ent(ce);
1858                    }
1859
1860                    /* copy the data into the cache */
1861                    memcpy(ce->data, ptr, bsize);
1862                }
1863            }
1864
1865            UNLOCK(bc.lock);
1866
1867            if (beos_write_phys_blocks(dev, bnum, data, num_blocks, bsize) != 0) {
1868                printf("cache write: write_phys_blocks failed (%s on blocks "
1869                       "%Ld:%Ld)!\n", strerror(errno), bnum, num_blocks);
1870                return EINVAL;
1871            }
1872        } else {
1873            printf("bad cache op %d (bnum %Ld nblocks %Ld)\n", op, bnum, num_blocks);
1874            return EINVAL;
1875        }
1876
1877        return 0;
1878    }
1879
1880
1881    LOCK(bc.lock);
1882    while(num_blocks) {
1883
1884        ce = block_lookup(dev, bnum);
1885        if (ce) {
1886            if (bnum != ce->block_num || dev != ce->dev) {
1887                UNLOCK(bc.lock);
1888                beos_panic("*** error6: looked up dev %d block %ld but found "
1889                        "%d %ld\n", dev, bnum, ce->dev, ce->block_num);
1890                return EBADF;
1891            }
1892
1893            if (bsize != ce->bsize) {
1894                beos_panic("*** requested bsize %d but ce->bsize %d ce @ 0x%x\n",
1895                        bsize, ce->bsize, ce);
1896            }
1897
1898            /* delete this ent from the list it is in because it may change */
1899            if (ce->lock)
1900                cel = &bc.locked;
1901            else
1902                cel = &bc.normal;
1903
1904            delete_from_list(cel, ce);
1905
1906            if (op & CACHE_READ) {
1907                if (data && data != ce->data) {
1908                    memcpy(data, ce->data, bsize);
1909                } else if (dataptr) {
1910                    *dataptr = ce->data;
1911                } else {
1912                    printf("cbio:data %p dptr %p ce @ %p ce->data %p\n",
1913                           data, dataptr, ce, ce->data);
1914                }
1915            } else if (op & CACHE_WRITE) {
1916                if (data && data != ce->data)
1917                    memcpy(ce->data, data, bsize);
1918
1919                ce->flags |= CE_DIRTY;
1920            } else if (op & CACHE_NOOP) {
1921                memset(ce->data, 0, bsize);
1922                if (data)
1923                    memset(data, 0, bsize);
1924
1925                if (dataptr)
1926                    *dataptr = ce->data;
1927
1928                ce->flags |= CE_DIRTY;
1929            } else {
1930                beos_panic("cached_block_io: bogus op %d\n", op);
1931            }
1932
1933            if (op & CACHE_LOCKED)
1934                ce->lock++;
1935
1936            if (ce->lock)
1937                cel = &bc.locked;
1938            else
1939                cel = &bc.normal;
1940
1941            /* now put this ent at the head of the appropriate list */
1942            add_to_head(cel, ce);
1943
1944            if (data != NULL)
1945                data = (void *)((char *)data + bsize);
1946
1947            bnum       += 1;
1948            num_blocks -= 1;
1949
1950            continue;
1951        } else {                                  /* it's not in the cache */
1952            int        cur, cur_nblocks, num_dirty, real_nblocks, num_needed;
1953            cache_ent *ents[NUM_FLUSH_BLOCKS];
1954
1955            /*
1956               here we find out how many additional blocks in this request
1957               are not in the cache.  the idea is that then we can do one
1958               big i/o on that many blocks at once.
1959            */
1960            for(cur_nblocks=1;
1961                cur_nblocks < num_blocks && cur_nblocks < NUM_FLUSH_BLOCKS;
1962                cur_nblocks++) {
1963
1964                /* we can call hash_lookup() directly instead of
1965                   block_lookup() because we don't care about the
1966                   state of the busy bit of the block at this point
1967                */
1968                if (hash_lookup(&bc.ht, dev, bnum + cur_nblocks))
1969                    break;
1970            }
1971
1972            /*
1973              here we try to figure out how many extra blocks we should read
1974              for read-ahead.  we want to read as many as possible that are
1975              not already in the cache and that don't cause us to try and
1976              read beyond the end of the disk.
1977            */
1978            if ((op & CACHE_READ) && (op & CACHE_READ_AHEAD_OK) &&
1979                (cur_nblocks * bsize) < read_ahead_size) {
1980
1981                for(num_needed=cur_nblocks;
1982                    num_needed < (read_ahead_size / bsize);
1983                    num_needed++) {
1984
1985                    if ((bnum + num_needed) >= max_device_blocks[dev])
1986                        break;
1987
1988                    if (hash_lookup(&bc.ht, dev, bnum + num_needed))
1989                        break;
1990                }
1991            } else {
1992                num_needed = cur_nblocks;
1993            }
1994
1995            /* this will get us pointers to a bunch of cache_ents we can use */
1996            get_ents(ents, num_needed, NUM_FLUSH_BLOCKS, &real_nblocks, bsize);
1997
1998            if (real_nblocks < num_needed) {
1999                beos_panic("don't have enough cache ents (need %d got %d %ld::%d)\n",
2000                      num_needed, real_nblocks, bnum, num_blocks);
2001            }
2002
2003            /*
2004              There are now three variables used as limits within the ents
2005              array.  This is how they are related:
2006
2007                 cur_nblocks <= num_needed <= real_nblocks
2008
2009              Ents from 0 to cur_nblocks-1 are going to be used to fulfill
2010              this IO request.  Ents from cur_nblocks to num_needed-1 are
2011              for read-ahead.  Ents from num_needed to real_nblocks are
2012              extra blocks that get_ents() asked us to flush.  Often (and
2013              always on writes) cur_nblocks == num_needed.
2014
2015              Below, we sort the list of ents so that when we flush them
2016              they go out in order.
2017            */
2018
2019            qsort(ents, real_nblocks, sizeof(cache_ent **), cache_ent_cmp);
2020
2021            /*
2022              delete each ent from its list because it will change.  also
2023              count up how many dirty blocks there are and insert into the
2024              hash table any new blocks so that no one else will try to
2025              read them in when we release the cache semaphore to do our I/O.
2026            */
2027            for(cur=0,num_dirty=0; cur < real_nblocks; cur++) {
2028                ce = ents[cur];
2029                ce->flags |= CE_BUSY;
2030
2031                /*
2032                   insert the new block into the hash table with its new block
2033                   number. note that the block is still in the hash table for
2034                   its old block number -- and it has to be until we are done
2035                   flushing it from the cache (to prevent someone else from
2036                   sneaking in in front of us and trying to read the same
2037                   block that we're flushing).
2038                */
2039                if (cur < num_needed) {
2040                    if (hash_insert(&bc.ht, dev, bnum + cur, ce) != 0)
2041                        beos_panic("could not insert cache ent for %d %ld (0x%lx)\n",
2042                              dev, bnum + cur, (ulong)ents[cur]);
2043                }
2044
2045                if (ce->dev == -1)
2046                    continue;
2047
2048                if ((ce->flags & CE_DIRTY) || ce->clone)
2049                    num_dirty++;
2050
2051                if (ce->lock) {
2052                    beos_panic("cbio: can't use locked blocks here ce @ 0x%x\n",ce);
2053                } else {
2054                    cel = &bc.normal;
2055	                delete_from_list(cel, ce);
2056				}
2057            }
2058            ce = NULL;
2059
2060
2061            /*
2062               we release the block cache semaphore here so that we can
2063               go do all the i/o we need to do (flushing dirty blocks
2064               that we're kicking out as well as reading any new data).
2065
2066               because all the blocks we're touching are marked busy
2067               no one else should mess with them while we're doing this.
2068            */
2069            if (num_dirty || (op & CACHE_READ)) {
2070                UNLOCK(bc.lock);
2071
2072                /* this flushes any blocks we're kicking out that are dirty */
2073                if (num_dirty && (err = flush_ents(ents, real_nblocks)) != 0) {
2074                    printf("flush ents failed (ents @ 0x%lx, nblocks %d!\n",
2075                           (ulong)ents, cur_nblocks);
2076                    goto handle_err;
2077                }
2078
2079            }
2080
2081            /*
2082               now that everything is flushed to disk, go through and
2083               make sure that the data blocks we're going to use are
2084               the right block size for this current request (it's
2085               possible we're kicking out some smaller blocks and need
2086               to reallocate the data block pointer). We do this in two
2087               steps, first free'ing everything and then going through
2088               and doing the malloc's to try and be nice to the memory
2089               system (i.e. allow it to coalesce stuff, etc).
2090            */
2091            err = 0;
2092            for(cur=0; cur < num_needed; cur++) {
2093                if (ents[cur]->bsize != bsize) {
2094                    free(ents[cur]->data);
2095                    ents[cur]->data = NULL;
2096
2097                    if (ents[cur]->clone) {
2098                        free(ents[cur]->clone);
2099                        ents[cur]->clone = NULL;
2100                    }
2101                }
2102            }
2103
2104            for(cur=0; cur < num_needed; cur++) {
2105                if (ents[cur]->data == NULL) {
2106                    ents[cur]->data  = (void *)malloc(bsize);
2107                    ents[cur]->bsize = bsize;
2108                }
2109
2110                if (ents[cur]->data == NULL) {
2111                    printf("cache: no memory for block (bsize %d)!\n",
2112                           bsize);
2113                    err = ENOMEM;
2114                    break;
2115                }
2116            }
2117
2118            /*
2119               if this condition is true it's a pretty serious error.
2120               we'll try and back out gracefully but we're in pretty
2121               deep at this point and it ain't going to be easy.
2122            */
2123  handle_err:
2124            if (err) {
2125                for(cur=0; cur < num_needed; cur++) {
2126                    cache_ent *tmp_ce;
2127
2128                    tmp_ce = (cache_ent *)hash_delete(&bc.ht,dev,bnum+cur);
2129                    if (tmp_ce != ents[cur]) {
2130                        beos_panic("hash_del0: %d %ld got 0x%lx, not 0x%lx\n",
2131                                dev, bnum+cur, (ulong)tmp_ce,
2132                                (ulong)ents[cur]);
2133                    }
2134
2135                    tmp_ce = (cache_ent *)hash_delete(&bc.ht,ents[cur]->dev,
2136                                                        ents[cur]->block_num);
2137                    if (tmp_ce != ents[cur]) {
2138                        beos_panic("hash_del1: %d %ld got 0x%lx, not 0x%lx\n",
2139                                ents[cur]->dev, ents[cur]->block_num, (ulong)tmp_ce,
2140                                (ulong)ents[cur]);
2141                    }
2142
2143                    ents[cur]->flags &= ~CE_BUSY;
2144                    if (ents[cur]->data)
2145                        free(ents[cur]->data);
2146                    free(ents[cur]);
2147                    ents[cur] = NULL;
2148
2149                    bc.cur_blocks--;
2150                }
2151
2152                if (cur < real_nblocks) {
2153                    LOCK(bc.lock);
2154                    for(; cur < real_nblocks; cur++) {
2155                        ents[cur]->flags &= ~CE_BUSY;
2156
2157                        /* we have to put them back here */
2158                        add_to_tail(&bc.normal, ents[cur]);
2159                    }
2160                    UNLOCK(bc.lock);
2161                }
2162
2163                return ENOMEM;
2164            }
2165
2166
2167            /*
2168               If we go into this if statement, the block cache lock
2169               has *already been released* up above when we flushed the
2170               dirty entries.  As always, since the blocks we're mucking
2171               with are marked busy, they shouldn't get messed with.
2172            */
2173            err = 0;
2174            if (num_dirty || (op & CACHE_READ)) {
2175                /* this section performs the i/o that we need to do */
2176                if (op & CACHE_READ) {
2177                    err = read_into_ents(dev, bnum, ents, num_needed, bsize);
2178                } else {
2179                    err = 0;
2180                }
2181
2182                if (err != 0) {
2183                    printf("err %s on dev %d block %Ld:%d (%d) "
2184                           "data %p, ents[0] %p\n",
2185                           strerror(errno), dev, bnum, cur_nblocks,
2186                           bsize, data, ents[0]);
2187                }
2188
2189                /*
2190                   acquire the semaphore here so that we can go on mucking
2191                   with the cache data structures.  We need to delete old
2192                   block numbers from the hash table and set the new block
2193                   number's for the blocks we just read in.  We also put the
2194                   read-ahead blocks at the head of mru list.
2195                */
2196
2197                LOCK(bc.lock);
2198            }
2199
2200            for(cur=0; cur < num_needed; cur++) {
2201                cache_ent *tmp_ce;
2202
2203                ce = ents[cur];
2204                if (ce->dev != -1) {
2205                    tmp_ce = hash_delete(&bc.ht, ce->dev, ce->block_num);
2206                    if (tmp_ce == NULL || tmp_ce != ce) {
2207                        beos_panic("*** hash_delete failure (ce 0x%x tce 0x%x)\n",
2208                              ce, tmp_ce);
2209                    }
2210                }
2211
2212                if (err == 0 && cur >= cur_nblocks) {
2213                    ce->dev       = dev;
2214                    ce->block_num = bnum + cur;
2215                    ce->flags    &= ~CE_BUSY;
2216                    add_to_head(&bc.normal, ce);
2217                }
2218            }
2219            ce = NULL;
2220
2221            /*
2222              clear the busy bit on the blocks we force-flushed and
2223              put them on the normal list since they're now clean.
2224            */
2225            for(; cur < real_nblocks; cur++) {
2226                ents[cur]->flags &= ~CE_BUSY;
2227
2228                if (ents[cur]->lock)
2229                    beos_panic("should not have locked blocks here (ce 0x%x)\n",
2230                          ents[cur]);
2231
2232                add_to_tail(&bc.normal, ents[cur]);
2233            }
2234
2235            if (err) {   /* then we have some cleanup to do */
2236                for(cur=0; cur < num_needed; cur++) {
2237                    cache_ent *tmp_ce;
2238
2239                    /* we delete all blocks from the cache so we don't
2240                       leave partially written blocks in the cache */
2241
2242                    tmp_ce = (cache_ent *)hash_delete(&bc.ht,dev,bnum+cur);
2243                    if (tmp_ce != ents[cur]) {
2244                        beos_panic("hash_del: %d %ld got 0x%lx, not 0x%lx\n",
2245                                dev, bnum+cur, (ulong)tmp_ce,
2246                                (ulong)ents[cur]);
2247                    }
2248
2249                    ce = ents[cur];
2250                    ce->flags &= ~CE_BUSY;
2251
2252                    free(ce->data);
2253                    ce->data = NULL;
2254
2255                    free(ce);
2256                    ents[cur] = NULL;
2257
2258                    bc.cur_blocks--;
2259                }
2260                ce = NULL;
2261
2262                UNLOCK(bc.lock);
2263                return err;
2264            }
2265
2266
2267            /*
2268               last step: go through and make sure all the cache_ent
2269               structures have the right data in them, delete old guys, etc.
2270            */
2271            for(cur=0; cur < cur_nblocks; cur++) {
2272                ce = ents[cur];
2273
2274                if (ce->dev != -1) {   /* then clean this guy up */
2275                    if (ce->next || ce->prev)
2276                        beos_panic("ce @ 0x%x should not be in a list yet!\n", ce);
2277
2278                    if (ce->clone)
2279                        free(ce->clone);
2280
2281                    if (ce->data == NULL)
2282                        beos_panic("ce @ 0x%lx has a null data ptr\n", (ulong)ce);
2283                }
2284
2285                ce->dev        = dev;
2286                ce->block_num  = bnum + cur;
2287                ce->bsize      = bsize;
2288                ce->flags      = CE_NORMAL;
2289                ce->lock       = 0;
2290                ce->clone      = NULL;
2291                ce->func = ce->arg  = NULL;
2292                ce->next = ce->prev = NULL;
2293
2294                if (op & CACHE_READ) {
2295                    if (data)
2296                        memcpy(data, ce->data, bsize);
2297                } else if (op & CACHE_WRITE) {
2298                    ce->flags  |= CE_DIRTY;
2299                    memcpy(ce->data, data, bsize);
2300                } else if (op & CACHE_NOOP) {
2301                    memset(ce->data, 0, bsize);
2302                    if (data)
2303                        memset(data, 0, bsize);
2304
2305                    ce->flags |= CE_DIRTY;
2306                }
2307
2308                if (op & CACHE_LOCKED) {
2309                    ce->lock++;
2310                    cel = &bc.locked;
2311                } else {
2312                    cel = &bc.normal;
2313                }
2314
2315                /* now stick this puppy at the head of the mru list */
2316                add_to_head(cel, ce);
2317
2318
2319                if (dataptr) {
2320                    *dataptr = ce->data;
2321                }
2322
2323                if (data != NULL)
2324                    data = (void *)((char *)data + bsize);
2325                else if (cur_nblocks != 1)
2326                    beos_panic("cache can't handle setting data_ptr twice!\n");
2327            }  /* end of for(cur=0; cur < cur_nblocks; cur++) */
2328
2329            bnum       += cur_nblocks;
2330            num_blocks -= cur_nblocks;
2331
2332        }   /* end of else it's not in the cache */
2333
2334    }   /* end of while(num_blocks) */
2335
2336    UNLOCK(bc.lock);
2337
2338    return 0;
2339}
2340
2341
2342void *
2343beos_get_block(int dev, fs_off_t bnum, int bsize)
2344{
2345    void *data;
2346
2347    if (cache_block_io(dev, bnum, NULL, 1, bsize, CACHE_READ|CACHE_LOCKED|CACHE_READ_AHEAD_OK,
2348                       &data) != 0)
2349        return NULL;
2350
2351    return data;
2352}
2353
2354void *
2355beos_get_empty_block(int dev, fs_off_t bnum, int bsize)
2356{
2357    void *data;
2358
2359    if (cache_block_io(dev, bnum, NULL, 1, bsize, CACHE_NOOP|CACHE_LOCKED,
2360                       &data) != 0)
2361        return NULL;
2362
2363    return data;
2364}
2365
2366int
2367beos_cached_read(int dev, fs_off_t bnum, void *data, fs_off_t num_blocks, int bsize)
2368{
2369    return cache_block_io(dev, bnum, data, num_blocks, bsize,
2370                          CACHE_READ | CACHE_READ_AHEAD_OK, NULL);
2371}
2372
2373
2374int
2375beos_cached_write(int dev, fs_off_t bnum, const void *data, fs_off_t num_blocks,int bsize)
2376{
2377    return cache_block_io(dev, bnum, (void *)data, num_blocks, bsize,
2378                          CACHE_WRITE, NULL);
2379}
2380
2381int
2382beos_cached_write_locked(int dev, fs_off_t bnum, const void *data,
2383                    fs_off_t num_blocks, int bsize)
2384{
2385    return cache_block_io(dev, bnum, (void *)data, num_blocks, bsize,
2386                          CACHE_WRITE | CACHE_LOCKED, NULL);
2387}
2388
2389
2390void
2391beos_force_cache_flush(int dev, int prefer_log_blocks)
2392{
2393    int        i, count = 0;
2394    cache_ent *ce;
2395    cache_ent *ents[NUM_FLUSH_BLOCKS];
2396
2397
2398    LOCK(bc.lock);
2399
2400    for(ce=bc.normal.lru; ce; ce=ce->next) {
2401        if ((ce->dev == dev) &&
2402            (ce->flags & CE_BUSY) == 0 &&
2403            ((ce->flags & CE_DIRTY) || ce->clone) &&
2404            ((prefer_log_blocks && ce->func) || (prefer_log_blocks == 0))) {
2405
2406            ce->flags |= CE_BUSY;
2407            ents[count++] = ce;
2408
2409            if (count >= NUM_FLUSH_BLOCKS) {
2410                break;
2411            }
2412        }
2413    }
2414
2415    /* if we've got some room left, try and grab any cloned blocks */
2416    if (count < NUM_FLUSH_BLOCKS) {
2417        for(ce=bc.locked.lru; ce; ce=ce->next) {
2418            if ((ce->dev == dev) &&
2419                (ce->flags & CE_BUSY) == 0 &&
2420                (ce->clone)) {
2421
2422                ce->flags |= CE_BUSY;
2423                ents[count++] = ce;
2424
2425                if (count >= NUM_FLUSH_BLOCKS) {
2426                    break;
2427                }
2428            }
2429        }
2430    }
2431
2432    UNLOCK(bc.lock);
2433
2434    if (count != 0) {
2435        qsort(ents, count, sizeof(cache_ent **), cache_ent_cmp);
2436        flush_ents(ents, count);
2437
2438        for(i=0; i < count; i++)
2439            ents[i]->flags &= ~CE_BUSY;
2440    }
2441}
2442