1/*
2   This file contains the global device cache for the BeOS.  All
3   file system I/O comes through here.  The cache can handle blocks
4   of different sizes for multiple different underlying physical
5   devices.
6
7   The cache is organized as a hash table (for lookups by device
8   and block number) and two doubly-linked lists.  The normal
9   list is for "normal" blocks which are either clean or dirty.
10   The locked list is for blocks that are locked in the cache by
11   BFS.  The lists are LRU ordered.
12
13   Most of the work happens in the function cache_block_io() which
14   is quite lengthy.  The other functions of interest are get_ents()
15   which picks victims to be kicked out of the cache; flush_ents()
16   which does the work of flushing them; and set_blocks_info() which
17   handles cloning blocks and setting callbacks so that the BFS
18   journal will work properly.  If you want to modify this code it
19   will take some study but it's not too bad.  Do not think about
20   separating the list of clean and dirty blocks into two lists as
21   I did that already and it's slower.
22
23   Originally this cache code was written while listening to the album
24   "Ride the Lightning" by Metallica.  The current version was written
25   while listening to "Welcome to SkyValley" by Kyuss as well as an
26   ambient collection on the German label FAX.  Music helps a lot when
27   writing code.
28
29   THIS CODE COPYRIGHT DOMINIC GIAMPAOLO.  NO WARRANTY IS EXPRESSED
30   OR IMPLIED.  YOU MAY USE THIS CODE AND FREELY DISTRIBUTE IT FOR
31   NON-COMMERCIAL USE AS LONG AS THIS NOTICE REMAINS ATTACHED.
32
33   FOR COMMERCIAL USE, CONTACT DOMINIC GIAMPAOLO (dbg@be.com).
34
35   Dominic Giampaolo
36   dbg@be.com
37*/
38
39#include <stdio.h>
40#include <stdlib.h>
41#include <memory.h>
42#include <string.h>
43#include <errno.h>
44#include <fcntl.h>
45#include <sys/types.h>
46#include <sys/uio.h>
47#include <unistd.h>
48
49
50#include <OS.h>
51#include <KernelExport.h>
52
53#include "fs_cache.h"
54#include "fs_cache_priv.h"
55#include "lock.h"
56
57
58
59#ifndef USER
60#define printf dprintf
61#endif
62#ifdef USER
63#define kprintf printf
64#endif
65
66
67typedef off_t fs_off_t;
68
69/* forward prototypes */
70static int   flush_ents(cache_ent **ents, int n_ents);
71
72//static int   do_dump(int argc, char **argv);
73//static int   do_find_block(int argc, char **argv);
74//static int   do_find_data(int argc, char **argv);
75//static void  cache_flusher(void *arg, int phase);
76
77
78int chatty_io = 0;
79
80#define CHUNK (512 * 1024)   /* a hack to work around scsi driver bugs */
81
/*
  Print a formatted message to stderr and then hang forever.  Used
  for unrecoverable internal cache errors.  This function never
  returns.
*/
static void
beos_panic(const char *format, ...)
{
    va_list args;

    va_start(args, format);
    vfprintf(stderr, format, args);
    va_end(args);

    for (;;)
        ;
}
94
95size_t
96beos_read_phys_blocks(int fd, fs_off_t bnum, void *data, uint num_blocks, int bsize)
97{
98    size_t ret = 0;
99    size_t sum;
100
101    if (chatty_io)
102        printf("R: %8" B_PRIdOFF " : %3d\n", bnum, num_blocks);
103
104    if (num_blocks * bsize < CHUNK)
105        ret = read_pos(fd, bnum * bsize, data, num_blocks * bsize);
106    else {
107        for(sum=0; (sum + CHUNK) <= (num_blocks * bsize); sum += CHUNK) {
108            ret = read_pos(fd, (bnum * bsize) + sum, data, CHUNK);
109            if (ret != CHUNK)
110                break;
111
112            data = (void *)((char *)data + CHUNK);
113        }
114
115        if (ret == CHUNK && ((num_blocks * bsize) - sum) > 0) {
116            ret = read_pos(fd, (bnum * bsize) + sum, data,
117                           (num_blocks * bsize) - sum);
118
119            if (ret == (num_blocks * bsize) - sum)
120                ret = num_blocks * bsize;
121        } else if (ret == CHUNK) {
122            ret = num_blocks * bsize;
123        }
124    }
125
126    if (ret == num_blocks * bsize)
127        return 0;
128    else
129        return EBADF;
130}
131
132size_t
133beos_write_phys_blocks(int fd, fs_off_t bnum, void *data, uint num_blocks, int bsize)
134{
135    size_t ret = 0;
136    size_t sum;
137
138    if (chatty_io)
139        printf("W: %8" B_PRIdOFF " : %3d\n", bnum, num_blocks);
140
141    if (num_blocks * bsize < CHUNK)
142        ret = write_pos(fd, bnum * bsize, data, num_blocks * bsize);
143    else {
144        for(sum=0; (sum + CHUNK) <= (num_blocks * bsize); sum += CHUNK) {
145            ret = write_pos(fd, (bnum * bsize) + sum, data, CHUNK);
146            if (ret != CHUNK)
147                break;
148
149            data = (void *)((char *)data + CHUNK);
150        }
151
152        if (ret == CHUNK && ((num_blocks * bsize) - sum) > 0) {
153            ret = write_pos(fd, (bnum * bsize) + sum, data,
154                            (num_blocks * bsize) - sum);
155
156            if (ret == (num_blocks * bsize) - sum)
157                ret = num_blocks * bsize;
158        } else if (ret == CHUNK) {
159            ret = num_blocks * bsize;
160        }
161    }
162
163
164    if (ret == num_blocks * bsize)
165        return 0;
166    else
167        return EBADF;
168}
169
170//	#pragma mark -
171
172static int
173init_hash_table(hash_table *ht)
174{
175    ht->max   = HT_DEFAULT_MAX;
176    ht->mask  = ht->max - 1;
177    ht->num_elements = 0;
178
179    ht->table = (hash_ent **)calloc(ht->max, sizeof(hash_ent *));
180    if (ht->table == NULL)
181        return ENOMEM;
182
183    return 0;
184}
185
186
187static void
188shutdown_hash_table(hash_table *ht)
189{
190    int       i, hash_len;
191    hash_ent *he, *next;
192
193    for(i=0; i < ht->max; i++) {
194        he = ht->table[i];
195
196        for(hash_len=0; he; hash_len++, he=next) {
197            next = he->next;
198            free(he);
199        }
200    }
201
202    if (ht->table)
203        free(ht->table);
204    ht->table = NULL;
205}
206
#if 0
/*
  Disabled debugging helper (compiled out; kept for reference).
  Walks every bucket of the hash table, prints the chain length of
  each non-empty bucket, and finally the longest chain and the mean
  chain length over all buckets.
*/
static void
print_hash_stats(hash_table *ht)
{
    int       i, hash_len, max = -1, sum = 0;
    hash_ent *he, *next;

    for(i=0; i < ht->max; i++) {
        he = ht->table[i];

        /* count the entries chained off this bucket */
        for(hash_len=0; he; hash_len++, he=next) {
            next = he->next;
        }
        if (hash_len)
            printf("bucket %3d : %3d\n", i, hash_len);

        sum += hash_len;
        if (hash_len > max)
            max = hash_len;
    }

    printf("max # of chains: %d,  average chain length %d\n", max,sum/ht->max);
}
#endif
231
232#define HASH(d, b)   ((((fs_off_t)d) << (sizeof(fs_off_t)*8 - 6)) | (b))
233
234static hash_ent *
235new_hash_ent(int dev, fs_off_t bnum, void *data)
236{
237    hash_ent *he;
238
239    he = (hash_ent *)malloc(sizeof(*he));
240    if (he == NULL)
241        return NULL;
242
243    he->hash_val = HASH(dev, bnum);
244    he->dev      = dev;
245    he->bnum     = bnum;
246    he->data     = data;
247    he->next     = NULL;
248
249    return he;
250}
251
252
253static int
254grow_hash_table(hash_table *ht)
255{
256    int        i, omax, newsize, newmask;
257    fs_off_t      hash;
258    hash_ent **new_table, *he, *next;
259
260    if (ht->max & ht->mask) {
261        printf("*** hashtable size %d or mask %d looks weird!\n", ht->max,
262               ht->mask);
263    }
264
265    omax    = ht->max;
266    newsize = omax * 2;        /* have to grow in powers of two */
267    newmask = newsize - 1;
268
269    new_table = (hash_ent **)calloc(newsize, sizeof(hash_ent *));
270    if (new_table == NULL)
271        return ENOMEM;
272
273    for(i=0; i < omax; i++) {
274        for(he=ht->table[i]; he; he=next) {
275            hash = he->hash_val & newmask;
276            next = he->next;
277
278            he->next        = new_table[hash];
279            new_table[hash] = he;
280        }
281    }
282
283    free(ht->table);
284    ht->table = new_table;
285    ht->max   = newsize;
286    ht->mask  = newmask;
287
288    return 0;
289}
290
291
292
293
294static int
295hash_insert(hash_table *ht, int dev, fs_off_t bnum, void *data)
296{
297    fs_off_t    hash;
298    hash_ent *he, *curr;
299
300    hash = HASH(dev, bnum) & ht->mask;
301
302    curr = ht->table[hash];
303    for(; curr != NULL; curr=curr->next)
304        if (curr->dev == dev && curr->bnum == bnum)
305            break;
306
307    if (curr && curr->dev == dev && curr->bnum == bnum) {
308        printf("entry %d:%" B_PRIdOFF " already in the hash table!\n",
309            dev, bnum);
310        return EEXIST;
311    }
312
313    he = new_hash_ent(dev, bnum, data);
314    if (he == NULL)
315        return ENOMEM;
316
317    he->next        = ht->table[hash];
318    ht->table[hash] = he;
319
320    ht->num_elements++;
321    if (ht->num_elements >= ((ht->max * 3) / 4)) {
322        if (grow_hash_table(ht) != 0)
323            return ENOMEM;
324    }
325
326    return 0;
327}
328
329static void *
330hash_lookup(hash_table *ht, int dev, fs_off_t bnum)
331{
332    hash_ent *he;
333
334    he = ht->table[HASH(dev, bnum) & ht->mask];
335
336    for(; he != NULL; he=he->next) {
337        if (he->dev == dev && he->bnum == bnum)
338            break;
339    }
340
341    if (he)
342        return he->data;
343    else
344        return NULL;
345}
346
347
348static void *
349hash_delete(hash_table *ht, int dev, fs_off_t bnum)
350{
351    void     *data;
352    fs_off_t     hash;
353    hash_ent *he, *prev = NULL;
354
355    hash = HASH(dev, bnum) & ht->mask;
356    he = ht->table[hash];
357
358    for(; he != NULL; prev=he,he=he->next) {
359        if (he->dev == dev && he->bnum == bnum)
360            break;
361    }
362
363    if (he == NULL) {
364        printf("*** hash_delete: tried to delete non-existent block %d:%"
365            B_PRIdOFF "\n", dev, bnum);
366        return NULL;
367    }
368
369    data = he->data;
370
371    if (ht->table[hash] == he)
372        ht->table[hash] = he->next;
373    else if (prev)
374        prev->next = he->next;
375    else
376        beos_panic("hash table is inconsistent\n");
377
378    free(he);
379    ht->num_elements--;
380
381    return data;
382}
383
384//	#pragma mark -
385
386/*
387  These are the global variables for the cache.
388*/
389static block_cache  bc;
390
391#define       MAX_IOVECS  64           /* # of iovecs for use by cache code */
392static beos_lock   iovec_lock;
393static struct iovec *iovec_pool[MAX_IOVECS];  /* each ptr is to an array of iovecs */
394static int    iovec_used[MAX_IOVECS];  /* non-zero == iovec is in use */
395
396#define NUM_FLUSH_BLOCKS 64    /* size of the iovec array pointed by each ptr */
397
398
399#define DEFAULT_READ_AHEAD_SIZE  (32 * 1024)
400static int read_ahead_size = DEFAULT_READ_AHEAD_SIZE;
401
402/* this array stores the size of each device so we can error check requests */
403#define MAX_DEVICES  256
404fs_off_t max_device_blocks[MAX_DEVICES];
405
406
407/* has the time of the last cache access so cache flushing doesn't interfere */
408static bigtime_t last_cache_access = 0;
409
410
411int
412beos_init_block_cache(int max_blocks, int flags)
413{
414    memset(&bc, 0, sizeof(bc));
415    memset(iovec_pool, 0, sizeof(iovec_pool));
416    memset(iovec_used, 0, sizeof(iovec_used));
417    memset(&max_device_blocks, 0, sizeof(max_device_blocks));
418
419    if (init_hash_table(&bc.ht) != 0)
420        return ENOMEM;
421
422    bc.lock.s = iovec_lock.s = -1;
423
424    bc.max_blocks = max_blocks;
425    bc.flags      = flags;
426    if (beos_new_lock(&bc.lock, "bollockcache") != 0)
427        goto err;
428
429    if (beos_new_lock(&iovec_lock, "iovec_lock") != 0)
430        goto err;
431
432    /* allocate two of these up front so vm won't accidently re-enter itself */
433    iovec_pool[0] = (struct iovec *)malloc(sizeof(struct iovec)*NUM_FLUSH_BLOCKS);
434    iovec_pool[1] = (struct iovec *)malloc(sizeof(struct iovec)*NUM_FLUSH_BLOCKS);
435
436#ifndef USER
437#ifdef DEBUG
438    add_debugger_command("bcache", do_dump, "dump the block cache list");
439    add_debugger_command("fblock", do_find_block, "find a block in the cache");
440    add_debugger_command("fdata",  do_find_data, "find a data block ptr in the cache");
441#endif
442    register_kernel_daemon(cache_flusher, NULL, 3);
443#endif
444
445    return 0;
446
447 err:
448    if (bc.lock.s >= 0)
449        beos_free_lock(&bc.lock);
450
451    if (iovec_lock.s >= 0)
452        beos_free_lock(&iovec_lock);
453
454    shutdown_hash_table(&bc.ht);
455    memset((void *)&bc, 0, sizeof(bc));
456    return ENOMEM;
457}
458
459
460static struct iovec *
461get_iovec_array(void)
462{
463    int i;
464    struct iovec *iov;
465
466    LOCK(iovec_lock);
467
468    for(i = 0; i < MAX_IOVECS; i++) {
469        if (iovec_used[i] == 0)
470            break;
471    }
472
473    if (i >= MAX_IOVECS)       /* uh-oh */
474        beos_panic("cache: ran out of iovecs (pool %p, used %p)!\n",
475              &iovec_pool[0], &iovec_used[0]);
476
477    if (iovec_pool[i] == NULL) {
478        iovec_pool[i] = (struct iovec *)malloc(sizeof(struct iovec)*NUM_FLUSH_BLOCKS);
479        if (iovec_pool[i] == NULL)
480            beos_panic("can't allocate an iovec!\n");
481    }
482
483    iov = iovec_pool[i];
484    iovec_used[i] = 1;
485
486    UNLOCK(iovec_lock);
487
488    return iov;
489}
490
491
492static void
493release_iovec_array(struct iovec *iov)
494{
495    int i;
496
497    LOCK(iovec_lock);
498
499    for(i=0; i < MAX_IOVECS; i++) {
500        if (iov == iovec_pool[i])
501            break;
502    }
503
504    if (i < MAX_IOVECS)
505        iovec_used[i] = 0;
506    else                     /* uh-oh */
507        printf("cache: released an iovec I don't own (iov %p)\n", iov);
508
509
510    UNLOCK(iovec_lock);
511}
512
513
514
515
516static void
517real_dump_cache_list(cache_ent_list *cel)
518{
519    cache_ent *ce;
520
521    kprintf("starting from LRU end:\n");
522
523    for (ce = cel->lru; ce; ce = ce->next) {
524        kprintf("ce %p dev %2d bnum %6" B_PRIdOFF " lock %d flag %d arg %p "
525               "clone %p\n", ce, ce->dev, ce->block_num, ce->lock,
526               ce->flags, ce->arg, ce->clone);
527    }
528    kprintf("MRU end\n");
529}
530
531static void
532dump_cache_list(void)
533{
534    kprintf("NORMAL BLOCKS\n");
535    real_dump_cache_list(&bc.normal);
536
537    kprintf("LOCKED BLOCKS\n");
538    real_dump_cache_list(&bc.locked);
539
540    kprintf("cur blocks %d, max blocks %d ht @ %p\n", bc.cur_blocks,
541           bc.max_blocks, &bc.ht);
542}
543
#if 0
/*
  Disabled consistency-checking helpers (compiled out; kept for
  reference).
*/

/*
  Count the entries on both cache lists and compare against
  bc.cur_blocks; panics if the counts are wildly off (> 16), just
  complains otherwise.
*/
static void
check_bcache(char *str)
{
    int count = 0;
    cache_ent *ce, *prev = NULL;

    LOCK(bc.lock);

    for(ce=bc.normal.lru; ce; prev=ce, ce=ce->next) {
        count++;
    }

    for(ce=bc.locked.lru; ce; prev=ce, ce=ce->next) {
        count++;
    }

    if (count != bc.cur_blocks) {
        if (count < bc.cur_blocks - 16)
            beos_panic("%s: count == %d, cur_blocks %d, prev 0x%x\n",
                    str, count, bc.cur_blocks, prev);
        else
            printf("%s: count == %d, cur_blocks %d, prev 0x%x\n",
                    str, count, bc.cur_blocks, prev);
    }

    UNLOCK(bc.lock);
}


/*
  Print the full contents of both the locked and normal lists.
*/
static void
dump_lists(void)
{
    cache_ent *nce;

    printf("LOCKED 0x%x  (tail 0x%x, head 0x%x)\n", &bc.locked,
           bc.locked.lru, bc.locked.mru);
    for(nce=bc.locked.lru; nce; nce=nce->next)
        printf("nce @ 0x%x dev %d bnum %ld flags %d lock %d clone 0x%x func 0x%x\n",
               nce, nce->dev, nce->block_num, nce->flags, nce->lock, nce->clone,
               nce->func);

    printf("NORMAL 0x%x  (tail 0x%x, head 0x%x)\n", &bc.normal,
           bc.normal.lru, bc.normal.mru);
    for(nce=bc.normal.lru; nce; nce=nce->next)
        printf("nce @ 0x%x dev %d bnum %ld flags %d lock %d clone 0x%x func 0x%x\n",
               nce, nce->dev, nce->block_num, nce->flags, nce->lock, nce->clone,
               nce->func);
}


/*
  Verify that no cache_ent appears on both lists and that the last
  element of each list really is that list's mru pointer; panics
  (after dumping the lists) on any violation.
*/
static void
check_lists(void)
{
    cache_ent *ce, *prev, *oce;
    cache_ent_list *cel;

    cel = &bc.normal;
    for(ce=cel->lru,prev=NULL; ce; prev=ce, ce=ce->next) {
        for(oce=bc.locked.lru; oce; oce=oce->next) {
            if (oce == ce) {
                dump_lists();
                beos_panic("1:ce @ 0x%x is in two lists(cel 0x%x &LOCKED)\n",ce,cel);
            }
        }
    }
    if (prev && prev != cel->mru) {
        dump_lists();
        beos_panic("*** last element in list != cel mru (ce 0x%x, cel 0x%x)\n",
              prev, cel);
    }

    cel = &bc.locked;
    for(ce=cel->lru,prev=NULL; ce; prev=ce, ce=ce->next) {
        for(oce=bc.normal.lru; oce; oce=oce->next) {
            if (oce == ce) {
                dump_lists();
                beos_panic("3:ce @ 0x%x is in two lists(cel 0x%x & DIRTY)\n",ce,cel);
            }
        }
    }
    if (prev && prev != cel->mru) {
        dump_lists();
        beos_panic("*** last element in list != cel mru (ce 0x%x, cel 0x%x)\n",
              prev, cel);
    }
}
#endif
632
633
#ifdef DEBUG
#if 0
/*
  Disabled kernel-debugger commands (compiled out; kept for
  reference).  They are registered from beos_init_block_cache() when
  both DEBUG is defined and this block is re-enabled.
*/

/* "bcache": dump both cache lists. */
static int
do_dump(int argc, char **argv)
{
    dump_cache_list();
    return 1;
}


/* "fblock": search both lists for each block number given on the
   command line and print every match. */
static int
do_find_block(int argc, char **argv)
{
    int        i;
    fs_off_t  bnum;
    cache_ent *ce;

    if (argc < 2) {
        kprintf("%s: needs a block # argument\n", argv[0]);
        return 1;
    }

    for(i=1; i < argc; i++) {
        bnum = strtoul(argv[i], NULL, 0);

        for(ce=bc.normal.lru; ce; ce=ce->next) {
            if (ce->block_num == bnum) {
                kprintf("found clean bnum %ld @ 0x%lx (data @ 0x%lx)\n",
                        bnum, ce, ce->data);
            }
        }

        for(ce=bc.locked.lru; ce; ce=ce->next) {
            if (ce->block_num == bnum) {
                kprintf("found locked bnum %ld @ 0x%lx (data @ 0x%lx)\n",
                        bnum, ce, ce->data);
            }
        }
    }

    return 0;
}

/* "fdata": search both lists for each data pointer given on the
   command line and print the owning cache_ent. */
static int
do_find_data(int argc, char **argv)
{
    int        i;
    void      *data;
    cache_ent *ce;

    if (argc < 2) {
        kprintf("%s: needs a block # argument\n", argv[0]);
        return 1;
    }

    for(i=1; i < argc; i++) {
        data = (void *)strtoul(argv[i], NULL, 0);

        for(ce=bc.normal.lru; ce; ce=ce->next) {
            if (ce->data == data) {
                kprintf("found normal data ptr for bnum %ld @ ce 0x%lx\n",
                        ce->block_num, ce);
            }
        }

        for(ce=bc.locked.lru; ce; ce=ce->next) {
            if (ce->data == data) {
                kprintf("found locked data ptr for bnum %ld @ ce 0x%lx\n",
                        ce->block_num, ce);
            }
        }
    }

    return 0;
}
#endif
#endif /* DEBUG */
711
712
713
714/*
715  this function detaches the cache_ent from the list.
716*/
717static void
718delete_from_list(cache_ent_list *cel, cache_ent *ce)
719{
720    if (ce->next)
721        ce->next->prev = ce->prev;
722    if (ce->prev)
723        ce->prev->next = ce->next;
724
725    if (cel->lru == ce)
726        cel->lru = ce->next;
727    if (cel->mru == ce)
728        cel->mru = ce->prev;
729
730    ce->next = NULL;
731    ce->prev = NULL;
732}
733
734
735
736/*
737  this function adds the cache_ent ce to the head of the
738  list (i.e. the MRU end).  the cache_ent should *not*
739  be in any lists.
740*/
741static void
742add_to_head(cache_ent_list *cel, cache_ent *ce)
743{
744if (ce->next != NULL || ce->prev != NULL) {
745    beos_panic("*** ath: ce has non-null next/prev ptr (ce %p nxt %p, prv %p)\n",
746           ce, ce->next, ce->prev);
747}
748
749    ce->next = NULL;
750    ce->prev = cel->mru;
751
752    if (cel->mru)
753        cel->mru->next = ce;
754    cel->mru = ce;
755
756    if (cel->lru == NULL)
757        cel->lru = ce;
758}
759
760
761/*
762  this function adds the cache_ent ce to the tail of the
763  list (i.e. the MRU end).  the cache_ent should *not*
764  be in any lists.
765*/
766static void
767add_to_tail(cache_ent_list *cel, cache_ent *ce)
768{
769if (ce->next != NULL || ce->prev != NULL) {
770    beos_panic("*** att: ce has non-null next/prev ptr (ce %p nxt %p, prv %p)\n",
771           ce, ce->next, ce->prev);
772}
773
774    ce->next = cel->lru;
775    ce->prev = NULL;
776
777    if (cel->lru)
778        cel->lru->prev = ce;
779    cel->lru = ce;
780
781    if (cel->mru == NULL)
782        cel->mru = ce;
783}
784
785
786static int
787cache_ent_cmp(const void *a, const void *b)
788{
789    fs_off_t  diff;
790    cache_ent *p1 = *(cache_ent **)a, *p2 = *(cache_ent **)b;
791
792    if (p1 == NULL || p2 == NULL)
793        beos_panic("cache_ent pointers are null?!? (a %p, b %p\n)\n", a, b);
794
795    if (p1->dev == p2->dev) {
796        diff = p1->block_num - p2->block_num;
797        return (int)diff;
798    } else {
799        return p1->dev - p2->dev;
800    }
801}
802
#if 0
// ToDo: add this to the fsh (as a background thread)?
/*
  Disabled background flusher (compiled out; registered as a kernel
  daemon from beos_init_block_cache() in non-USER builds when
  re-enabled).  Gathers up to NUM_FLUSH_BLOCKS flushable entries --
  dirty/cloned normal blocks first, then cloned locked blocks --
  marks them CE_BUSY, writes them out via flush_ents(), and clears
  the busy bits again.
*/
static void
cache_flusher(void *arg, int phase)
{
    int    i, num_ents, err;
    bigtime_t now = system_time();
    static cache_ent *ce = NULL;
    static cache_ent *ents[NUM_FLUSH_BLOCKS];

    /*
       if someone else was in the cache recently then just bail out so
       we don't lock them out unnecessarily
    */
    if ((now - last_cache_access) < 1000000)
        return;

    LOCK(bc.lock);

    ce = bc.normal.lru;

    /* first pass: dirty or cloned blocks from the normal list */
    for(num_ents=0; ce && num_ents < NUM_FLUSH_BLOCKS; ce=ce->next) {
        if (ce->flags & CE_BUSY)
            continue;

        if ((ce->flags & CE_DIRTY) == 0 && ce->clone == NULL)
            continue;

        ents[num_ents] = ce;
        ents[num_ents]->flags |= CE_BUSY;
        num_ents++;
    }

    /* if we've got some room left over, look for cloned locked blocks */
    if (num_ents < NUM_FLUSH_BLOCKS) {
        ce = bc.locked.lru;

        for(; num_ents < NUM_FLUSH_BLOCKS;) {
            for(;
                ce && ((ce->flags & CE_BUSY) || ce->clone == NULL);
                ce=ce->next)
                /* skip ents that meet the above criteria */;

            if (ce == NULL)
                break;

            ents[num_ents] = ce;
            ents[num_ents]->flags |= CE_BUSY;
            ce = ce->next;
            num_ents++;
        }
    }

    UNLOCK(bc.lock);

    if (num_ents == 0)
        return;

    /* sort by device/block so flush_ents() can coalesce writes */
    qsort(ents, num_ents, sizeof(cache_ent **), cache_ent_cmp);

    if ((err = flush_ents(ents, num_ents)) != 0) {
        printf("flush ents failed (ents @ 0x%lx, num_ents %d!\n",
               (ulong)ents, num_ents);
    }

    for(i=0; i < num_ents; i++) {       /* clear the busy bit on each of ent */
        ents[i]->flags &= ~CE_BUSY;
    }
}
#endif
873
874
/*
  Write out a single cache_ent.  The clone data (if any) is written
  first -- that's the journaled copy BFS cares about -- and then, if
  the block is unlocked and still dirty, the goto loops back once to
  write the real data pointer too.  The completion callback (func),
  if set, is invoked after each write.  Returns the result of the
  last beos_write_phys_blocks() call (0 on success).
*/
static int
flush_cache_ent(cache_ent *ce)
{
    int   ret = 0;
    void *data;

    /* if true, then there's nothing to flush */
    if ((ce->flags & CE_DIRTY) == 0 && ce->clone == NULL)
        return 0;

    /* no clone and locked: the dirty data can't be touched yet */
    if (ce->clone == NULL && ce->lock != 0)
        return 0;

 restart:
	if (ce->clone)
		data = ce->clone;
	else
		data = ce->data;

	if (chatty_io > 2) printf("flush: %7" B_PRIdOFF "\n", ce->block_num);
	ret = beos_write_phys_blocks(ce->dev, ce->block_num, data, 1, ce->bsize);

    /* fire the journal completion callback, one-shot */
    if (ce->func) {
        ce->func(ce->logged_bnum, 1, ce->arg);
        ce->func = NULL;
    }

    if (ce->clone) {
        free(ce->clone);
        ce->clone = NULL;

        if (ce->lock == 0 && (ce->flags & CE_DIRTY))
            goto restart;     /* also write the real data ptr */
    } else {
        ce->flags &= ~CE_DIRTY;
    }

    return ret;
}
915
916
/*
  Flush an array of cache_ents (which callers sort by device/block
  via cache_ent_cmp first).  Runs of blocks that are consecutive on
  the same device are coalesced into a single writev_pos() of up to
  NUM_FLUSH_BLOCKS iovecs; isolated blocks go through
  flush_cache_ent().  Returns 0 on success, ENOMEM if no iovec array
  is available, or EINVAL if a writev fails.
*/
static int
flush_ents(cache_ent **ents, int n_ents)
{
    int    i, j, k, ret = 0, bsize, iocnt, do_again = 0;
    fs_off_t  start_bnum;
    struct iovec *iov;

    iov = get_iovec_array();
    if (iov == NULL)
        return ENOMEM;

restart:
    for(i=0; i < n_ents; i++) {
        /* if true, then there's nothing to flush */
        if ((ents[i]->flags & CE_DIRTY) == 0 && ents[i]->clone == NULL)
            continue;

        /* if true we can't touch the dirty data yet because it's locked */
        if (ents[i]->clone == NULL && ents[i]->lock != 0)
            continue;


        bsize      = ents[i]->bsize;
        start_bnum = ents[i]->block_num;

        /* extend the run: same device, consecutive block numbers,
           and nothing locked without a clone */
        for(j=i+1; j < n_ents && (j - i) < NUM_FLUSH_BLOCKS; j++) {
            if (ents[j]->dev != ents[i]->dev ||
                ents[j]->block_num != start_bnum + (j - i))
                break;

            if (ents[j]->clone == NULL && ents[j]->lock != 0)
                break;
        }

        if (j == i+1) {           /* only one block, just flush it directly */
            if ((ret = flush_cache_ent(ents[i])) != 0)
                break;
            continue;
        }


        /* build the iovec array; the clone (journal copy) wins when
           present, otherwise the real data pointer */
        for(k=i,iocnt=0; k < j; k++,iocnt++) {
            if (ents[k]->clone)
                iov[iocnt].iov_base = ents[k]->clone;
            else
                iov[iocnt].iov_base = ents[k]->data;

            iov[iocnt].iov_len = bsize;
        }

		if (chatty_io)
	        printf("writev @ %" B_PRIdOFF " for %d blocks\n",
                start_bnum, iocnt);

        ret = writev_pos(ents[i]->dev, start_bnum * (fs_off_t)bsize,
                         &iov[0], iocnt);
        if (ret != iocnt*bsize) {
            int idx;

            printf("flush_ents: writev failed: "
                "iocnt %d start bnum %" B_PRIdOFF " bsize %d, ret %d\n",
                iocnt, start_bnum, bsize, ret);

            for(idx=0; idx < iocnt; idx++)
                printf("iov[%2d] = %p :: %ld\n", idx, iov[idx].iov_base,
                       iov[idx].iov_len);

            printf("error %s writing blocks %" B_PRIdOFF ":%d (%d != %d)\n",
                   strerror(errno), start_bnum, iocnt, ret, iocnt*bsize);
            ret = EINVAL;
            break;
        }
        ret = 0;


        /* post-write bookkeeping for the whole run: fire callbacks,
           drop clones, clear dirty bits on non-cloned blocks */
        for(k=i; k < j; k++) {
            if (ents[k]->func) {
                ents[k]->func(ents[k]->logged_bnum, 1, ents[k]->arg);
                ents[k]->func = NULL;
            }

            if (ents[k]->clone) {
                free(ents[k]->clone);
                ents[k]->clone = NULL;
            } else {
                ents[k]->flags &= ~CE_DIRTY;
            }
        }


        i = j - 1;  /* i gets incremented by the outer for loop */
    }

    /*
       here we have to go back through and flush any blocks that are
       still dirty.  with an arched brow you astutely ask, "but how
       could this happen given the above loop?"  Ahhh young grasshopper
       I say, the path through the cache is long and twisty and fraught
       with peril.  The reason it can happen is that a block can be both
       cloned and dirty.  The above loop would only flush the cloned half
       of the data, not the main dirty block.  So we have to go back
       through and see if there are any blocks that are still dirty.  If
       there are we go back to the top of the function and do the whole
       thing over.  Kind of grody but it is necessary to insure the
       correctness of the log for the Be file system.
    */
    if (do_again == 0) {
        for(i=0; i < n_ents; i++) {
            if ((ents[i]->flags & CE_DIRTY) == 0 || ents[i]->lock)
                continue;

            do_again = 1;
            break;
        }

        if (do_again)
            goto restart;
    }

    release_iovec_array(iov);


    return ret;
}
1041
/*
  Tear down one cache list at shutdown: flush anything still dirty
  or cloned, free each entry's clone and data buffers, remove it
  from the hash table, scrub and free the cache_ent itself, and
  decrement the global block count.  Still-locked or busy entries
  only produce warnings -- they are torn down anyway.
*/
static void
delete_cache_list(cache_ent_list *cel)
{
    void      *junk;
    cache_ent *ce, *next;

    for (ce = cel->lru; ce; ce = next) {
        next = ce->next;
        /* a non-zero lock at shutdown means someone forgot to release */
        if (ce->lock != 0) {
            if (ce->func)
                printf("*** shutdown_block_cache: "
                    "block %" B_PRIdOFF ", lock == %d (arg %p)!\n",
                    ce->block_num, ce->lock, ce->arg);
            else
                printf("*** shutdown_block_cache: "
                    "block %" B_PRIdOFF ", lock == %d!\n",
                    ce->block_num, ce->lock);
        }

        if (ce->flags & CE_BUSY) {
            printf("* shutdown block cache: "
                "bnum %" B_PRIdOFF " is busy? ce %p\n", ce->block_num, ce);
        }

        /* last chance to get dirty/cloned data onto the disk */
        if ((ce->flags & CE_DIRTY) || ce->clone) {
            flush_cache_ent(ce);
        }

        if (ce->clone)
            free(ce->clone);
        ce->clone = NULL;

        if (ce->data)
            free(ce->data);
        ce->data = NULL;

        /* hash_delete returns the stored data ptr; it must be ce itself */
        if ((junk = hash_delete(&bc.ht, ce->dev, ce->block_num)) != ce) {
            printf("*** free_device_cache: "
                "bad hash table entry %" B_PRIdOFF " %p != %p\n",
                ce->block_num, junk, ce);
        }

        /* scrub before freeing so stale pointers are easier to spot */
        explicit_bzero(ce, sizeof(*ce));
        free(ce);

        bc.cur_blocks--;
    }
}
1090
1091
/*
  Shut the whole block cache down: unregister the flusher daemon (in
  kernel builds), tear down both cache lists, free the hash table,
  and release both locks.

  NOTE(review): bc.lock is acquired here but never UNLOCKed before
  beos_free_lock() -- presumably freeing a held lock is acceptable
  for this lock implementation; confirm against lock.h.
  NOTE(review): iovec_lock.s is not reset to -1 after being freed,
  unlike bc.lock.s -- verify a second shutdown can't double-free it.
*/
void
beos_shutdown_block_cache(void)
{
    /* print_hash_stats(&bc.ht); */

    if (bc.lock.s > 0)
        LOCK(bc.lock);

#ifndef USER
    unregister_kernel_daemon(cache_flusher, NULL);
#endif

    delete_cache_list(&bc.normal);
    delete_cache_list(&bc.locked);

    bc.normal.lru = bc.normal.mru = NULL;
    bc.locked.lru = bc.locked.mru = NULL;

    shutdown_hash_table(&bc.ht);

    if (bc.lock.s > 0)
        beos_free_lock(&bc.lock);
    bc.lock.s = -1;

    if (iovec_lock.s >= 0)
        beos_free_lock(&iovec_lock);
}
1119
1120
1121
1122int
1123beos_init_cache_for_device(int fd, fs_off_t max_blocks)
1124{
1125    int ret = 0;
1126
1127    if (fd >= MAX_DEVICES)
1128        return -1;
1129
1130    LOCK(bc.lock);
1131
1132    if (max_device_blocks[fd] != 0) {
1133        printf("device %d is already initialized!\n", fd);
1134        ret = -1;
1135    } else {
1136        max_device_blocks[fd] = max_blocks;
1137    }
1138
1139    UNLOCK(bc.lock);
1140
1141    return ret;
1142}
1143
1144
1145
1146/*
1147   this routine assumes that bc.lock has been acquired
1148*/
1149static cache_ent *
1150block_lookup(int dev, fs_off_t bnum)
1151{
1152    int        count = 0;
1153    cache_ent *ce;
1154
1155    while (1) {
1156        ce = hash_lookup(&bc.ht, dev, bnum);
1157        if (ce == NULL)
1158            return NULL;
1159
1160        if ((ce->flags & CE_BUSY) == 0) /* it's ok, break out and return it */
1161            break;
1162
1163        /* else, it's busy and we need to retry our lookup */
1164        UNLOCK(bc.lock);
1165
1166        snooze(5000);
1167        if (count++ == 5000) {  /* then a lot of time has elapsed */
1168            printf("block %" B_PRIdOFF " isn't coming un-busy (ce @ %p)\n",
1169                   ce->block_num, ce);
1170        }
1171
1172        LOCK(bc.lock);
1173    }
1174
1175    if (ce->flags & CE_BUSY)
1176        beos_panic("block lookup: returning a busy block @ %p?!?\n", ce);
1177
1178    return ce;
1179}
1180
1181
1182
1183int
1184beos_set_blocks_info(int dev, fs_off_t *blocks, int nblocks,
1185               void (*func)(fs_off_t bnum, size_t nblocks, void *arg), void *arg)
1186{
1187    int        i, j, cur;
1188    cache_ent *ce;
1189    cache_ent *ents[NUM_FLUSH_BLOCKS];
1190
1191    LOCK(bc.lock);
1192
1193
1194    for(i=0, cur=0; i < nblocks; i++) {
1195
1196        /* printf("sbi:   %ld (arg 0x%x)\n", blocks[i], arg); */
1197        ce = block_lookup(dev, blocks[i]);
1198        if (ce == NULL) {
1199            beos_panic("*** set_block_info can't find bnum %ld!\n", blocks[i]);
1200            UNLOCK(bc.lock);
1201            return ENOENT;   /* hopefully this doesn't happen... */
1202        }
1203
1204
1205        if (blocks[i] != ce->block_num || dev != ce->dev) {
1206            UNLOCK(bc.lock);
1207            beos_panic("** error1: looked up dev %d block %ld but found dev %d "
1208                    "bnum %ld\n", dev, blocks[i], ce->dev, ce->block_num);
1209            return EBADF;
1210        }
1211
1212        if (ce->lock == 0) {
1213            beos_panic("* set_block_info on bnum %ld (%d) but it's not locked!\n",
1214                    blocks[i], nblocks);
1215        }
1216
1217
1218        if ((ce->flags & CE_DIRTY) == 0) {
1219            beos_panic("*** set_block_info on non-dirty block bnum %ld (%d)!\n",
1220                    blocks[i], nblocks);
1221        }
1222
1223        ce->flags |= CE_BUSY;     /* mark all blocks as busy till we're done */
1224
1225        /* if there is cloned data, it needs to be flushed now */
1226        if (ce->clone && ce->func) {
1227            ents[cur++] = ce;
1228
1229            if (cur >= NUM_FLUSH_BLOCKS) {
1230                UNLOCK(bc.lock);
1231
1232                qsort(ents, cur, sizeof(cache_ent **), cache_ent_cmp);
1233
1234                flush_ents(ents, cur);
1235
1236                LOCK(bc.lock);
1237                for(j=0; j < cur; j++)
1238                    ents[j]->flags &= ~CE_BUSY;
1239                cur = 0;
1240            }
1241        }
1242    }
1243
1244
1245    if (cur != 0) {
1246        UNLOCK(bc.lock);
1247
1248        qsort(ents, cur, sizeof(cache_ent **), cache_ent_cmp);
1249
1250        flush_ents(ents, cur);
1251
1252        LOCK(bc.lock);
1253        for(j=0; j < cur; j++)
1254            ents[j]->flags &= ~CE_BUSY;
1255        cur = 0;
1256    }
1257
1258
1259    /* now go through and set the info that we were asked to */
1260    for (i = 0; i < nblocks; i++) {
1261        /* we can call hash_lookup() here because we know it's around */
1262        ce = hash_lookup(&bc.ht, dev, blocks[i]);
1263        if (ce == NULL) {
1264            beos_panic("*** set_block_info can't find bnum %lld!\n", blocks[i]);
1265            UNLOCK(bc.lock);
1266            return ENOENT;   /* hopefully this doesn't happen... */
1267        }
1268
1269        ce->flags &= ~(CE_DIRTY | CE_BUSY);
1270
1271        if (ce->func != NULL) {
1272            beos_panic("*** set_block_info non-null callback on bnum %lld\n",
1273                    ce->block_num);
1274        }
1275
1276        if (ce->clone != NULL) {
1277            beos_panic("*** ce->clone == %p, not NULL in set_block_info\n",
1278                    ce->clone);
1279        }
1280
1281        ce->clone = (void *)malloc(ce->bsize);
1282        if (ce->clone == NULL)
1283            beos_panic("*** can't clone bnum %lld (bsize %d)\n",
1284                    ce->block_num, ce->bsize);
1285
1286
1287        memcpy(ce->clone, ce->data, ce->bsize);
1288
1289        ce->func   = func;
1290        ce->arg    = arg;
1291
1292        ce->logged_bnum = blocks[i];
1293
1294        ce->lock--;
1295        if (ce->lock < 0) {
1296            printf("sbi: "
1297                "whoa nellie! ce @ %p (%" B_PRIdOFF ") has lock == %d\n",
1298                   ce, ce->block_num, ce->lock);
1299        }
1300
1301        if (ce->lock == 0) {
1302            delete_from_list(&bc.locked, ce);
1303            add_to_head(&bc.normal, ce);
1304        }
1305    }
1306
1307    UNLOCK(bc.lock);
1308
1309    return 0;
1310}
1311
1312
1313/* this function is only for use by flush_device() */
1314static void
1315do_flush(cache_ent **ents, int max)
1316{
1317    int i;
1318
1319    for(i=0; i < max; i++) {
1320        ents[i]->flags |= CE_BUSY;
1321    }
1322
1323    UNLOCK(bc.lock);
1324
1325    qsort(ents, max, sizeof(cache_ent **), cache_ent_cmp);
1326    flush_ents(ents, max);
1327
1328    LOCK(bc.lock);
1329    for(i=0; i < max; i++) {
1330        ents[i]->flags &= ~CE_BUSY;
1331    }
1332}
1333
1334int
1335beos_flush_device(int dev, int warn_locked)
1336{
1337    int cur;
1338    cache_ent *ce;
1339    cache_ent *ents[NUM_FLUSH_BLOCKS];
1340
1341    LOCK(bc.lock);
1342
1343    cur = 0;
1344    ce = bc.normal.lru;
1345    while (ce) {
1346        if (ce->dev != dev || (ce->flags & CE_BUSY)) {
1347            ce = ce->next;
1348            continue;
1349        }
1350
1351        if ((ce->flags & CE_DIRTY) || ce->clone) {
1352            ents[cur++] = ce;
1353            if (cur >= NUM_FLUSH_BLOCKS) {
1354                do_flush(ents, cur);
1355
1356                ce = bc.normal.lru;
1357                cur = 0;
1358                continue;
1359            }
1360        }
1361
1362        ce = ce->next;
1363    }
1364
1365    if (cur != 0)
1366        do_flush(ents, cur);
1367
1368    cur = 0;
1369    ce = bc.locked.lru;
1370    while (ce) {
1371        if (ce->dev != dev || (ce->flags & CE_BUSY)) {
1372            ce = ce->next;
1373            continue;
1374        }
1375
1376        if (ce->clone) {
1377            ents[cur++] = ce;
1378            if (cur >= NUM_FLUSH_BLOCKS) {
1379                do_flush(ents, cur);
1380
1381                ce = bc.locked.lru;
1382                cur = 0;
1383                continue;
1384            }
1385        }
1386
1387        ce = ce->next;
1388    }
1389
1390    if (cur != 0)
1391        do_flush(ents, cur);
1392
1393    UNLOCK(bc.lock);
1394
1395    return 0;
1396}
1397
1398
1399static void
1400real_remove_cached_blocks(int dev, int allow_writes, cache_ent_list *cel)
1401{
1402    void      *junk;
1403    cache_ent *ce, *next = NULL;
1404
1405    for(ce=cel->lru; ce; ce=next) {
1406        next = ce->next;
1407
1408        if (ce->dev != dev) {
1409            continue;
1410        }
1411
1412        if (ce->lock != 0 || (ce->flags & CE_BUSY)) {
1413            printf("*** remove_cached_dev: "
1414                "block %" B_PRIdOFF " has lock = %d, flags 0x%x! ce @ %p\n",
1415                ce->block_num, ce->lock, ce->flags, ce);
1416        }
1417
1418        if (allow_writes == ALLOW_WRITES &&
1419            ((ce->flags & CE_DIRTY) || ce->clone)) {
1420            ce->flags |= CE_BUSY;
1421            flush_cache_ent(ce);
1422            ce->flags &= ~CE_BUSY;
1423        }
1424
1425        /* unlink this guy */
1426        if (cel->lru == ce)
1427            cel->lru = ce->next;
1428
1429        if (cel->mru == ce)
1430            cel->mru = ce->prev;
1431
1432        if (ce->prev)
1433            ce->prev->next = ce->next;
1434        if (ce->next)
1435            ce->next->prev = ce->prev;
1436
1437        if (ce->clone)
1438            free(ce->clone);
1439        ce->clone = NULL;
1440
1441        if (ce->data)
1442            free(ce->data);
1443        ce->data = NULL;
1444
1445        if ((junk = hash_delete(&bc.ht, ce->dev, ce->block_num)) != ce) {
1446            beos_panic("*** remove_cached_device: bad hash table entry %ld "
1447                   "%p != %p\n", ce->block_num, junk, ce);
1448        }
1449
1450        free(ce);
1451
1452        bc.cur_blocks--;
1453    }
1454
1455}
1456
1457int
1458beos_remove_cached_device_blocks(int dev, int allow_writes)
1459{
1460    LOCK(bc.lock);
1461
1462    real_remove_cached_blocks(dev, allow_writes, &bc.normal);
1463    real_remove_cached_blocks(dev, allow_writes, &bc.locked);
1464
1465    max_device_blocks[dev] = 0;
1466
1467    UNLOCK(bc.lock);
1468
1469    return 0;
1470}
1471
1472
1473
1474int
1475beos_flush_blocks(int dev, fs_off_t bnum, int nblocks)
1476{
1477    int        cur, i;
1478    cache_ent *ce;
1479    cache_ent *ents[NUM_FLUSH_BLOCKS];
1480
1481    if (nblocks == 0)   /* might as well check for this */
1482        return 0;
1483
1484    LOCK(bc.lock);
1485
1486    cur = 0;
1487    for(; nblocks > 0; nblocks--, bnum++) {
1488        ce = block_lookup(dev, bnum);
1489        if (ce == NULL)
1490            continue;
1491
1492        if (bnum != ce->block_num || dev != ce->dev) {
1493            UNLOCK(bc.lock);
1494            beos_panic("error2: looked up dev %d block %ld but found %d %ld\n",
1495                  dev, bnum, ce->dev, ce->block_num);
1496            return EBADF;
1497        }
1498
1499        if ((ce->flags & CE_DIRTY) == 0 && ce->clone == NULL)
1500            continue;
1501
1502        ce->flags |= CE_BUSY;
1503        ents[cur++] = ce;
1504
1505        if (cur >= NUM_FLUSH_BLOCKS) {
1506            UNLOCK(bc.lock);
1507
1508            qsort(ents, cur, sizeof(cache_ent **), cache_ent_cmp);
1509            flush_ents(ents, cur);
1510
1511            LOCK(bc.lock);
1512            for(i=0; i < cur; i++) {
1513                ents[i]->flags &= ~CE_BUSY;
1514            }
1515            cur = 0;
1516        }
1517    }
1518
1519    UNLOCK(bc.lock);
1520
1521    if (cur == 0)     /* nothing more to do */
1522        return 0;
1523
1524    /* flush out the last few buggers */
1525    qsort(ents, cur, sizeof(cache_ent **), cache_ent_cmp);
1526    flush_ents(ents, cur);
1527
1528    for(i=0; i < cur; i++) {
1529        ents[i]->flags &= ~CE_BUSY;
1530    }
1531
1532    return 0;
1533}
1534
1535
1536int
1537beos_mark_blocks_dirty(int dev, fs_off_t bnum, int nblocks)
1538{
1539    int        ret = 0;
1540    cache_ent *ce;
1541
1542    LOCK(bc.lock);
1543
1544    while(nblocks > 0) {
1545        ce = block_lookup(dev, bnum);
1546        if (ce) {
1547            ce->flags |= CE_DIRTY;
1548            bnum      += 1;
1549            nblocks   -= 1;
1550        } else {     /* hmmm, that's odd, didn't find it */
1551            printf("** mark_blocks_diry couldn't find block %" B_PRIdOFF " "
1552                "(len %d)\n", bnum, nblocks);
1553            ret = ENOENT;
1554            break;
1555        }
1556    }
1557
1558    UNLOCK(bc.lock);
1559
1560    return ret;
1561}
1562
1563
1564
1565int
1566beos_release_block(int dev, fs_off_t bnum)
1567{
1568    cache_ent *ce;
1569
1570    /* printf("rlsb: %ld\n", bnum); */
1571    LOCK(bc.lock);
1572
1573    ce = block_lookup(dev, bnum);
1574    if (ce) {
1575        if (bnum != ce->block_num || dev != ce->dev) {
1576            beos_panic("*** error3: looked up dev %d block %ld but found %d %ld\n",
1577                    dev, bnum, ce->dev, ce->block_num);
1578            UNLOCK(bc.lock);
1579            return EBADF;
1580        }
1581
1582        ce->lock--;
1583
1584        if (ce->lock < 0) {
1585            printf("rlsb: whoa nellie! ce %" B_PRIdOFF " has lock == %d\n",
1586                   ce->block_num, ce->lock);
1587        }
1588
1589        if (ce->lock == 0) {
1590            delete_from_list(&bc.locked, ce);
1591            add_to_head(&bc.normal, ce);
1592        }
1593
1594    } else {     /* hmmm, that's odd, didn't find it */
1595        beos_panic("** release_block asked to find %ld but it's not here\n",
1596               bnum);
1597    }
1598
1599    UNLOCK(bc.lock);
1600
1601    return 0;
1602}
1603
1604
1605static cache_ent *
1606new_cache_ent(int bsize)
1607{
1608    cache_ent *ce;
1609
1610    ce = (cache_ent *)calloc(1, sizeof(cache_ent));
1611    if (ce == NULL) {
1612        beos_panic("*** error: cache can't allocate memory!\n");
1613        return NULL;
1614    }
1615
1616    ce->data = malloc(bsize);
1617    if (ce->data == NULL) {
1618        free(ce);
1619        beos_panic("** error cache can't allocate data memory\n");
1620        UNLOCK(bc.lock);
1621        return NULL;
1622    }
1623
1624    ce->dev       = -1;
1625    ce->block_num = -1;
1626
1627    return ce;
1628}
1629
1630
/*
   pick "num_needed" cache entries (up to "max") for the caller to
   (re)use, returning them in "ents" and the count actually obtained
   in *num_gotten.  entries come either from fresh allocations (while
   the cache is below its maximum size) or from victims plucked off
   the LRU end of the normal list.  every entry returned is marked
   CE_BUSY so no one else touches it.

   assumes bc.lock is held; the lock is dropped and re-acquired while
   waiting for busy blocks to become available, and the routine
   panics if it cannot gather enough entries after many retries.
*/
static void
get_ents(cache_ent **ents, int num_needed, int max, int *num_gotten, int bsize)
{
    int        cur, retry_counter = 0, max_retry = num_needed * 256;
    cache_ent *ce;

    if (num_needed > max)
        beos_panic("get_ents: num_needed %d but max %d (doh!)\n", num_needed, max);

    /* if the cache isn't full yet, just allocate the blocks */
    for(cur=0; bc.cur_blocks < bc.max_blocks && cur < num_needed; cur++) {
        ents[cur] = new_cache_ent(bsize);
        if (ents[cur] == NULL)
            break;
        bc.cur_blocks++;
    }

    /* pluck off blocks from the LRU end of the normal list, keep trying too */
    while(cur < num_needed && retry_counter < max_retry) {
        /* note: the normal list should never contain locked blocks,
           so finding one is a fatal inconsistency */
        for(ce=bc.normal.lru; ce && cur < num_needed; ce=ce->next) {
            if (ce->lock)
                beos_panic("get_ents: normal list has locked blocks (ce %p)\n",ce);

            if (ce->flags & CE_BUSY)   /* don't touch busy blocks */
                continue;

            ce->flags   |= CE_BUSY;
            ents[cur++]  = ce;
        }

        /* still short: give other threads a chance to un-busy blocks
           (the lock is dropped so they can make progress) */
        if (cur < num_needed) {
            UNLOCK(bc.lock);
            snooze(10000);
            LOCK(bc.lock);
            retry_counter++;
        }
    }

    if (cur < num_needed && retry_counter >= max_retry) {  /* oh shit! */
        dump_cache_list();
        UNLOCK(bc.lock);
        beos_panic("get_ents: waited too long; can't get enough ce's (c %d n %d)\n",
              cur, num_needed);
    }

    /*
      If the last block is a dirty one, try to get more of 'em so
      that we can flush a bunch of blocks at once.
    */
    if (cur && cur < max &&
        ((ents[cur-1]->flags & CE_DIRTY) || ents[cur-1]->clone)) {

        for(ce=ents[cur-1]->next; ce && cur < max; ce=ce->next) {
            if (ce->flags & CE_BUSY)   /* don't touch busy blocks */
                continue;

            if (ce->lock)
                beos_panic("get_ents:2 dirty list has locked blocks (ce %p)\n",ce);

            ce->flags   |= CE_BUSY;
            ents[cur++]  = ce;
        }
    }

    *num_gotten = cur;
}
1697
1698
1699static int
1700read_into_ents(int dev, fs_off_t bnum, cache_ent **ents, int num, int bsize)
1701{
1702    int    i, ret;
1703    struct iovec *iov;
1704
1705    iov = get_iovec_array();
1706
1707    for (i = 0; i < num; i++) {
1708        iov[i].iov_base = ents[i]->data;
1709        iov[i].iov_len  = bsize;
1710    }
1711
1712	if (chatty_io > 2)
1713		printf("readv @ %" B_PRIdOFF " for %d blocks "
1714			"(at %" B_PRIdOFF ", block_size = %d)\n",
1715			bnum, num, bnum*bsize, bsize);
1716    ret = readv_pos(dev, bnum*bsize, iov, num);
1717
1718    release_iovec_array(iov);
1719
1720    if (ret != num*bsize) {
1721        printf("read_into_ents: asked to read %d bytes but got %d\n",
1722               num*bsize, ret);
1723        printf("*** iov @ %p (num %d)\n", iov, num);
1724        return EINVAL;
1725    } else
1726        return 0;
1727}
1728
1729
1730
#define CACHE_READ          0x0001
#define CACHE_WRITE         0x0002
#define CACHE_NOOP          0x0004     /* for getting empty blocks */
#define CACHE_LOCKED        0x0008
#define CACHE_READ_AHEAD_OK 0x0010     /* it's ok to do read-ahead */


/*
   turn a cache op bitmask into a human-readable string for debugging
   output.  note: returns a pointer to a static buffer, so it is not
   thread safe and the result is only valid until the next call.
*/
static char *
op_to_str(int op)
{
    static char buff[128];

    if (op & CACHE_READ)
        strcpy(buff, "READ");
    else if (op & CACHE_WRITE)
        strcpy(buff, "WRITE");
    else if (op & CACHE_NOOP)
        strcpy(buff, "NOP");
    else
        strcpy(buff, "UNKNOWN");   /* old code left stale text here */

    if (op & CACHE_LOCKED)
        strcat(buff, " LOCKED");

    if (op & CACHE_READ_AHEAD_OK)
        strcat(buff, " (AHEAD)");

    return buff;
}
1758
1759static int
1760cache_block_io(int dev, fs_off_t bnum, void *data, fs_off_t num_blocks, int bsize,
1761               int op, void **dataptr)
1762{
1763    size_t          err = 0;
1764    cache_ent      *ce;
1765    cache_ent_list *cel;
1766
1767    if (chatty_io > 1)
1768        printf("cbio: bnum = %" B_PRIdOFF ", num_blocks = %" B_PRIdOFF ", "
1769            "bsize = %d, op = %s\n", bnum, num_blocks, bsize, op_to_str(op));
1770
1771    /* some sanity checks first */
1772    if (bsize == 0)
1773        beos_panic("cache_io: block size == 0 for bnum %lld?!?\n", bnum);
1774
1775    if (num_blocks == 0)
1776        beos_panic("cache_io: bnum %lld has num_blocks == 0!\n", bnum);
1777
1778    if (data == NULL && dataptr == NULL) {
1779        printf("major butthead move: "
1780            "null data and dataptr! bnum %" B_PRIdOFF ":%" B_PRIdOFF "\n",
1781            bnum, num_blocks);
1782        return ENOMEM;
1783    }
1784
1785    if (data == NULL) {
1786        if (num_blocks != 1)    /* get_block() should never do that */
1787            beos_panic("cache_io: num_blocks %lld but should be 1\n",
1788                  num_blocks);
1789
1790        if (op & CACHE_WRITE)
1791            beos_panic("cache_io: get_block() asked to write?!?\n");
1792    }
1793
1794    if (bnum + num_blocks > max_device_blocks[dev]) {
1795        printf("dev %d: "
1796            "access to blocks %" B_PRIdOFF ":%" B_PRIdOFF " "
1797            "but max_dev_blocks is %" B_PRIdOFF "\n",
1798            dev, bnum, num_blocks, max_device_blocks[dev]);
1799
1800		// let the app crash here
1801		*(int *)0x3100 = 0xc0debabe;
1802        return EINVAL;
1803    }
1804
1805    last_cache_access = system_time();
1806
1807    /* if the i/o is greater than 64k, do it directly */
1808    if (num_blocks * bsize >= 64 * 1024) {
1809        char  *ptr;
1810        fs_off_t  tmp;
1811
1812        if (data == NULL || (op & CACHE_LOCKED)) {
1813            beos_panic("*** asked to do a large locked io that's too hard!\n");
1814        }
1815
1816
1817        if (op & CACHE_READ) {
1818            if (beos_read_phys_blocks(dev, bnum, data, num_blocks, bsize) != 0) {
1819                printf("cache read:read_phys_blocks failed "
1820                    "(%s on blocks %" B_PRIdOFF ":%" B_PRIdOFF ")!\n",
1821                     strerror(errno), bnum, num_blocks);
1822                return EINVAL;
1823            }
1824
1825            LOCK(bc.lock);
1826
1827            /* if any of the blocks are in the cache, grab them instead */
1828            ptr = data;
1829            for(tmp=bnum; tmp < bnum+num_blocks; tmp++, ptr+=bsize) {
1830                ce = block_lookup(dev, tmp);
1831                /*
1832                    if we find a block in the cache we have to copy its
1833                    data just in case it is more recent than what we just
1834                    read from disk (which could happen if someone wrote
1835                    these blocks after we did the read but before we locked
1836                    the cache and entered this loop).
1837                */
1838                if (ce) {
1839                    if (tmp != ce->block_num || dev != ce->dev) {
1840                        UNLOCK(bc.lock);
1841                        beos_panic("*** error4: looked up dev %d block %lld but "
1842                                "found %d %lld\n", dev, tmp, ce->dev,
1843                                ce->block_num);
1844                    }
1845
1846                    memcpy(ptr, ce->data, bsize);
1847                }
1848            }
1849
1850            UNLOCK(bc.lock);
1851        } else if (op & CACHE_WRITE) {
1852            LOCK(bc.lock);
1853
1854            /* if any of the blocks are in the cache, update them too */
1855            ptr = data;
1856            for(tmp=bnum; tmp < bnum+num_blocks; tmp++, ptr+=bsize) {
1857                ce = block_lookup(dev, tmp);
1858                if (ce) {
1859                    if (tmp != ce->block_num || dev != ce->dev) {
1860                        UNLOCK(bc.lock);
1861                        beos_panic("*** error5: looked up dev %d block %lld but "
1862                                "found %d %lld\n", dev, tmp, ce->dev,
1863                                ce->block_num);
1864                        return EBADF;
1865                    }
1866
1867                    /* XXXdbg -- this isn't strictly necessary */
1868                    if (ce->clone) {
1869                        printf("over-writing cloned data "
1870                            "(ce %p bnum %" B_PRIdOFF ")...\n", ce, tmp);
1871                        flush_cache_ent(ce);
1872                    }
1873
1874                    /* copy the data into the cache */
1875                    memcpy(ce->data, ptr, bsize);
1876                }
1877            }
1878
1879            UNLOCK(bc.lock);
1880
1881            if (beos_write_phys_blocks(dev, bnum, data, num_blocks, bsize) != 0) {
1882                printf("cache write: write_phys_blocks failed (%s on blocks "
1883                    "%" B_PRIdOFF ":%" B_PRIdOFF ")!\n",
1884                    strerror(errno), bnum, num_blocks);
1885                return EINVAL;
1886            }
1887        } else {
1888            printf("bad cache op %d "
1889                "(bnum %" B_PRIdOFF " nblocks %" B_PRIdOFF ")\n",
1890                op, bnum, num_blocks);
1891            return EINVAL;
1892        }
1893
1894        return 0;
1895    }
1896
1897
1898    LOCK(bc.lock);
1899    while(num_blocks) {
1900
1901        ce = block_lookup(dev, bnum);
1902        if (ce) {
1903            if (bnum != ce->block_num || dev != ce->dev) {
1904                UNLOCK(bc.lock);
1905                beos_panic("*** error6: looked up dev %d block %ld but found "
1906                        "%d %ld\n", dev, bnum, ce->dev, ce->block_num);
1907                return EBADF;
1908            }
1909
1910            if (bsize != ce->bsize) {
1911                beos_panic("*** requested bsize %d but ce->bsize %d ce @ %p\n",
1912                        bsize, ce->bsize, ce);
1913            }
1914
1915            /* delete this ent from the list it is in because it may change */
1916            if (ce->lock)
1917                cel = &bc.locked;
1918            else
1919                cel = &bc.normal;
1920
1921            delete_from_list(cel, ce);
1922
1923            if (op & CACHE_READ) {
1924                if (data && data != ce->data) {
1925                    memcpy(data, ce->data, bsize);
1926                } else if (dataptr) {
1927                    *dataptr = ce->data;
1928                } else {
1929                    printf("cbio:data %p dptr %p ce @ %p ce->data %p\n",
1930                           data, dataptr, ce, ce->data);
1931                }
1932            } else if (op & CACHE_WRITE) {
1933                if (data && data != ce->data)
1934                    memcpy(ce->data, data, bsize);
1935
1936                ce->flags |= CE_DIRTY;
1937            } else if (op & CACHE_NOOP) {
1938                memset(ce->data, 0, bsize);
1939                if (data)
1940                    memset(data, 0, bsize);
1941
1942                if (dataptr)
1943                    *dataptr = ce->data;
1944
1945                ce->flags |= CE_DIRTY;
1946            } else {
1947                beos_panic("cached_block_io: bogus op %d\n", op);
1948            }
1949
1950            if (op & CACHE_LOCKED)
1951                ce->lock++;
1952
1953            if (ce->lock)
1954                cel = &bc.locked;
1955            else
1956                cel = &bc.normal;
1957
1958            /* now put this ent at the head of the appropriate list */
1959            add_to_head(cel, ce);
1960
1961            if (data != NULL)
1962                data = (void *)((char *)data + bsize);
1963
1964            bnum       += 1;
1965            num_blocks -= 1;
1966
1967            continue;
1968        } else {                                  /* it's not in the cache */
1969            int        cur, cur_nblocks, num_dirty, real_nblocks, num_needed;
1970            cache_ent *ents[NUM_FLUSH_BLOCKS];
1971
1972            /*
1973               here we find out how many additional blocks in this request
1974               are not in the cache.  the idea is that then we can do one
1975               big i/o on that many blocks at once.
1976            */
1977            for(cur_nblocks=1;
1978                cur_nblocks < num_blocks && cur_nblocks < NUM_FLUSH_BLOCKS;
1979                cur_nblocks++) {
1980
1981                /* we can call hash_lookup() directly instead of
1982                   block_lookup() because we don't care about the
1983                   state of the busy bit of the block at this point
1984                */
1985                if (hash_lookup(&bc.ht, dev, bnum + cur_nblocks))
1986                    break;
1987            }
1988
1989            /*
1990              here we try to figure out how many extra blocks we should read
1991              for read-ahead.  we want to read as many as possible that are
1992              not already in the cache and that don't cause us to try and
1993              read beyond the end of the disk.
1994            */
1995            if ((op & CACHE_READ) && (op & CACHE_READ_AHEAD_OK) &&
1996                (cur_nblocks * bsize) < read_ahead_size) {
1997
1998                for(num_needed=cur_nblocks;
1999                    num_needed < (read_ahead_size / bsize);
2000                    num_needed++) {
2001
2002                    if ((bnum + num_needed) >= max_device_blocks[dev])
2003                        break;
2004
2005                    if (hash_lookup(&bc.ht, dev, bnum + num_needed))
2006                        break;
2007                }
2008            } else {
2009                num_needed = cur_nblocks;
2010            }
2011
2012            /* this will get us pointers to a bunch of cache_ents we can use */
2013            get_ents(ents, num_needed, NUM_FLUSH_BLOCKS, &real_nblocks, bsize);
2014
2015            if (real_nblocks < num_needed) {
2016                beos_panic("don't have enough cache ents "
2017                     "(need %d got %d %ld::%" B_PRIdOFF ")\n",
2018                      num_needed, real_nblocks, bnum, num_blocks);
2019            }
2020
2021            /*
2022              There are now three variables used as limits within the ents
2023              array.  This is how they are related:
2024
2025                 cur_nblocks <= num_needed <= real_nblocks
2026
2027              Ents from 0 to cur_nblocks-1 are going to be used to fulfill
2028              this IO request.  Ents from cur_nblocks to num_needed-1 are
2029              for read-ahead.  Ents from num_needed to real_nblocks are
2030              extra blocks that get_ents() asked us to flush.  Often (and
2031              always on writes) cur_nblocks == num_needed.
2032
2033              Below, we sort the list of ents so that when we flush them
2034              they go out in order.
2035            */
2036
2037            qsort(ents, real_nblocks, sizeof(cache_ent **), cache_ent_cmp);
2038
2039            /*
2040              delete each ent from its list because it will change.  also
2041              count up how many dirty blocks there are and insert into the
2042              hash table any new blocks so that no one else will try to
2043              read them in when we release the cache semaphore to do our I/O.
2044            */
2045            for(cur=0,num_dirty=0; cur < real_nblocks; cur++) {
2046                ce = ents[cur];
2047                ce->flags |= CE_BUSY;
2048
2049                /*
2050                   insert the new block into the hash table with its new block
2051                   number. note that the block is still in the hash table for
2052                   its old block number -- and it has to be until we are done
2053                   flushing it from the cache (to prevent someone else from
2054                   sneaking in in front of us and trying to read the same
2055                   block that we're flushing).
2056                */
2057                if (cur < num_needed) {
2058                    if (hash_insert(&bc.ht, dev, bnum + cur, ce) != 0)
2059                        beos_panic("could not insert cache ent for %d %ld (%p)\n",
2060                              dev, bnum + cur, ents[cur]);
2061                }
2062
2063                if (ce->dev == -1)
2064                    continue;
2065
2066                if ((ce->flags & CE_DIRTY) || ce->clone)
2067                    num_dirty++;
2068
2069                if (ce->lock) {
2070                    beos_panic("cbio: can't use locked blocks here ce @ %p\n",ce);
2071                } else {
2072                    cel = &bc.normal;
2073	                delete_from_list(cel, ce);
2074				}
2075            }
2076            ce = NULL;
2077
2078
2079            /*
2080               we release the block cache semaphore here so that we can
2081               go do all the i/o we need to do (flushing dirty blocks
2082               that we're kicking out as well as reading any new data).
2083
2084               because all the blocks we're touching are marked busy
2085               no one else should mess with them while we're doing this.
2086            */
2087            if (num_dirty || (op & CACHE_READ)) {
2088                UNLOCK(bc.lock);
2089
2090                /* this flushes any blocks we're kicking out that are dirty */
2091                if (num_dirty && (err = flush_ents(ents, real_nblocks)) != 0) {
2092                    printf("flush ents failed (ents @ %p, nblocks %d!\n",
2093                           ents, cur_nblocks);
2094                    goto handle_err;
2095                }
2096
2097            }
2098
2099            /*
2100               now that everything is flushed to disk, go through and
2101               make sure that the data blocks we're going to use are
2102               the right block size for this current request (it's
2103               possible we're kicking out some smaller blocks and need
2104               to reallocate the data block pointer). We do this in two
2105               steps, first free'ing everything and then going through
2106               and doing the malloc's to try and be nice to the memory
2107               system (i.e. allow it to coalesce stuff, etc).
2108            */
2109            err = 0;
2110            for(cur=0; cur < num_needed; cur++) {
2111                if (ents[cur]->bsize != bsize) {
2112                    free(ents[cur]->data);
2113                    ents[cur]->data = NULL;
2114
2115                    if (ents[cur]->clone) {
2116                        free(ents[cur]->clone);
2117                        ents[cur]->clone = NULL;
2118                    }
2119                }
2120            }
2121
2122            for(cur=0; cur < num_needed; cur++) {
2123                if (ents[cur]->data == NULL) {
2124                    ents[cur]->data  = (void *)malloc(bsize);
2125                    ents[cur]->bsize = bsize;
2126                }
2127
2128                if (ents[cur]->data == NULL) {
2129                    printf("cache: no memory for block (bsize %d)!\n",
2130                           bsize);
2131                    err = ENOMEM;
2132                    break;
2133                }
2134            }
2135
2136            /*
2137               if this condition is true it's a pretty serious error.
2138               we'll try and back out gracefully but we're in pretty
2139               deep at this point and it ain't going to be easy.
2140            */
2141  handle_err:
2142            if (err) {
2143                for(cur=0; cur < num_needed; cur++) {
2144                    cache_ent *tmp_ce;
2145
2146                    tmp_ce = (cache_ent *)hash_delete(&bc.ht,dev,bnum+cur);
2147                    if (tmp_ce != ents[cur]) {
2148                        beos_panic("hash_del0: %d %ld got %p, not %p\n",
2149                                dev, bnum+cur, tmp_ce,
2150                                ents[cur]);
2151                    }
2152
2153                    tmp_ce = (cache_ent *)hash_delete(&bc.ht,ents[cur]->dev,
2154                                                        ents[cur]->block_num);
2155                    if (tmp_ce != ents[cur]) {
2156                        beos_panic("hash_del1: %d %ld got %p, not %p\n",
2157                                ents[cur]->dev, ents[cur]->block_num, tmp_ce,
2158                                ents[cur]);
2159                    }
2160
2161                    ents[cur]->flags &= ~CE_BUSY;
2162                    if (ents[cur]->data)
2163                        free(ents[cur]->data);
2164                    free(ents[cur]);
2165                    ents[cur] = NULL;
2166
2167                    bc.cur_blocks--;
2168                }
2169
2170                if (cur < real_nblocks) {
2171                    LOCK(bc.lock);
2172                    for(; cur < real_nblocks; cur++) {
2173                        ents[cur]->flags &= ~CE_BUSY;
2174
2175                        /* we have to put them back here */
2176                        add_to_tail(&bc.normal, ents[cur]);
2177                    }
2178                    UNLOCK(bc.lock);
2179                }
2180
2181                return ENOMEM;
2182            }
2183
2184
2185            /*
2186               If we go into this if statement, the block cache lock
2187               has *already been released* up above when we flushed the
2188               dirty entries.  As always, since the blocks we're mucking
2189               with are marked busy, they shouldn't get messed with.
2190            */
2191            err = 0;
2192            if (num_dirty || (op & CACHE_READ)) {
2193                /* this section performs the i/o that we need to do */
2194                if (op & CACHE_READ) {
2195                    err = read_into_ents(dev, bnum, ents, num_needed, bsize);
2196                } else {
2197                    err = 0;
2198                }
2199
2200                if (err != 0) {
2201                    printf("err %s on dev %d block %" B_PRIdOFF ":%d (%d) "
2202                           "data %p, ents[0] %p\n",
2203                           strerror(errno), dev, bnum, cur_nblocks,
2204                           bsize, data, ents[0]);
2205                }
2206
2207                /*
2208                   acquire the semaphore here so that we can go on mucking
2209                   with the cache data structures.  We need to delete old
2210                   block numbers from the hash table and set the new block
2211                   number's for the blocks we just read in.  We also put the
2212                   read-ahead blocks at the head of mru list.
2213                */
2214
2215                LOCK(bc.lock);
2216            }
2217
2218            for(cur=0; cur < num_needed; cur++) {
2219                cache_ent *tmp_ce;
2220
2221                ce = ents[cur];
2222                if (ce->dev != -1) {
2223                    tmp_ce = hash_delete(&bc.ht, ce->dev, ce->block_num);
2224                    if (tmp_ce == NULL || tmp_ce != ce) {
2225                        beos_panic("*** hash_delete failure (ce %p tce %p)\n",
2226                              ce, tmp_ce);
2227                    }
2228                }
2229
2230                if (err == 0 && cur >= cur_nblocks) {
2231                    ce->dev       = dev;
2232                    ce->block_num = bnum + cur;
2233                    ce->flags    &= ~CE_BUSY;
2234                    add_to_head(&bc.normal, ce);
2235                }
2236            }
2237            ce = NULL;
2238
2239            /*
2240              clear the busy bit on the blocks we force-flushed and
2241              put them on the normal list since they're now clean.
2242            */
2243            for(; cur < real_nblocks; cur++) {
2244                ents[cur]->flags &= ~CE_BUSY;
2245
2246                if (ents[cur]->lock)
2247                    beos_panic("should not have locked blocks here (ce %p)\n",
2248                          ents[cur]);
2249
2250                add_to_tail(&bc.normal, ents[cur]);
2251            }
2252
2253            if (err) {   /* then we have some cleanup to do */
2254                for(cur=0; cur < num_needed; cur++) {
2255                    cache_ent *tmp_ce;
2256
2257                    /* we delete all blocks from the cache so we don't
2258                       leave partially written blocks in the cache */
2259
2260                    tmp_ce = (cache_ent *)hash_delete(&bc.ht,dev,bnum+cur);
2261                    if (tmp_ce != ents[cur]) {
2262                        beos_panic("hash_del: %d %ld got %p, not %p\n",
2263                                dev, bnum+cur, tmp_ce,
2264                                ents[cur]);
2265                    }
2266
2267                    ce = ents[cur];
2268                    ce->flags &= ~CE_BUSY;
2269
2270                    free(ce->data);
2271                    ce->data = NULL;
2272
2273                    free(ce);
2274                    ents[cur] = NULL;
2275
2276                    bc.cur_blocks--;
2277                }
2278                ce = NULL;
2279
2280                UNLOCK(bc.lock);
2281                return err;
2282            }
2283
2284
2285            /*
2286               last step: go through and make sure all the cache_ent
2287               structures have the right data in them, delete old guys, etc.
2288            */
2289            for(cur=0; cur < cur_nblocks; cur++) {
2290                ce = ents[cur];
2291
2292                if (ce->dev != -1) {   /* then clean this guy up */
2293                    if (ce->next || ce->prev)
2294                        beos_panic("ce @ %p should not be in a list yet!\n", ce);
2295
2296                    if (ce->clone)
2297                        free(ce->clone);
2298
2299                    if (ce->data == NULL)
2300                        beos_panic("ce @ %p has a null data ptr\n", ce);
2301                }
2302
2303                ce->dev        = dev;
2304                ce->block_num  = bnum + cur;
2305                ce->bsize      = bsize;
2306                ce->flags      = CE_NORMAL;
2307                ce->lock       = 0;
2308                ce->clone      = NULL;
2309                ce->func = ce->arg  = NULL;
2310                ce->next = ce->prev = NULL;
2311
2312                if (op & CACHE_READ) {
2313                    if (data)
2314                        memcpy(data, ce->data, bsize);
2315                } else if (op & CACHE_WRITE) {
2316                    ce->flags  |= CE_DIRTY;
2317                    memcpy(ce->data, data, bsize);
2318                } else if (op & CACHE_NOOP) {
2319                    memset(ce->data, 0, bsize);
2320                    if (data)
2321                        memset(data, 0, bsize);
2322
2323                    ce->flags |= CE_DIRTY;
2324                }
2325
2326                if (op & CACHE_LOCKED) {
2327                    ce->lock++;
2328                    cel = &bc.locked;
2329                } else {
2330                    cel = &bc.normal;
2331                }
2332
2333                /* now stick this puppy at the head of the mru list */
2334                add_to_head(cel, ce);
2335
2336
2337                if (dataptr) {
2338                    *dataptr = ce->data;
2339                }
2340
2341                if (data != NULL)
2342                    data = (void *)((char *)data + bsize);
2343                else if (cur_nblocks != 1)
2344                    beos_panic("cache can't handle setting data_ptr twice!\n");
2345            }  /* end of for(cur=0; cur < cur_nblocks; cur++) */
2346
2347            bnum       += cur_nblocks;
2348            num_blocks -= cur_nblocks;
2349
2350        }   /* end of else it's not in the cache */
2351
2352    }   /* end of while(num_blocks) */
2353
2354    UNLOCK(bc.lock);
2355
2356    return 0;
2357}
2358
2359
2360void *
2361beos_get_block(int dev, fs_off_t bnum, int bsize)
2362{
2363    void *data;
2364
2365    if (cache_block_io(dev, bnum, NULL, 1, bsize, CACHE_READ|CACHE_LOCKED|CACHE_READ_AHEAD_OK,
2366                       &data) != 0)
2367        return NULL;
2368
2369    return data;
2370}
2371
2372void *
2373beos_get_empty_block(int dev, fs_off_t bnum, int bsize)
2374{
2375    void *data;
2376
2377    if (cache_block_io(dev, bnum, NULL, 1, bsize, CACHE_NOOP|CACHE_LOCKED,
2378                       &data) != 0)
2379        return NULL;
2380
2381    return data;
2382}
2383
2384int
2385beos_cached_read(int dev, fs_off_t bnum, void *data, fs_off_t num_blocks, int bsize)
2386{
2387    return cache_block_io(dev, bnum, data, num_blocks, bsize,
2388                          CACHE_READ | CACHE_READ_AHEAD_OK, NULL);
2389}
2390
2391
2392int
2393beos_cached_write(int dev, fs_off_t bnum, const void *data, fs_off_t num_blocks,int bsize)
2394{
2395    return cache_block_io(dev, bnum, (void *)data, num_blocks, bsize,
2396                          CACHE_WRITE, NULL);
2397}
2398
2399int
2400beos_cached_write_locked(int dev, fs_off_t bnum, const void *data,
2401                    fs_off_t num_blocks, int bsize)
2402{
2403    return cache_block_io(dev, bnum, (void *)data, num_blocks, bsize,
2404                          CACHE_WRITE | CACHE_LOCKED, NULL);
2405}
2406
2407
2408void
2409beos_force_cache_flush(int dev, int prefer_log_blocks)
2410{
2411    int        i, count = 0;
2412    cache_ent *ce;
2413    cache_ent *ents[NUM_FLUSH_BLOCKS];
2414
2415
2416    LOCK(bc.lock);
2417
2418    for(ce=bc.normal.lru; ce; ce=ce->next) {
2419        if ((ce->dev == dev) &&
2420            (ce->flags & CE_BUSY) == 0 &&
2421            ((ce->flags & CE_DIRTY) || ce->clone) &&
2422            ((prefer_log_blocks && ce->func) || (prefer_log_blocks == 0))) {
2423
2424            ce->flags |= CE_BUSY;
2425            ents[count++] = ce;
2426
2427            if (count >= NUM_FLUSH_BLOCKS) {
2428                break;
2429            }
2430        }
2431    }
2432
2433    /* if we've got some room left, try and grab any cloned blocks */
2434    if (count < NUM_FLUSH_BLOCKS) {
2435        for(ce=bc.locked.lru; ce; ce=ce->next) {
2436            if ((ce->dev == dev) &&
2437                (ce->flags & CE_BUSY) == 0 &&
2438                (ce->clone)) {
2439
2440                ce->flags |= CE_BUSY;
2441                ents[count++] = ce;
2442
2443                if (count >= NUM_FLUSH_BLOCKS) {
2444                    break;
2445                }
2446            }
2447        }
2448    }
2449
2450    UNLOCK(bc.lock);
2451
2452    if (count != 0) {
2453        qsort(ents, count, sizeof(cache_ent **), cache_ent_cmp);
2454        flush_ents(ents, count);
2455
2456        for(i=0; i < count; i++)
2457            ents[i]->flags &= ~CE_BUSY;
2458    }
2459}
2460