bcache.c revision 87634
140875Smsmith/*-
240875Smsmith * Copyright (c) 1998 Michael Smith <msmith@freebsd.org>
340875Smsmith * All rights reserved.
440875Smsmith *
540875Smsmith * Redistribution and use in source and binary forms, with or without
640875Smsmith * modification, are permitted provided that the following conditions
740875Smsmith * are met:
840875Smsmith * 1. Redistributions of source code must retain the above copyright
940875Smsmith *    notice, this list of conditions and the following disclaimer.
1040875Smsmith * 2. Redistributions in binary form must reproduce the above copyright
1140875Smsmith *    notice, this list of conditions and the following disclaimer in the
1240875Smsmith *    documentation and/or other materials provided with the distribution.
1340875Smsmith *
1440875Smsmith * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
1540875Smsmith * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
1640875Smsmith * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
1740875Smsmith * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
1840875Smsmith * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
1940875Smsmith * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2040875Smsmith * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2140875Smsmith * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2240875Smsmith * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2340875Smsmith * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
2440875Smsmith * SUCH DAMAGE.
2540875Smsmith *
2650477Speter * $FreeBSD: head/sys/boot/common/bcache.c 87634 2001-12-11 00:10:00Z jhb $
2740834Smsmith */
2840834Smsmith
2940834Smsmith/*
3040834Smsmith * Simple LRU block cache
3140834Smsmith */
3240834Smsmith
3340834Smsmith#include <stand.h>
3440834Smsmith#include <string.h>
3540834Smsmith#include <bitstring.h>
3640834Smsmith
3740834Smsmith#include "bootstrap.h"
3840834Smsmith
3940834Smsmith/* #define BCACHE_DEBUG */
4040834Smsmith
4140834Smsmith#ifdef BCACHE_DEBUG
4240834Smsmith#define BCACHE_TIMEOUT	10
4387599Sobrien# define DEBUG(fmt, args...)	printf("%s: " fmt "\n" , __func__ , ## args)
4440834Smsmith#else
4540834Smsmith#define BCACHE_TIMEOUT	2
4640834Smsmith# define DEBUG(fmt, args...)
4740834Smsmith#endif
4840834Smsmith
4940834Smsmith
5040834Smsmithstruct bcachectl
5140834Smsmith{
5240834Smsmith    daddr_t	bc_blkno;
5340834Smsmith    time_t	bc_stamp;
5440835Smsmith    int		bc_count;
5540834Smsmith};
5640834Smsmith
5740834Smsmithstatic struct bcachectl	*bcache_ctl;
5840834Smsmithstatic caddr_t		bcache_data;
5940834Smsmithstatic bitstr_t		*bcache_miss;
6064187Sjhbstatic u_int		bcache_nblks;
6164187Sjhbstatic u_int		bcache_blksize;
6264187Sjhbstatic u_int		bcache_hits, bcache_misses, bcache_ops, bcache_bypasses;
6364187Sjhbstatic u_int		bcache_flushes;
6464187Sjhbstatic u_int		bcache_bcount;
6540834Smsmith
6687634Sjhbstatic void	bcache_invalidate(daddr_t blkno);
6740834Smsmithstatic void	bcache_insert(caddr_t buf, daddr_t blkno);
6840834Smsmithstatic int	bcache_lookup(caddr_t buf, daddr_t blkno);
6940834Smsmith
7040834Smsmith/*
7140834Smsmith * Initialise the cache for (nblks) of (bsize).
7240834Smsmith */
7340834Smsmithint
7464187Sjhbbcache_init(u_int nblks, size_t bsize)
7540834Smsmith{
7640834Smsmith    /* discard any old contents */
7740834Smsmith    if (bcache_data != NULL) {
7840834Smsmith	free(bcache_data);
7940834Smsmith	bcache_data = NULL;
8040834Smsmith	free(bcache_ctl);
8140834Smsmith    }
8240834Smsmith
8340834Smsmith    /* Allocate control structures */
8440834Smsmith    bcache_nblks = nblks;
8540834Smsmith    bcache_blksize = bsize;
8640834Smsmith    bcache_data = malloc(bcache_nblks * bcache_blksize);
8740834Smsmith    bcache_ctl = (struct bcachectl *)malloc(bcache_nblks * sizeof(struct bcachectl));
8840834Smsmith    bcache_miss = bit_alloc((bcache_nblks + 1) / 2);
8940834Smsmith    if ((bcache_data == NULL) || (bcache_ctl == NULL) || (bcache_miss == NULL)) {
9040834Smsmith	if (bcache_miss)
9140834Smsmith	    free(bcache_miss);
9240834Smsmith	if (bcache_ctl)
9340834Smsmith	    free(bcache_ctl);
9440834Smsmith	if (bcache_data)
9540834Smsmith	    free(bcache_data);
9640834Smsmith	bcache_data = NULL;
9740834Smsmith	return(ENOMEM);
9840834Smsmith    }
9940834Smsmith
10058080Sdcs    return(0);
10158080Sdcs}
10258080Sdcs
10358080Sdcs/*
10458080Sdcs * Flush the cache
10558080Sdcs */
10658080Sdcsvoid
10764187Sjhbbcache_flush(void)
10858080Sdcs{
10964187Sjhb    u_int	i;
11058080Sdcs
11158080Sdcs    bcache_flushes++;
11258080Sdcs
11358080Sdcs    /* Flush the cache */
11441254Spaul    for (i = 0; i < bcache_nblks; i++) {
11540835Smsmith	bcache_ctl[i].bc_count = -1;
11641254Spaul	bcache_ctl[i].bc_blkno = -1;
11741254Spaul    }
11840834Smsmith}
11940834Smsmith
12087634Sjhb/*
12187634Sjhb * Handle a write request; write directly to the disk, and populate the
12287634Sjhb * cache with the new values.
12387634Sjhb */
12487634Sjhbstatic int
12587634Sjhbwrite_strategy(void *devdata, int unit, int rw, daddr_t blk, size_t size,
12687634Sjhb		char *buf, size_t *rsize)
12787634Sjhb{
12887634Sjhb    struct bcache_devdata	*dd = (struct bcache_devdata *)devdata;
12987634Sjhb    daddr_t			i, nblk;
13087634Sjhb    int				err;
13187634Sjhb
13287634Sjhb    nblk = size / bcache_blksize;
13387634Sjhb
13487634Sjhb    /* Invalidate the blocks being written */
13587634Sjhb    for (i = 0; i < nblk; i++) {
13687634Sjhb	bcache_invalidate(blk + i);
13787634Sjhb    }
13887634Sjhb
13987634Sjhb    /* Write the blocks */
14087634Sjhb    err = dd->dv_strategy(dd->dv_devdata, rw, blk, size, buf, rsize);
14187634Sjhb
14287634Sjhb    /* Populate the block cache with the new data */
14387634Sjhb    if (err == 0) {
14487634Sjhb    	for (i = 0; i < nblk; i++) {
14587634Sjhb	    bcache_insert(buf + (i * bcache_blksize),blk + i);
14687634Sjhb    	}
14787634Sjhb    }
14887634Sjhb
14987634Sjhb    return err;
15087634Sjhb}
15187634Sjhb
15287634Sjhb/*
15387634Sjhb * Handle a read request; fill in parts of the request that can
15440834Smsmith * be satisfied by the cache, use the supplied strategy routine to do
15540834Smsmith * device I/O and then use the I/O results to populate the cache.
15640834Smsmith */
15787634Sjhbstatic int
15887634Sjhbread_strategy(void *devdata, int unit, int rw, daddr_t blk, size_t size,
15964187Sjhb		char *buf, size_t *rsize)
16040834Smsmith{
16158080Sdcs    static int			bcache_unit = -1;
16240834Smsmith    struct bcache_devdata	*dd = (struct bcache_devdata *)devdata;
16364187Sjhb    int				p_size, result;
16464187Sjhb    daddr_t			p_blk, i, j, nblk;
16540834Smsmith    caddr_t			p_buf;
16640834Smsmith
16740834Smsmith    nblk = size / bcache_blksize;
16840834Smsmith    result = 0;
16940834Smsmith
17040834Smsmith    /* Satisfy any cache hits up front */
17140834Smsmith    for (i = 0; i < nblk; i++) {
17240834Smsmith	if (bcache_lookup(buf + (bcache_blksize * i), blk + i)) {
17340834Smsmith	    bit_set(bcache_miss, i);	/* cache miss */
17440834Smsmith	    bcache_misses++;
17540834Smsmith	} else {
17640834Smsmith	    bit_clear(bcache_miss, i);	/* cache hit */
17740834Smsmith	    bcache_hits++;
17840834Smsmith	}
17940834Smsmith    }
18040834Smsmith
18140834Smsmith    /* Go back and fill in any misses  XXX optimise */
18240834Smsmith    p_blk = -1;
18340834Smsmith    p_buf = NULL;
18440834Smsmith    p_size = 0;
18540834Smsmith    for (i = 0; i < nblk; i++) {
18640834Smsmith	if (bit_test(bcache_miss, i)) {
18740834Smsmith	    /* miss, add to pending transfer */
18840834Smsmith	    if (p_blk == -1) {
18940834Smsmith		p_blk = blk + i;
19040834Smsmith		p_buf = buf + (bcache_blksize * i);
19140834Smsmith		p_size = 1;
19240834Smsmith	    } else {
19340834Smsmith		p_size++;
19440834Smsmith	    }
19540834Smsmith	} else if (p_blk != -1) {
19640834Smsmith	    /* hit, complete pending transfer */
19740834Smsmith	    result = dd->dv_strategy(dd->dv_devdata, rw, p_blk, p_size * bcache_blksize, p_buf, NULL);
19840834Smsmith	    if (result != 0)
19940834Smsmith		goto done;
20040834Smsmith	    for (j = 0; j < p_size; j++)
20140834Smsmith		bcache_insert(p_buf + (j * bcache_blksize), p_blk + j);
20240834Smsmith	    p_blk = -1;
20340834Smsmith	}
20440834Smsmith    }
20540834Smsmith    if (p_blk != -1) {
20640834Smsmith	/* pending transfer left */
20740834Smsmith	result = dd->dv_strategy(dd->dv_devdata, rw, p_blk, p_size * bcache_blksize, p_buf, NULL);
20840834Smsmith	if (result != 0)
20940834Smsmith	    goto done;
21040834Smsmith	for (j = 0; j < p_size; j++)
21140834Smsmith	    bcache_insert(p_buf + (j * bcache_blksize), p_blk + j);
21240834Smsmith    }
21340834Smsmith
21440834Smsmith done:
21540834Smsmith    if ((result == 0) && (rsize != NULL))
21640834Smsmith	*rsize = size;
21740834Smsmith    return(result);
21840834Smsmith}
21940834Smsmith
22087634Sjhb/*
22187634Sjhb * Requests larger than 1/2 the cache size will be bypassed and go
22287634Sjhb * directly to the disk.  XXX tune this.
22387634Sjhb */
22487634Sjhbint
22587634Sjhbbcache_strategy(void *devdata, int unit, int rw, daddr_t blk, size_t size,
22687634Sjhb		char *buf, size_t *rsize)
22787634Sjhb{
22887634Sjhb    static int			bcache_unit = -1;
22987634Sjhb    struct bcache_devdata	*dd = (struct bcache_devdata *)devdata;
23087634Sjhb    int				p_size, result;
23187634Sjhb    daddr_t			p_blk, i, j, nblk;
23287634Sjhb    caddr_t			p_buf;
23340834Smsmith
23487634Sjhb    bcache_ops++;
23587634Sjhb
23687634Sjhb    if(bcache_unit != unit) {
23787634Sjhb	bcache_flush();
23887634Sjhb	bcache_unit = unit;
23987634Sjhb    }
24087634Sjhb
24187634Sjhb    /* bypass large requests, or when the cache is inactive */
24287634Sjhb    if ((bcache_data == NULL) || ((size * 2 / bcache_blksize) > bcache_nblks)) {
24387634Sjhb	DEBUG("bypass %d from %d", size / bcache_blksize, blk);
24487634Sjhb	bcache_bypasses++;
24587634Sjhb	return(dd->dv_strategy(dd->dv_devdata, rw, blk, size, buf, rsize));
24687634Sjhb    }
24787634Sjhb
24887634Sjhb    switch (rw) {
24987634Sjhb    case F_READ:
25087634Sjhb	return read_strategy(devdata, unit, rw, blk, size, buf, rsize);
25187634Sjhb    case F_WRITE:
25287634Sjhb	return write_strategy(devdata, unit, rw, blk, size, buf, rsize);
25387634Sjhb    }
25487634Sjhb    return -1;
25587634Sjhb}
25687634Sjhb
25787634Sjhb
25840834Smsmith/*
25940834Smsmith * Insert a block into the cache.  Retire the oldest block to do so, if required.
26040835Smsmith *
26140835Smsmith * XXX the LRU algorithm will fail after 2^31 blocks have been transferred.
26240834Smsmith */
26340834Smsmithstatic void
26440834Smsmithbcache_insert(caddr_t buf, daddr_t blkno)
26540834Smsmith{
26640834Smsmith    time_t	now;
26764187Sjhb    int		cand, ocount;
26864187Sjhb    u_int	i;
26940834Smsmith
27040834Smsmith    time(&now);
27140835Smsmith    cand = 0;				/* assume the first block */
27240835Smsmith    ocount = bcache_ctl[0].bc_count;
27340834Smsmith
27440834Smsmith    /* find the oldest block */
27540835Smsmith    for (i = 1; i < bcache_nblks; i++) {
27640834Smsmith	if (bcache_ctl[i].bc_blkno == blkno) {
27740834Smsmith	    /* reuse old entry */
27840834Smsmith	    cand = i;
27940834Smsmith	    break;
28040834Smsmith	}
28140835Smsmith	if (bcache_ctl[i].bc_count < ocount) {
28240835Smsmith	    ocount = bcache_ctl[i].bc_count;
28340834Smsmith	    cand = i;
28440835Smsmith	}
28540834Smsmith    }
28640834Smsmith
28740835Smsmith    DEBUG("insert blk %d -> %d @ %d # %d", blkno, cand, now, bcache_bcount);
28840834Smsmith    bcopy(buf, bcache_data + (bcache_blksize * cand), bcache_blksize);
28940834Smsmith    bcache_ctl[cand].bc_blkno = blkno;
29040834Smsmith    bcache_ctl[cand].bc_stamp = now;
29140835Smsmith    bcache_ctl[cand].bc_count = bcache_bcount++;
29240834Smsmith}
29340834Smsmith
29440834Smsmith/*
29540834Smsmith * Look for a block in the cache.  Blocks more than BCACHE_TIMEOUT seconds old
29640835Smsmith * may be stale (removable media) and thus are discarded.  Copy the block out
29740834Smsmith * if successful and return zero, or return nonzero on failure.
29840834Smsmith */
29940834Smsmithstatic int
30040834Smsmithbcache_lookup(caddr_t buf, daddr_t blkno)
30140834Smsmith{
30240834Smsmith    time_t	now;
30364187Sjhb    u_int	i;
30440834Smsmith
30540834Smsmith    time(&now);
30640834Smsmith
30740834Smsmith    for (i = 0; i < bcache_nblks; i++)
30840834Smsmith	/* cache hit? */
30940834Smsmith	if ((bcache_ctl[i].bc_blkno == blkno) && ((bcache_ctl[i].bc_stamp + BCACHE_TIMEOUT) >= now)) {
31040834Smsmith	    bcopy(bcache_data + (bcache_blksize * i), buf, bcache_blksize);
31140834Smsmith	    DEBUG("hit blk %d <- %d (now %d then %d)", blkno, i, now, bcache_ctl[i].bc_stamp);
31240834Smsmith	    return(0);
31340834Smsmith	}
31440834Smsmith    return(ENOENT);
31540834Smsmith}
31640834Smsmith
31787634Sjhb/*
31887634Sjhb * Invalidate a block from the cache.
31987634Sjhb */
32087634Sjhbstatic void
32187634Sjhbbcache_invalidate(daddr_t blkno)
32287634Sjhb{
32387634Sjhb    u_int	i;
32487634Sjhb
32587634Sjhb    for (i = 0; i < bcache_nblks; i++) {
32687634Sjhb	if (bcache_ctl[i].bc_blkno == blkno) {
32787634Sjhb	    bcache_ctl[i].bc_count = -1;
32887634Sjhb	    bcache_ctl[i].bc_blkno = -1;
32987634Sjhb	    DEBUG("invalidate blk %d", blkno);
33087634Sjhb	    break;
33187634Sjhb	}
33287634Sjhb    }
33387634Sjhb}
33487634Sjhb
33540834SmsmithCOMMAND_SET(bcachestat, "bcachestat", "get disk block cache stats", command_bcache);
33640834Smsmith
33740834Smsmithstatic int
33840834Smsmithcommand_bcache(int argc, char *argv[])
33940834Smsmith{
34064187Sjhb    u_int	i;
34140834Smsmith
34240834Smsmith    for (i = 0; i < bcache_nblks; i++) {
34343600Sdcs	printf("%08x %04x %04x|", bcache_ctl[i].bc_blkno, (unsigned int)bcache_ctl[i].bc_stamp & 0xffff, bcache_ctl[i].bc_count & 0xffff);
34440834Smsmith	if (((i + 1) % 4) == 0)
34540834Smsmith	    printf("\n");
34640834Smsmith    }
34758080Sdcs    printf("\n%d ops  %d bypasses  %d hits  %d misses  %d flushes\n", bcache_ops, bcache_bypasses, bcache_hits, bcache_misses, bcache_flushes);
34840834Smsmith    return(CMD_OK);
34940834Smsmith}
35040834Smsmith
351