1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License").  You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22/*
23 *  Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24 *  Use is subject to license terms.
25 *
26 *  This is mostly new code.  Major revisions were made to allow multiple
27 *  file systems to share a common cache.  While this consisted primarily
28 *  of including a "devid_t" pointer in the hash functions, I also re-
29 *  organized everything to eliminate much of the duplicated code that
30 *  had existed previously.
31 */
32
33#pragma ident	"%Z%%M%	%I%	%E% SMI"
34
35#include <sys/param.h>
36#include <sys/vnode.h>
37#include <sys/sysmacros.h>
38#include <sys/filep.h>
39#include <sys/salib.h>
40#include <sys/promif.h>
41
42#ifndef	ICACHE_SIZE
43/*
44 *  These should probably be defined in an architecture-specific header
45 *  file.  The values below are analogous to those used in earlier versions
46 *  of this module.
47 */
48
49#define	ICACHE_SIZE 350	    /* Max number of I-node in file cache	*/
50#define	DCACHE_SIZE 1500    /* Max number of cached directories		*/
51#define	BCACHE_SIZE 250	    /* Max number of cached disk blocks		*/
52#endif
53
54#define	Next 0		    /* Next pointer in Fwd/Bak link		*/
55#define	Prev 1		    /* Previous pointer in Fwd/Back links	*/
56
57#define	Frst 0		    /* Ptr to first element of a chain		*/
58#define	Last 1		    /* Ptr to last element of a chain		*/
59
60#define	Hash 2		    /* Offset of hash chain ptrs.		*/
61
62typedef struct cache {	    /* Generic cache element:			*/
63    struct cache *link[4];  /* .. Fwd/Bak links for hash chain & LRU	*/
64    struct cache **chn;	    /* .. Hash chain link			*/
65    int		   dev;	    /* .. Device file handle			*/
66    void	 *data;	    /* .. Ptr to associated data		*/
67    int		  size;	    /* .. Size of cached data			*/
68} cache_t;
69
70typedef struct head {	    /* Generic cache header:			*/
71	cache_t	   *aged[2];	/* .. LRU list				*/
72	int (*cmp)(cache_t *);	/* .. Ptr to comparison function	*/
73	int	  size;		/* .. Size of "cache" objects		*/
74	int	  maxblks;	/* .. Max number of cached elements	*/
75	int	  count;	/* .. Current number of cached elements	*/
76	int	  hits;		/* .. Total cache hits			*/
77	int	  searches;	/* .. Total searches			*/
78	int	  purges;	/* .. Total purges			*/
79} head_t;
80
81/* Constructor for cache headers:					*/
82#define	cache_head(h, f, t, n) \
83	{{(cache_t *)&h, (cache_t *)&h}, f, sizeof (t), n}
84
85int read_opt;		/* Number of times cache was bypassed	*/
86static int x_dev;	/* Target device ID saved here!		*/
87static int x_len;	/* length of object			*/
88
89#define	LOG2(x) \
90	(((x) <= 16)  ?	 4 : /* Yeah, it's ugly.  But it works! */ \
91	(((x) <= 32)  ?  5 : /* .. Binary log should be part of */ \
92	(((x) <= 64)  ?  6 : /* .. the language!		*/ \
93	(((x) <= 128) ?	 7 : 8))))
94
95static cache_t *
96get_cache(cache_t *cap, head_t *chp)
97{
98	/*
99	 *  Search cache:
100	 *
101	 *  The caller pass a pointer to the first "cache" object in the current
102	 *  hash chain ["cap"] and a pointer to the corresponding cache header
103	 *  ["chp"].  This routine follows the cache chain until it finds an
104	 *  entry that matches both the current device [as noted in "x_dev"]
105	 *  and the cache-specific comparison ["chp->cmp"].
106	 *
107	 *  Returns the address of the matching cache object or null if there
108	 *  is none.
109	 */
110
111	while (cap) {
112		/*
113		 * Check all entries on the cache chain.  We expect
114		 * chains to be relatively short, so we use a simple
115		 * linear search.
116		 */
117		if ((x_dev == cap->dev) && (*chp->cmp)(cap)) {
118			/*
119			 * Found the entry we're looking for! Move it
120			 * to the front of the cache header's LRU list
121			 * before returing its addres to the caller.
122			 */
123			cap->link[Next]->link[Prev] = cap->link[Prev];
124			cap->link[Prev]->link[Next] = cap->link[Next];
125
126			cap->link[Prev] = (cache_t *)chp->aged;
127			cap->link[Next] = chp->aged[Frst];
128			chp->aged[Frst]->link[Prev] = cap;
129			chp->aged[Frst] = cap;
130			chp->hits += 1;
131			break;
132		}
133
134		cap = cap->link[Hash+Next];
135	}
136
137	chp->searches += 1;
138	return (cap);
139}
140
141static cache_t *
142reclaim_cache(head_t *chp, int dev)
143{
144	/*
145	 * Reclaim a cache element:
146	 *
147	 * This routine is used to: [a] free the oldest element from
148	 * the cache headed at "chp" and return the address of the
149	 * corresponding "cache_t" struct (iff dev == -1), or [b] free all
150	 * elements on the cache headed at "chp" that belong to the
151	 * indicated "dev"ice.
152	 */
153	cache_t *cap, *cxp;
154	cache_t *cpp = (cache_t *)chp;
155
156	while ((cap = cpp->link[Prev]) != (cache_t *)chp) {
157		/*
158		 * We follow the cache's LRU chain from oldest to
159		 * newest member.  This ensures that we remove only
160		 * the oldest element when we're called with a
161		 * negative "dev" argument.
162		 */
163		if ((dev == -1) || (dev == cap->dev)) {
164			/*
165			 * This is one of the (perhaps the only)
166			 * elements we're supposed to free.  Remove it
167			 * from both the LRU list and its associated
168			 * hash chain.  Then free the data bound the
169			 * the cache_t element and, if "dev" is
170			 * not -1, the element itself!
171			 */
172			cap->link[Prev]->link[Next] = cap->link[Next];
173			cap->link[Next]->link[Prev] = cap->link[Prev];
174
175			if ((cxp = cap->link[Hash+Prev]) != 0)
176				cxp->link[Hash+Next] = cap->link[Hash+Next];
177			else
178				*(cap->chn) = cap->link[Hash+Next];
179
180			if ((cxp = cap->link[Hash+Next]) != 0)
181				cxp->link[Hash+Prev] = cap->link[Hash+Prev];
182
183			bkmem_free((caddr_t)cap->data, cap->size);
184			if (dev == -1)
185				return (cap);
186
187			bkmem_free((caddr_t)cap, chp->size);
188			chp->count -= 1;
189
190		} else {
191			/*
192			 * Skip this element, it's not one of the
193			 * ones we want to free up.
194			 */
195			cpp = cap;
196		}
197	};
198
199	return (0);
200}
201
202static cache_t *
203set_cache(cache_t **ccp, head_t *chp, int noreclaim)
204{
205	/*
206	 *  Install a cache element:
207	 *
208	 *  The caller passes the address of cache descriptor ["chp"] and the
209	 *  hash chain into which the new element is to be linked ["ccp"].  This
210	 *  routine allocates a new cache_t structure (or, if the maximum number
211	 *  of elements has already been allocated, reclaims the oldest element
212	 *  from the cache), links it into the indicated hash chain, and returns
213	 *  its address to the caller.
214	 */
215	cache_t *cap;
216
217	if ((chp->count < chp->maxblks) &&
218	    (cap = (cache_t *)bkmem_alloc(chp->size))) {
219		/*
220		 * We haven't reached the maximum cache size yet.
221		 * Allocate a new "cache_t" struct to be added to the
222		 * cache.
223		 */
224		chp->count += 1;
225
226	} else {
227		if (noreclaim)
228			return (NULL);
229
230		/*
231		 * Cache is full.  Use the "reclaim_cache" routine to
232		 * remove the oldest element from the cache.  This
233		 * will become the cache_t struct associated with the
234		 * new element.
235		 */
236		cap = reclaim_cache(chp, -1);
237		chp->purges += 1;
238	}
239
240	bzero((char *)cap, chp->size);
241
242	cap->chn = ccp;
243	cap->link[Prev] = (cache_t *)chp;
244	cap->link[Next] = chp->aged[Frst];
245	cap->link[Prev]->link[Next] = cap->link[Next]->link[Prev] = cap;
246
247	if ((cap->link[Hash+Next] = *ccp) != 0)
248		(*ccp)->link[Hash+Prev] = cap;
249	return (*ccp = cap);
250}
251
252/*
253 *  The File Cache:
254 *
255 *  This cache (also known as the inode cache) is used to keep track of all
256 *  files open on a given device.  The only special data required to locate
257 *  a cache entry is the file reference number which is file-system dependent
258 *  (for UNIX file systems, it's an inode number).
259 */
260
261typedef struct icache {		/* Inode cache element:		*/
262	cache_t ic_hdr;		/* .. Standard header		*/
263	int	ic_num;		/* .. I-node number		*/
264} ic_t;
265
266#define	IC_MAX_HDRS (1 << LOG2(ICACHE_SIZE/6))
267#define	IC_HASH(d, i) (((d) + (i)) & (IC_MAX_HDRS - 1))
268
269static int x_inode;
270
271static int		    /* Cache search predicate:			    */
272cmp_icache(cache_t *p)
273{
274	/* Just check the file number ("x_inode") ...	*/
275	return (((ic_t *)p)->ic_num == x_inode);
276}
277
278static head_t	ic_head = cache_head(ic_head, cmp_icache, ic_t, ICACHE_SIZE);
279static cache_t *ic_hash[IC_MAX_HDRS];
280
281void *
282get_icache(int dev, int inum)
283{
284	/*
285	 *  Search File Cache:
286	 *
287	 *  This routine searches the file cache looking for the entry bound to
288	 *  the given "dev"ice and file number ["inum"].  If said entry exists,
289	 *  it returns the address of the associated file structure.  Otherwise
290	 *  it returns null.
291	 */
292	cache_t *icp;
293
294	x_dev = dev;
295	x_inode = inum;
296	icp = get_cache(ic_hash[IC_HASH(dev, inum)], &ic_head);
297
298	return (icp ? (caddr_t)icp->data : 0);
299}
300
301void
302set_icache(int dev, int inum, void *ip, int size)
303{
304	/*
305	 *  Build a File Cache Entry:
306	 *
307	 * This routne installs the "size"-byte file structure at
308	 * "*ip" in the inode cache where it may be retrieved by
309	 * subsequent call to get_icache.
310	 */
311	ic_t *icp = (ic_t *)set_cache(&ic_hash[IC_HASH(dev, inum)],
312								&ic_head, 0);
313	icp->ic_num = inum;
314	icp->ic_hdr.data = ip;
315	icp->ic_hdr.dev = dev;
316	icp->ic_hdr.size = size;
317}
318
319int
320set_ricache(int dev, int inum, void *ip, int size)
321{
322	/*
323	 * Reliably set the icache
324	 *
325	 * This routine is the same as set_icache except that it
326	 * will return 1 if the entry could not be entered into the cache
327	 * without a purge.
328	 */
329	ic_t *icp = (ic_t *)set_cache(&ic_hash[IC_HASH(dev, inum)],
330					&ic_head, 1);
331
332	if (icp == NULL)
333		return (1);
334
335	icp->ic_num = inum;
336	icp->ic_hdr.data = ip;
337	icp->ic_hdr.dev = dev;
338	icp->ic_hdr.size = size;
339
340	return (0);
341}
342
343/*
344 *  The Directory Cache:
345 *
346 *  This cache is designed to speed directory searches.	 Each entry cor-
347 *  responds to a directory entry that was used in a pathname resolution.
348 *  The idea is that most files used by the boot wil be contained in a hand-
349 *  full of directories, so we can speed searches if we know ahead of time
350 *  just where these directories are.
351 */
352
353typedef struct dcache {		/* Directory cache objects:	*/
354	cache_t dc_hdr;		/* .. Standard header		*/
355	int	dc_inum;	/* .. File number		*/
356	int	dc_pnum;	/* .. Parent diretory's file number */
357} dc_t;
358
359#define	DC_MAX_HDRS (1 << LOG2(DCACHE_SIZE/6))
360#define	DC_HASH(d, n, l) (((d) + (n)[0] + (n)[(l)-1] + (l)) & (DC_MAX_HDRS-1))
361
362static char *x_name;
363static int x_pnum;
364
365static int
366cmp_dcache(cache_t *p) /* Cache Search predicate:	*/
367{
368	/* Check name, length, and parent's file number	*/
369	return ((x_len == p->size) && (x_pnum == ((dc_t *)p)->dc_pnum) &&
370	    (strcmp((char *)p->data, x_name) == 0));
371}
372
373static head_t	dc_head = cache_head(dc_head, cmp_dcache, dc_t, DCACHE_SIZE);
374static cache_t *dc_hash[DC_MAX_HDRS];
375
376int
377get_dcache(int dev, char *name, int pnum)
378{
379	/*
380	 *  Search Directory Cache:
381	 *
382	 *  This routine searches the directory cache for an entry
383	 *  associated with directory number "pnum" from the given
384	 *  file system that de-scribes a file of the given "name".
385	 *  If we find such an entry, we return the corresponding file
386	 *  number, 0 otherwise.
387	 */
388	dc_t *dcp;
389
390	x_dev = dev;
391	x_len = strlen(name)+1;
392	x_pnum = pnum;
393	x_name = name;
394	dcp = (dc_t *)get_cache(dc_hash[DC_HASH(dev, name, x_len)], &dc_head);
395
396	return (dcp ? dcp->dc_inum : 0);
397}
398
399void
400set_dcache(int dev, char *name, int pnum, int inum)
401{
402	/*
403	 *  Build Directory Cache Entry:
404	 *
405	 *  This routine creates directory cache entries to be retrieved later
406	 *  via "get_dcache".  The cache key is composed of three parts: The
407	 *  device specifier, the file name ("name"), and the file number of
408	 *  the directory containing that name ("pnum").  The data portion of
409	 *  the entry consists of the file number ("inum").
410	 */
411
412	int len = strlen(name)+1;
413	dc_t *dcp =
414	    (dc_t *)set_cache(&dc_hash[DC_HASH(dev, name, len)], &dc_head, 0);
415
416	if (dcp->dc_hdr.data = (void *)bkmem_alloc(len)) {
417		/*
418		 * Allocate a buffer for the pathname component, and
419		 * make this the "data" portion of the generalize
420		 * "cache_t" struct. Also fill in the cache-specific
421		 * fields (pnum, inum).
422		 */
423		dcp->dc_pnum = pnum;
424		dcp->dc_inum = inum;
425		dcp->dc_hdr.dev = dev;
426		dcp->dc_hdr.size = len;
427		bcopy(name, (char *)dcp->dc_hdr.data, len);
428
429	} else {
430		/*
431		 * Not enough memory to make a copy of the name!
432		 * There's probably not enough to do much else either!
433		 */
434		prom_panic("no memory for directory cache");
435	}
436}
437
438int
439set_rdcache(int dev, char *name, int pnum, int inum)
440{
441	/*
442	 * Reliably set the dcache
443	 *
444	 * This routine is the same as set_dcache except that it
445	 * return 1 if the entry could not be entered into
446	 * the cache without a purge.
447	 */
448	int len = strlen(name) + 1;
449	dc_t *dcp =
450		(dc_t *)set_cache(&dc_hash[DC_HASH(dev, name, len)],
451								&dc_head, 1);
452
453	if (dcp == NULL)
454		return (1);
455
456	if ((dcp->dc_hdr.data = (void *)bkmem_alloc(len)) == NULL) {
457		/*
458		 * Not enough memory to make a copy of the name!
459		 * There's probably not enough to do much else either!
460		 */
461		prom_panic("no memory for directory cache");
462		/* NOTREACHED */
463	}
464
465	/*
466	 * Allocate a buffer for the pathname component, and
467	 * make this the "data" portion of the generalize
468	 * "cache_t" struct. Also fill in the cache-specific
469	 * fields (pnum, inum).
470	 */
471	dcp->dc_pnum = pnum;
472	dcp->dc_inum = inum;
473	dcp->dc_hdr.dev = dev;
474	dcp->dc_hdr.size = len;
475	bcopy(name, (char *)dcp->dc_hdr.data, len);
476
477	return (0);
478}
479
480/*
481 *  Disk Block Cache:
482 */
483
484typedef struct bcache {	    /* Disk block cache objects:		*/
485	cache_t		bc_hdr;	/* .. Standard header			*/
486	unsigned long	bc_blk;	/* .. The block number			*/
487} bc_t;
488
489#define	BC_MAX_HDRS (1 << LOG2(BCACHE_SIZE/6))
490#define	BC_HASH(d, b, l) (((d) + (b) + ((l) >> 8)) & (BC_MAX_HDRS-1))
491
492static unsigned long x_blkno;
493
494static int
495cmp_bcache(cache_t *p) /* Cache Search predicate:		*/
496{
497	/* Check block number, buffer size	*/
498	return ((x_len == p->size) && (x_blkno == ((bc_t *)p)->bc_blk));
499}
500
501static head_t	bc_head = cache_head(bc_head, cmp_bcache, bc_t, BCACHE_SIZE);
502static cache_t *bc_hash[BC_MAX_HDRS];
503
504caddr_t
505get_bcache(fileid_t *fp)
506{
507	/*
508	 *  Search Disk Block Cache:
509	 *
510	 *  This should be getting pretty monotonous by now.  Aren't generalized
511	 *  subroutines ("objects", if you prefer) great?
512	 */
513	cache_t *bcp;
514
515	x_len = fp->fi_count;
516	x_blkno = fp->fi_blocknum;
517	x_dev = fp->fi_devp->di_dcookie;
518	bcp = get_cache(bc_hash[BC_HASH(x_dev, x_blkno, x_len)], &bc_head);
519
520	return (bcp ? (caddr_t)bcp->data : 0);
521}
522
523int
524set_bcache(fileid_t *fp)
525{
526	/*
527	 *  Insert Disk Block Cache Entry:
528	 *
529	 *  In this case, we actually read the requested block into a
530	 *  dynamically allocated buffer before inserting it into the
531	 *  cache.  If the read fails, we return a non-zero value.
532	 *
533	 *  The search keys for disk blocks are the block number and
534	 *  buffer size.  The data associated with each entry is the
535	 *  corresponding data buffer.
536	 */
537	bc_t *bcp;
538
539	if (fp->fi_memp = bkmem_alloc(x_len = fp->fi_count)) {
540		/*
541		 *  We were able to succesffully allocate an input
542		 *  buffer, now read the data into it.
543		 */
544		if (diskread(fp) != 0) {
545			/*
546			 * I/O error on read. Free the input buffer,
547			 * print an error message, and bail out.
548			 */
549			bkmem_free(fp->fi_memp, x_len);
550			printf("disk read error\n");
551			return (-1);
552		}
553
554		x_blkno = fp->fi_blocknum;
555		x_dev = fp->fi_devp->di_dcookie;
556		bcp = (bc_t *)
557			set_cache(&bc_hash[BC_HASH(x_dev, x_blkno, x_len)],
558								&bc_head, 0);
559		bcp->bc_blk = x_blkno;
560		bcp->bc_hdr.dev = x_dev;
561		bcp->bc_hdr.size = x_len;
562		bcp->bc_hdr.data = (void *)fp->fi_memp;
563
564	} else {
565		/*
566		 * We could be a bit more convervative here by
567		 * calling "set_cache" before we try to allocate a
568		 * buffer (thereby giving us a chance to re-use a
569		 * previously allocated buffer) but the error recovery
570		 * is a bit trickier, and if we're that short on memory
571		 * we'll have trouble elsewhere anyway!
572		 */
573		prom_panic("can't read - no memory");
574	}
575
576	return (0);
577}
578
579void
580release_cache(int dev)
581{
582	/*
583	 *  Reclaim all cache entries:
584	 *
585	 *  This routine is called by the file-system's "closeall" method.  It
586	 *  removes all cache entries associated with that file system from the
587	 *  global cache and release any resources bound to said entrires.
588	 */
589
590	(void) reclaim_cache(&ic_head, dev);
591	(void) reclaim_cache(&dc_head, dev);
592	(void) reclaim_cache(&bc_head, dev);
593}
594
595void
596print_cache_data()
597{
598	/*
599	 *  Print some cacheing statistics ...
600	 */
601	static char	*tag[] = { "inode", "directory", "disk block", 0};
602	static head_t	*hdp[] = { &ic_head, &dc_head, &bc_head, 0};
603
604	int j;
605
606	for (j = 0; tag[j]; j++) {
607		/*
608		 * Print statistics maintained in the header
609		 * ("head_t" struct) of each of the above caches.
610		 */
611		head_t *hp = hdp[j];
612
613		if (j)
614			printf("\n");
615		printf("%s cache:\n", tag[j]);
616		printf("   max size %d\n", hp->maxblks);
617		printf("   actual size %d\n", hp->count);
618		printf("   total searches %d\n", hp->searches);
619		printf("   cache hits %d\n", hp->hits);
620		printf("   cache purges %d\n", hp->purges);
621	}
622
623	printf("\nread opts %d\n", read_opt);
624}
625