mpool.c revision 124735
1/*-
2 * Copyright (c) 1990, 1993, 1994
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 */
33
34#if defined(LIBC_SCCS) && !defined(lint)
35static char sccsid[] = "@(#)mpool.c	8.5 (Berkeley) 7/26/94";
36#endif /* LIBC_SCCS and not lint */
37#include <sys/cdefs.h>
38__FBSDID("$FreeBSD: head/lib/libc/db/mpool/mpool.c 124735 2004-01-20 00:40:35Z das $");
39
40#include "namespace.h"
41#include <sys/param.h>
42#include <sys/queue.h>
43#include <sys/stat.h>
44
45#include <errno.h>
46#include <stdio.h>
47#include <stdlib.h>
48#include <string.h>
49#include <unistd.h>
50#include "un-namespace.h"
51
52#include <db.h>
53
54#define	__MPOOLINTERFACE_PRIVATE
55#include <mpool.h>
56
57static BKT *mpool_bkt(MPOOL *);
58static BKT *mpool_look(MPOOL *, pgno_t);
59static int  mpool_write(MPOOL *, BKT *);
60
61/*
62 * mpool_open --
63 *	Initialize a memory pool.
64 */
65MPOOL *
66mpool_open(key, fd, pagesize, maxcache)
67	void *key;
68	int fd;
69	pgno_t pagesize, maxcache;
70{
71	struct stat sb;
72	MPOOL *mp;
73	int entry;
74
75	/*
76	 * Get information about the file.
77	 *
78	 * XXX
79	 * We don't currently handle pipes, although we should.
80	 */
81	if (_fstat(fd, &sb))
82		return (NULL);
83	if (!S_ISREG(sb.st_mode)) {
84		errno = ESPIPE;
85		return (NULL);
86	}
87
88	/* Allocate and initialize the MPOOL cookie. */
89	if ((mp = (MPOOL *)calloc(1, sizeof(MPOOL))) == NULL)
90		return (NULL);
91	TAILQ_INIT(&mp->lqh);
92	for (entry = 0; entry < HASHSIZE; ++entry)
93		TAILQ_INIT(&mp->hqh[entry]);
94	mp->maxcache = maxcache;
95	mp->npages = sb.st_size / pagesize;
96	mp->pagesize = pagesize;
97	mp->fd = fd;
98	return (mp);
99}
100
101/*
102 * mpool_filter --
103 *	Initialize input/output filters.
104 */
105void
106mpool_filter(mp, pgin, pgout, pgcookie)
107	MPOOL *mp;
108	void (*pgin)(void *, pgno_t, void *);
109	void (*pgout)(void *, pgno_t, void *);
110	void *pgcookie;
111{
112	mp->pgin = pgin;
113	mp->pgout = pgout;
114	mp->pgcookie = pgcookie;
115}
116
117/*
118 * mpool_new --
119 *	Get a new page of memory.
120 */
121void *
122mpool_new(mp, pgnoaddr)
123	MPOOL *mp;
124	pgno_t *pgnoaddr;
125{
126	struct _hqh *head;
127	BKT *bp;
128
129	if (mp->npages == MAX_PAGE_NUMBER) {
130		(void)fprintf(stderr, "mpool_new: page allocation overflow.\n");
131		abort();
132	}
133#ifdef STATISTICS
134	++mp->pagenew;
135#endif
136	/*
137	 * Get a BKT from the cache.  Assign a new page number, attach
138	 * it to the head of the hash chain, the tail of the lru chain,
139	 * and return.
140	 */
141	if ((bp = mpool_bkt(mp)) == NULL)
142		return (NULL);
143	*pgnoaddr = bp->pgno = mp->npages++;
144	bp->flags = MPOOL_PINNED;
145
146	head = &mp->hqh[HASHKEY(bp->pgno)];
147	TAILQ_INSERT_HEAD(head, bp, hq);
148	TAILQ_INSERT_TAIL(&mp->lqh, bp, q);
149	return (bp->page);
150}
151
152/*
153 * mpool_get
154 *	Get a page.
155 */
156void *
157mpool_get(mp, pgno, flags)
158	MPOOL *mp;
159	pgno_t pgno;
160	u_int flags;				/* XXX not used? */
161{
162	struct _hqh *head;
163	BKT *bp;
164	off_t off;
165	int nr;
166
167	/* Check for attempt to retrieve a non-existent page. */
168	if (pgno >= mp->npages) {
169		errno = EINVAL;
170		return (NULL);
171	}
172
173#ifdef STATISTICS
174	++mp->pageget;
175#endif
176
177	/* Check for a page that is cached. */
178	if ((bp = mpool_look(mp, pgno)) != NULL) {
179#ifdef DEBUG
180		if (bp->flags & MPOOL_PINNED) {
181			(void)fprintf(stderr,
182			    "mpool_get: page %d already pinned\n", bp->pgno);
183			abort();
184		}
185#endif
186		/*
187		 * Move the page to the head of the hash chain and the tail
188		 * of the lru chain.
189		 */
190		head = &mp->hqh[HASHKEY(bp->pgno)];
191		TAILQ_REMOVE(head, bp, hq);
192		TAILQ_INSERT_HEAD(head, bp, hq);
193		TAILQ_REMOVE(&mp->lqh, bp, q);
194		TAILQ_INSERT_TAIL(&mp->lqh, bp, q);
195
196		/* Return a pinned page. */
197		bp->flags |= MPOOL_PINNED;
198		return (bp->page);
199	}
200
201	/* Get a page from the cache. */
202	if ((bp = mpool_bkt(mp)) == NULL)
203		return (NULL);
204
205	/* Read in the contents. */
206#ifdef STATISTICS
207	++mp->pageread;
208#endif
209	off = mp->pagesize * pgno;
210	nr = pread(mp->fd, bp->page, mp->pagesize, off);
211	if (nr != mp->pagesize) {
212		if (nr >= 0)
213			errno = EFTYPE;
214		return (NULL);
215	}
216
217	/* Set the page number, pin the page. */
218	bp->pgno = pgno;
219	bp->flags = MPOOL_PINNED;
220
221	/*
222	 * Add the page to the head of the hash chain and the tail
223	 * of the lru chain.
224	 */
225	head = &mp->hqh[HASHKEY(bp->pgno)];
226	TAILQ_INSERT_HEAD(head, bp, hq);
227	TAILQ_INSERT_TAIL(&mp->lqh, bp, q);
228
229	/* Run through the user's filter. */
230	if (mp->pgin != NULL)
231		(mp->pgin)(mp->pgcookie, bp->pgno, bp->page);
232
233	return (bp->page);
234}
235
236/*
237 * mpool_put
238 *	Return a page.
239 */
240int
241mpool_put(mp, page, flags)
242	MPOOL *mp;
243	void *page;
244	u_int flags;
245{
246	BKT *bp;
247
248#ifdef STATISTICS
249	++mp->pageput;
250#endif
251	bp = (BKT *)((char *)page - sizeof(BKT));
252#ifdef DEBUG
253	if (!(bp->flags & MPOOL_PINNED)) {
254		(void)fprintf(stderr,
255		    "mpool_put: page %d not pinned\n", bp->pgno);
256		abort();
257	}
258#endif
259	bp->flags &= ~MPOOL_PINNED;
260	bp->flags |= flags & MPOOL_DIRTY;
261	return (RET_SUCCESS);
262}
263
264/*
265 * mpool_close
266 *	Close the buffer pool.
267 */
268int
269mpool_close(mp)
270	MPOOL *mp;
271{
272	BKT *bp;
273
274	/* Free up any space allocated to the lru pages. */
275	while (!TAILQ_EMPTY(&mp->lqh)) {
276		bp = TAILQ_FIRST(&mp->lqh);
277		TAILQ_REMOVE(&mp->lqh, bp, q);
278		free(bp);
279	}
280
281	/* Free the MPOOL cookie. */
282	free(mp);
283	return (RET_SUCCESS);
284}
285
286/*
287 * mpool_sync
288 *	Sync the pool to disk.
289 */
290int
291mpool_sync(mp)
292	MPOOL *mp;
293{
294	BKT *bp;
295
296	/* Walk the lru chain, flushing any dirty pages to disk. */
297	TAILQ_FOREACH(bp, &mp->lqh, q)
298		if (bp->flags & MPOOL_DIRTY &&
299		    mpool_write(mp, bp) == RET_ERROR)
300			return (RET_ERROR);
301
302	/* Sync the file descriptor. */
303	return (_fsync(mp->fd) ? RET_ERROR : RET_SUCCESS);
304}
305
306/*
307 * mpool_bkt
308 *	Get a page from the cache (or create one).
309 */
310static BKT *
311mpool_bkt(mp)
312	MPOOL *mp;
313{
314	struct _hqh *head;
315	BKT *bp;
316
317	/* If under the max cached, always create a new page. */
318	if (mp->curcache < mp->maxcache)
319		goto new;
320
321	/*
322	 * If the cache is max'd out, walk the lru list for a buffer we
323	 * can flush.  If we find one, write it (if necessary) and take it
324	 * off any lists.  If we don't find anything we grow the cache anyway.
325	 * The cache never shrinks.
326	 */
327	TAILQ_FOREACH(bp, &mp->lqh, q)
328		if (!(bp->flags & MPOOL_PINNED)) {
329			/* Flush if dirty. */
330			if (bp->flags & MPOOL_DIRTY &&
331			    mpool_write(mp, bp) == RET_ERROR)
332				return (NULL);
333#ifdef STATISTICS
334			++mp->pageflush;
335#endif
336			/* Remove from the hash and lru queues. */
337			head = &mp->hqh[HASHKEY(bp->pgno)];
338			TAILQ_REMOVE(head, bp, hq);
339			TAILQ_REMOVE(&mp->lqh, bp, q);
340#ifdef DEBUG
341			{ void *spage;
342				spage = bp->page;
343				memset(bp, 0xff, sizeof(BKT) + mp->pagesize);
344				bp->page = spage;
345			}
346#endif
347			return (bp);
348		}
349
350new:	if ((bp = (BKT *)malloc(sizeof(BKT) + mp->pagesize)) == NULL)
351		return (NULL);
352#ifdef STATISTICS
353	++mp->pagealloc;
354#endif
355#if defined(DEBUG) || defined(PURIFY)
356	memset(bp, 0xff, sizeof(BKT) + mp->pagesize);
357#endif
358	bp->page = (char *)bp + sizeof(BKT);
359	++mp->curcache;
360	return (bp);
361}
362
363/*
364 * mpool_write
365 *	Write a page to disk.
366 */
367static int
368mpool_write(mp, bp)
369	MPOOL *mp;
370	BKT *bp;
371{
372	off_t off;
373
374#ifdef STATISTICS
375	++mp->pagewrite;
376#endif
377
378	/* Run through the user's filter. */
379	if (mp->pgout)
380		(mp->pgout)(mp->pgcookie, bp->pgno, bp->page);
381
382	off = mp->pagesize * bp->pgno;
383	if (pwrite(mp->fd, bp->page, mp->pagesize, off) != mp->pagesize)
384		return (RET_ERROR);
385
386	bp->flags &= ~MPOOL_DIRTY;
387	return (RET_SUCCESS);
388}
389
390/*
391 * mpool_look
392 *	Lookup a page in the cache.
393 */
394static BKT *
395mpool_look(mp, pgno)
396	MPOOL *mp;
397	pgno_t pgno;
398{
399	struct _hqh *head;
400	BKT *bp;
401
402	head = &mp->hqh[HASHKEY(pgno)];
403	TAILQ_FOREACH(bp, head, hq)
404		if (bp->pgno == pgno) {
405#ifdef STATISTICS
406			++mp->cachehit;
407#endif
408			return (bp);
409		}
410#ifdef STATISTICS
411	++mp->cachemiss;
412#endif
413	return (NULL);
414}
415
#ifdef STATISTICS
/*
 * mpool_stat
 *	Print out cache statistics.
 *
 *	Writes the pool's counters to stderr, followed by the page number
 *	of every bucket on the lru chain, ten per line.  A page number is
 *	suffixed with "d" when the bucket is dirty and "P" when pinned.
 *
 *	Every value is cast to u_long before being handed to "%lu" so the
 *	format and the argument agree whatever the width of the field's
 *	declared type.
 */
void
mpool_stat(mp)
	MPOOL *mp;
{
	BKT *bp;
	int cnt;
	const char *sep;

	(void)fprintf(stderr, "%lu pages in the file\n",
	    (u_long)mp->npages);
	(void)fprintf(stderr,
	    "page size %lu, cacheing %lu pages of %lu page max cache\n",
	    (u_long)mp->pagesize, (u_long)mp->curcache,
	    (u_long)mp->maxcache);
	(void)fprintf(stderr, "%lu page puts, %lu page gets, %lu page new\n",
	    (u_long)mp->pageput, (u_long)mp->pageget, (u_long)mp->pagenew);
	(void)fprintf(stderr, "%lu page allocs, %lu page flushes\n",
	    (u_long)mp->pagealloc, (u_long)mp->pageflush);
	/* Skip the hit rate when no lookups happened (avoids 0/0). */
	if (mp->cachehit + mp->cachemiss)
		(void)fprintf(stderr,
		    "%.0f%% cache hit rate (%lu hits, %lu misses)\n",
		    ((double)mp->cachehit / (mp->cachehit + mp->cachemiss))
		    * 100, (u_long)mp->cachehit, (u_long)mp->cachemiss);
	(void)fprintf(stderr, "%lu page reads, %lu page writes\n",
	    (u_long)mp->pageread, (u_long)mp->pagewrite);

	sep = "";
	cnt = 0;
	TAILQ_FOREACH(bp, &mp->lqh, q) {
		/* Page numbers are unsigned; print them as such. */
		(void)fprintf(stderr, "%s%lu", sep, (u_long)bp->pgno);
		if (bp->flags & MPOOL_DIRTY)
			(void)fprintf(stderr, "d");
		if (bp->flags & MPOOL_PINNED)
			(void)fprintf(stderr, "P");
		/* Start a new output line after every tenth entry. */
		if (++cnt == 10) {
			sep = "\n";
			cnt = 0;
		} else
			sep = ", ";

	}
	(void)fprintf(stderr, "\n");
}
#endif
463