mpool.c revision 14287
1/*-
2 * Copyright (c) 1990, 1993, 1994
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 */
33
34#if defined(LIBC_SCCS) && !defined(lint)
35static char sccsid[] = "@(#)mpool.c	8.5 (Berkeley) 7/26/94";
36#endif /* LIBC_SCCS and not lint */
37
38#include <sys/param.h>
39#include <sys/queue.h>
40#include <sys/stat.h>
41
42#include <errno.h>
43#include <stdio.h>
44#include <stdlib.h>
45#include <string.h>
46#include <unistd.h>
47
48#include <db.h>
49
50#define	__MPOOLINTERFACE_PRIVATE
51#include <mpool.h>
52
53static BKT *mpool_bkt __P((MPOOL *));
54static BKT *mpool_look __P((MPOOL *, pgno_t));
55static int  mpool_write __P((MPOOL *, BKT *));
56
57/*
58 * mpool_open --
59 *	Initialize a memory pool.
60 */
61MPOOL *
62mpool_open(key, fd, pagesize, maxcache)
63	void *key;
64	int fd;
65	pgno_t pagesize, maxcache;
66{
67	struct stat sb;
68	MPOOL *mp;
69	int entry;
70
71	/*
72	 * Get information about the file.
73	 *
74	 * XXX
75	 * We don't currently handle pipes, although we should.
76	 */
77	if (fstat(fd, &sb))
78		return (NULL);
79	if (!S_ISREG(sb.st_mode)) {
80		errno = ESPIPE;
81		return (NULL);
82	}
83
84	/* Allocate and initialize the MPOOL cookie. */
85	if ((mp = (MPOOL *)calloc(1, sizeof(MPOOL))) == NULL)
86		return (NULL);
87	CIRCLEQ_INIT(&mp->lqh);
88	for (entry = 0; entry < HASHSIZE; ++entry)
89		CIRCLEQ_INIT(&mp->hqh[entry]);
90	mp->maxcache = maxcache;
91	mp->npages = sb.st_size / pagesize;
92	mp->pagesize = pagesize;
93	mp->fd = fd;
94	return (mp);
95}
96
97/*
98 * mpool_filter --
99 *	Initialize input/output filters.
100 */
101void
102mpool_filter(mp, pgin, pgout, pgcookie)
103	MPOOL *mp;
104	void (*pgin) __P((void *, pgno_t, void *));
105	void (*pgout) __P((void *, pgno_t, void *));
106	void *pgcookie;
107{
108	mp->pgin = pgin;
109	mp->pgout = pgout;
110	mp->pgcookie = pgcookie;
111}
112
113/*
114 * mpool_new --
115 *	Get a new page of memory.
116 */
117void *
118mpool_new(mp, pgnoaddr)
119	MPOOL *mp;
120	pgno_t *pgnoaddr;
121{
122	struct _hqh *head;
123	BKT *bp;
124
125	if (mp->npages == MAX_PAGE_NUMBER) {
126		(void)fprintf(stderr, "mpool_new: page allocation overflow.\n");
127		abort();
128	}
129#ifdef STATISTICS
130	++mp->pagenew;
131#endif
132	/*
133	 * Get a BKT from the cache.  Assign a new page number, attach
134	 * it to the head of the hash chain, the tail of the lru chain,
135	 * and return.
136	 */
137	if ((bp = mpool_bkt(mp)) == NULL)
138		return (NULL);
139	*pgnoaddr = bp->pgno = mp->npages++;
140	bp->flags = MPOOL_PINNED;
141
142	head = &mp->hqh[HASHKEY(bp->pgno)];
143	CIRCLEQ_INSERT_HEAD(head, bp, hq);
144	CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q);
145	return (bp->page);
146}
147
148/*
149 * mpool_get
150 *	Get a page.
151 */
152void *
153mpool_get(mp, pgno, flags)
154	MPOOL *mp;
155	pgno_t pgno;
156	u_int flags;				/* XXX not used? */
157{
158	struct _hqh *head;
159	BKT *bp;
160	off_t off;
161	int nr;
162
163	/* Check for attempt to retrieve a non-existent page. */
164	if (pgno >= mp->npages) {
165		errno = EINVAL;
166		return (NULL);
167	}
168
169#ifdef STATISTICS
170	++mp->pageget;
171#endif
172
173	/* Check for a page that is cached. */
174	if ((bp = mpool_look(mp, pgno)) != NULL) {
175#ifdef DEBUG
176		if (bp->flags & MPOOL_PINNED) {
177			(void)fprintf(stderr,
178			    "mpool_get: page %d already pinned\n", bp->pgno);
179			abort();
180		}
181#endif
182		/*
183		 * Move the page to the head of the hash chain and the tail
184		 * of the lru chain.
185		 */
186		head = &mp->hqh[HASHKEY(bp->pgno)];
187		CIRCLEQ_REMOVE(head, bp, hq);
188		CIRCLEQ_INSERT_HEAD(head, bp, hq);
189		CIRCLEQ_REMOVE(&mp->lqh, bp, q);
190		CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q);
191
192		/* Return a pinned page. */
193		bp->flags |= MPOOL_PINNED;
194		return (bp->page);
195	}
196
197	/* Get a page from the cache. */
198	if ((bp = mpool_bkt(mp)) == NULL)
199		return (NULL);
200
201	/* Read in the contents. */
202#ifdef STATISTICS
203	++mp->pageread;
204#endif
205	off = mp->pagesize * pgno;
206	if (lseek(mp->fd, off, SEEK_SET) != off)
207		return (NULL);
208	if ((nr = read(mp->fd, bp->page, mp->pagesize)) != mp->pagesize) {
209		if (nr >= 0)
210			errno = EFTYPE;
211		return (NULL);
212	}
213
214	/* Set the page number, pin the page. */
215	bp->pgno = pgno;
216	bp->flags = MPOOL_PINNED;
217
218	/*
219	 * Add the page to the head of the hash chain and the tail
220	 * of the lru chain.
221	 */
222	head = &mp->hqh[HASHKEY(bp->pgno)];
223	CIRCLEQ_INSERT_HEAD(head, bp, hq);
224	CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q);
225
226	/* Run through the user's filter. */
227	if (mp->pgin != NULL)
228		(mp->pgin)(mp->pgcookie, bp->pgno, bp->page);
229
230	return (bp->page);
231}
232
233/*
234 * mpool_put
235 *	Return a page.
236 */
237int
238mpool_put(mp, page, flags)
239	MPOOL *mp;
240	void *page;
241	u_int flags;
242{
243	BKT *bp;
244
245#ifdef STATISTICS
246	++mp->pageput;
247#endif
248	bp = (BKT *)((char *)page - sizeof(BKT));
249#ifdef DEBUG
250	if (!(bp->flags & MPOOL_PINNED)) {
251		(void)fprintf(stderr,
252		    "mpool_put: page %d not pinned\n", bp->pgno);
253		abort();
254	}
255#endif
256	bp->flags &= ~MPOOL_PINNED;
257	bp->flags |= flags & MPOOL_DIRTY;
258	return (RET_SUCCESS);
259}
260
261/*
262 * mpool_close
263 *	Close the buffer pool.
264 */
265int
266mpool_close(mp)
267	MPOOL *mp;
268{
269	BKT *bp;
270
271	/* Free up any space allocated to the lru pages. */
272	while ((bp = mp->lqh.cqh_first) != (void *)&mp->lqh) {
273		CIRCLEQ_REMOVE(&mp->lqh, mp->lqh.cqh_first, q);
274		free(bp);
275	}
276
277	/* Free the MPOOL cookie. */
278	free(mp);
279	return (RET_SUCCESS);
280}
281
282/*
283 * mpool_sync
284 *	Sync the pool to disk.
285 */
286int
287mpool_sync(mp)
288	MPOOL *mp;
289{
290	BKT *bp;
291
292	/* Walk the lru chain, flushing any dirty pages to disk. */
293	for (bp = mp->lqh.cqh_first;
294	    bp != (void *)&mp->lqh; bp = bp->q.cqe_next)
295		if (bp->flags & MPOOL_DIRTY &&
296		    mpool_write(mp, bp) == RET_ERROR)
297			return (RET_ERROR);
298
299	/* Sync the file descriptor. */
300	return (fsync(mp->fd) ? RET_ERROR : RET_SUCCESS);
301}
302
303/*
304 * mpool_bkt
305 *	Get a page from the cache (or create one).
306 */
307static BKT *
308mpool_bkt(mp)
309	MPOOL *mp;
310{
311	struct _hqh *head;
312	BKT *bp;
313
314	/* If under the max cached, always create a new page. */
315	if (mp->curcache < mp->maxcache)
316		goto new;
317
318	/*
319	 * If the cache is max'd out, walk the lru list for a buffer we
320	 * can flush.  If we find one, write it (if necessary) and take it
321	 * off any lists.  If we don't find anything we grow the cache anyway.
322	 * The cache never shrinks.
323	 */
324	for (bp = mp->lqh.cqh_first;
325	    bp != (void *)&mp->lqh; bp = bp->q.cqe_next)
326		if (!(bp->flags & MPOOL_PINNED)) {
327			/* Flush if dirty. */
328			if (bp->flags & MPOOL_DIRTY &&
329			    mpool_write(mp, bp) == RET_ERROR)
330				return (NULL);
331#ifdef STATISTICS
332			++mp->pageflush;
333#endif
334			/* Remove from the hash and lru queues. */
335			head = &mp->hqh[HASHKEY(bp->pgno)];
336			CIRCLEQ_REMOVE(head, bp, hq);
337			CIRCLEQ_REMOVE(&mp->lqh, bp, q);
338#ifdef DEBUG
339			{ void *spage;
340				spage = bp->page;
341				memset(bp, 0xff, sizeof(BKT) + mp->pagesize);
342				bp->page = spage;
343			}
344#endif
345			return (bp);
346		}
347
348new:	if ((bp = (BKT *)malloc(sizeof(BKT) + mp->pagesize)) == NULL)
349		return (NULL);
350#ifdef STATISTICS
351	++mp->pagealloc;
352#endif
353#if defined(DEBUG) || defined(PURIFY)
354	memset(bp, 0xff, sizeof(BKT) + mp->pagesize);
355#endif
356	bp->page = (char *)bp + sizeof(BKT);
357	++mp->curcache;
358	return (bp);
359}
360
361/*
362 * mpool_write
363 *	Write a page to disk.
364 */
365static int
366mpool_write(mp, bp)
367	MPOOL *mp;
368	BKT *bp;
369{
370	off_t off;
371
372#ifdef STATISTICS
373	++mp->pagewrite;
374#endif
375
376	/* Run through the user's filter. */
377	if (mp->pgout)
378		(mp->pgout)(mp->pgcookie, bp->pgno, bp->page);
379
380	off = mp->pagesize * bp->pgno;
381	if (lseek(mp->fd, off, SEEK_SET) != off)
382		return (RET_ERROR);
383	if (write(mp->fd, bp->page, mp->pagesize) != mp->pagesize)
384		return (RET_ERROR);
385
386	bp->flags &= ~MPOOL_DIRTY;
387	return (RET_SUCCESS);
388}
389
390/*
391 * mpool_look
392 *	Lookup a page in the cache.
393 */
394static BKT *
395mpool_look(mp, pgno)
396	MPOOL *mp;
397	pgno_t pgno;
398{
399	struct _hqh *head;
400	BKT *bp;
401
402	head = &mp->hqh[HASHKEY(pgno)];
403	for (bp = head->cqh_first; bp != (void *)head; bp = bp->hq.cqe_next)
404		if (bp->pgno == pgno) {
405#ifdef STATISTICS
406			++mp->cachehit;
407#endif
408			return (bp);
409		}
410#ifdef STATISTICS
411	++mp->cachemiss;
412#endif
413	return (NULL);
414}
415
416#ifdef STATISTICS
417/*
418 * mpool_stat
419 *	Print out cache statistics.
420 */
421void
422mpool_stat(mp)
423	MPOOL *mp;
424{
425	BKT *bp;
426	int cnt;
427	char *sep;
428
429	(void)fprintf(stderr, "%lu pages in the file\n", mp->npages);
430	(void)fprintf(stderr,
431	    "page size %lu, cacheing %lu pages of %lu page max cache\n",
432	    mp->pagesize, mp->curcache, mp->maxcache);
433	(void)fprintf(stderr, "%lu page puts, %lu page gets, %lu page new\n",
434	    mp->pageput, mp->pageget, mp->pagenew);
435	(void)fprintf(stderr, "%lu page allocs, %lu page flushes\n",
436	    mp->pagealloc, mp->pageflush);
437	if (mp->cachehit + mp->cachemiss)
438		(void)fprintf(stderr,
439		    "%.0f%% cache hit rate (%lu hits, %lu misses)\n",
440		    ((double)mp->cachehit / (mp->cachehit + mp->cachemiss))
441		    * 100, mp->cachehit, mp->cachemiss);
442	(void)fprintf(stderr, "%lu page reads, %lu page writes\n",
443	    mp->pageread, mp->pagewrite);
444
445	sep = "";
446	cnt = 0;
447	for (bp = mp->lqh.cqh_first;
448	    bp != (void *)&mp->lqh; bp = bp->q.cqe_next) {
449		(void)fprintf(stderr, "%s%d", sep, bp->pgno);
450		if (bp->flags & MPOOL_DIRTY)
451			(void)fprintf(stderr, "d");
452		if (bp->flags & MPOOL_PINNED)
453			(void)fprintf(stderr, "P");
454		if (++cnt == 10) {
455			sep = "\n";
456			cnt = 0;
457		} else
458			sep = ", ";
459
460	}
461	(void)fprintf(stderr, "\n");
462}
463#endif
464