mpool.c revision 56698
1/*-
2 * Copyright (c) 1990, 1993, 1994
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 * $FreeBSD: head/lib/libc/db/mpool/mpool.c 56698 2000-01-27 23:07:25Z jasone $
34 */
35
36#if defined(LIBC_SCCS) && !defined(lint)
37static char sccsid[] = "@(#)mpool.c	8.5 (Berkeley) 7/26/94";
38#endif /* LIBC_SCCS and not lint */
39
40#include <sys/param.h>
41#include <sys/queue.h>
42#include <sys/stat.h>
43
44#include <errno.h>
45#include <stdio.h>
46#include <stdlib.h>
47#include <string.h>
48#include <unistd.h>
49
50#include <db.h>
51
52#define	__MPOOLINTERFACE_PRIVATE
53#include <mpool.h>
54
55static BKT *mpool_bkt __P((MPOOL *));
56static BKT *mpool_look __P((MPOOL *, pgno_t));
57static int  mpool_write __P((MPOOL *, BKT *));
58
59/*
60 * mpool_open --
61 *	Initialize a memory pool.
62 */
63MPOOL *
64mpool_open(key, fd, pagesize, maxcache)
65	void *key;
66	int fd;
67	pgno_t pagesize, maxcache;
68{
69	struct stat sb;
70	MPOOL *mp;
71	int entry;
72
73	/*
74	 * Get information about the file.
75	 *
76	 * XXX
77	 * We don't currently handle pipes, although we should.
78	 */
79	if (fstat(fd, &sb))
80		return (NULL);
81	if (!S_ISREG(sb.st_mode)) {
82		errno = ESPIPE;
83		return (NULL);
84	}
85
86	/* Allocate and initialize the MPOOL cookie. */
87	if ((mp = (MPOOL *)calloc(1, sizeof(MPOOL))) == NULL)
88		return (NULL);
89	CIRCLEQ_INIT(&mp->lqh);
90	for (entry = 0; entry < HASHSIZE; ++entry)
91		CIRCLEQ_INIT(&mp->hqh[entry]);
92	mp->maxcache = maxcache;
93	mp->npages = sb.st_size / pagesize;
94	mp->pagesize = pagesize;
95	mp->fd = fd;
96	return (mp);
97}
98
99/*
100 * mpool_filter --
101 *	Initialize input/output filters.
102 */
103void
104mpool_filter(mp, pgin, pgout, pgcookie)
105	MPOOL *mp;
106	void (*pgin) __P((void *, pgno_t, void *));
107	void (*pgout) __P((void *, pgno_t, void *));
108	void *pgcookie;
109{
110	mp->pgin = pgin;
111	mp->pgout = pgout;
112	mp->pgcookie = pgcookie;
113}
114
115/*
116 * mpool_new --
117 *	Get a new page of memory.
118 */
119void *
120mpool_new(mp, pgnoaddr)
121	MPOOL *mp;
122	pgno_t *pgnoaddr;
123{
124	struct _hqh *head;
125	BKT *bp;
126
127	if (mp->npages == MAX_PAGE_NUMBER) {
128		(void)fprintf(stderr, "mpool_new: page allocation overflow.\n");
129		abort();
130	}
131#ifdef STATISTICS
132	++mp->pagenew;
133#endif
134	/*
135	 * Get a BKT from the cache.  Assign a new page number, attach
136	 * it to the head of the hash chain, the tail of the lru chain,
137	 * and return.
138	 */
139	if ((bp = mpool_bkt(mp)) == NULL)
140		return (NULL);
141	*pgnoaddr = bp->pgno = mp->npages++;
142	bp->flags = MPOOL_PINNED;
143
144	head = &mp->hqh[HASHKEY(bp->pgno)];
145	CIRCLEQ_INSERT_HEAD(head, bp, hq);
146	CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q);
147	return (bp->page);
148}
149
150/*
151 * mpool_get
152 *	Get a page.
153 */
154void *
155mpool_get(mp, pgno, flags)
156	MPOOL *mp;
157	pgno_t pgno;
158	u_int flags;				/* XXX not used? */
159{
160	struct _hqh *head;
161	BKT *bp;
162	off_t off;
163	int nr;
164
165	/* Check for attempt to retrieve a non-existent page. */
166	if (pgno >= mp->npages) {
167		errno = EINVAL;
168		return (NULL);
169	}
170
171#ifdef STATISTICS
172	++mp->pageget;
173#endif
174
175	/* Check for a page that is cached. */
176	if ((bp = mpool_look(mp, pgno)) != NULL) {
177#ifdef DEBUG
178		if (bp->flags & MPOOL_PINNED) {
179			(void)fprintf(stderr,
180			    "mpool_get: page %d already pinned\n", bp->pgno);
181			abort();
182		}
183#endif
184		/*
185		 * Move the page to the head of the hash chain and the tail
186		 * of the lru chain.
187		 */
188		head = &mp->hqh[HASHKEY(bp->pgno)];
189		CIRCLEQ_REMOVE(head, bp, hq);
190		CIRCLEQ_INSERT_HEAD(head, bp, hq);
191		CIRCLEQ_REMOVE(&mp->lqh, bp, q);
192		CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q);
193
194		/* Return a pinned page. */
195		bp->flags |= MPOOL_PINNED;
196		return (bp->page);
197	}
198
199	/* Get a page from the cache. */
200	if ((bp = mpool_bkt(mp)) == NULL)
201		return (NULL);
202
203	/* Read in the contents. */
204#ifdef STATISTICS
205	++mp->pageread;
206#endif
207	off = mp->pagesize * pgno;
208	if (lseek(mp->fd, off, SEEK_SET) != off)
209		return (NULL);
210	if ((nr = _read(mp->fd, bp->page, mp->pagesize)) != mp->pagesize) {
211		if (nr >= 0)
212			errno = EFTYPE;
213		return (NULL);
214	}
215
216	/* Set the page number, pin the page. */
217	bp->pgno = pgno;
218	bp->flags = MPOOL_PINNED;
219
220	/*
221	 * Add the page to the head of the hash chain and the tail
222	 * of the lru chain.
223	 */
224	head = &mp->hqh[HASHKEY(bp->pgno)];
225	CIRCLEQ_INSERT_HEAD(head, bp, hq);
226	CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q);
227
228	/* Run through the user's filter. */
229	if (mp->pgin != NULL)
230		(mp->pgin)(mp->pgcookie, bp->pgno, bp->page);
231
232	return (bp->page);
233}
234
235/*
236 * mpool_put
237 *	Return a page.
238 */
239int
240mpool_put(mp, page, flags)
241	MPOOL *mp;
242	void *page;
243	u_int flags;
244{
245	BKT *bp;
246
247#ifdef STATISTICS
248	++mp->pageput;
249#endif
250	bp = (BKT *)((char *)page - sizeof(BKT));
251#ifdef DEBUG
252	if (!(bp->flags & MPOOL_PINNED)) {
253		(void)fprintf(stderr,
254		    "mpool_put: page %d not pinned\n", bp->pgno);
255		abort();
256	}
257#endif
258	bp->flags &= ~MPOOL_PINNED;
259	bp->flags |= flags & MPOOL_DIRTY;
260	return (RET_SUCCESS);
261}
262
263/*
264 * mpool_close
265 *	Close the buffer pool.
266 */
267int
268mpool_close(mp)
269	MPOOL *mp;
270{
271	BKT *bp;
272
273	/* Free up any space allocated to the lru pages. */
274	while ((bp = mp->lqh.cqh_first) != (void *)&mp->lqh) {
275		CIRCLEQ_REMOVE(&mp->lqh, mp->lqh.cqh_first, q);
276		free(bp);
277	}
278
279	/* Free the MPOOL cookie. */
280	free(mp);
281	return (RET_SUCCESS);
282}
283
284/*
285 * mpool_sync
286 *	Sync the pool to disk.
287 */
288int
289mpool_sync(mp)
290	MPOOL *mp;
291{
292	BKT *bp;
293
294	/* Walk the lru chain, flushing any dirty pages to disk. */
295	for (bp = mp->lqh.cqh_first;
296	    bp != (void *)&mp->lqh; bp = bp->q.cqe_next)
297		if (bp->flags & MPOOL_DIRTY &&
298		    mpool_write(mp, bp) == RET_ERROR)
299			return (RET_ERROR);
300
301	/* Sync the file descriptor. */
302	return (_fsync(mp->fd) ? RET_ERROR : RET_SUCCESS);
303}
304
305/*
306 * mpool_bkt
307 *	Get a page from the cache (or create one).
308 */
309static BKT *
310mpool_bkt(mp)
311	MPOOL *mp;
312{
313	struct _hqh *head;
314	BKT *bp;
315
316	/* If under the max cached, always create a new page. */
317	if (mp->curcache < mp->maxcache)
318		goto new;
319
320	/*
321	 * If the cache is max'd out, walk the lru list for a buffer we
322	 * can flush.  If we find one, write it (if necessary) and take it
323	 * off any lists.  If we don't find anything we grow the cache anyway.
324	 * The cache never shrinks.
325	 */
326	for (bp = mp->lqh.cqh_first;
327	    bp != (void *)&mp->lqh; bp = bp->q.cqe_next)
328		if (!(bp->flags & MPOOL_PINNED)) {
329			/* Flush if dirty. */
330			if (bp->flags & MPOOL_DIRTY &&
331			    mpool_write(mp, bp) == RET_ERROR)
332				return (NULL);
333#ifdef STATISTICS
334			++mp->pageflush;
335#endif
336			/* Remove from the hash and lru queues. */
337			head = &mp->hqh[HASHKEY(bp->pgno)];
338			CIRCLEQ_REMOVE(head, bp, hq);
339			CIRCLEQ_REMOVE(&mp->lqh, bp, q);
340#ifdef DEBUG
341			{ void *spage;
342				spage = bp->page;
343				memset(bp, 0xff, sizeof(BKT) + mp->pagesize);
344				bp->page = spage;
345			}
346#endif
347			return (bp);
348		}
349
350new:	if ((bp = (BKT *)malloc(sizeof(BKT) + mp->pagesize)) == NULL)
351		return (NULL);
352#ifdef STATISTICS
353	++mp->pagealloc;
354#endif
355#if defined(DEBUG) || defined(PURIFY)
356	memset(bp, 0xff, sizeof(BKT) + mp->pagesize);
357#endif
358	bp->page = (char *)bp + sizeof(BKT);
359	++mp->curcache;
360	return (bp);
361}
362
363/*
364 * mpool_write
365 *	Write a page to disk.
366 */
367static int
368mpool_write(mp, bp)
369	MPOOL *mp;
370	BKT *bp;
371{
372	off_t off;
373
374#ifdef STATISTICS
375	++mp->pagewrite;
376#endif
377
378	/* Run through the user's filter. */
379	if (mp->pgout)
380		(mp->pgout)(mp->pgcookie, bp->pgno, bp->page);
381
382	off = mp->pagesize * bp->pgno;
383	if (lseek(mp->fd, off, SEEK_SET) != off)
384		return (RET_ERROR);
385	if (_write(mp->fd, bp->page, mp->pagesize) != mp->pagesize)
386		return (RET_ERROR);
387
388	bp->flags &= ~MPOOL_DIRTY;
389	return (RET_SUCCESS);
390}
391
392/*
393 * mpool_look
394 *	Lookup a page in the cache.
395 */
396static BKT *
397mpool_look(mp, pgno)
398	MPOOL *mp;
399	pgno_t pgno;
400{
401	struct _hqh *head;
402	BKT *bp;
403
404	head = &mp->hqh[HASHKEY(pgno)];
405	for (bp = head->cqh_first; bp != (void *)head; bp = bp->hq.cqe_next)
406		if (bp->pgno == pgno) {
407#ifdef STATISTICS
408			++mp->cachehit;
409#endif
410			return (bp);
411		}
412#ifdef STATISTICS
413	++mp->cachemiss;
414#endif
415	return (NULL);
416}
417
#ifdef STATISTICS
/*
 * mpool_stat
 *	Dump the pool's counters to stderr, followed by the page numbers on
 *	the lru chain in chain order, ten per line.  A page number is
 *	suffixed with "d" when the page is dirty and "P" when it is pinned.
 */
void
mpool_stat(mp)
	MPOOL *mp;
{
	BKT *bkt;
	int col;
	char *delim;
	double hitpct;

	(void)fprintf(stderr, "%lu pages in the file\n", mp->npages);
	(void)fprintf(stderr,
	    "page size %lu, cacheing %lu pages of %lu page max cache\n",
	    mp->pagesize, mp->curcache, mp->maxcache);
	(void)fprintf(stderr, "%lu page puts, %lu page gets, %lu page new\n",
	    mp->pageput, mp->pageget, mp->pagenew);
	(void)fprintf(stderr, "%lu page allocs, %lu page flushes\n",
	    mp->pagealloc, mp->pageflush);

	/* The hit rate is only reported once a lookup has been counted. */
	if (mp->cachehit + mp->cachemiss) {
		hitpct = ((double)mp->cachehit /
		    (mp->cachehit + mp->cachemiss)) * 100;
		(void)fprintf(stderr,
		    "%.0f%% cache hit rate (%lu hits, %lu misses)\n",
		    hitpct, mp->cachehit, mp->cachemiss);
	}
	(void)fprintf(stderr, "%lu page reads, %lu page writes\n",
	    mp->pageread, mp->pagewrite);

	delim = "";
	col = 0;
	bkt = mp->lqh.cqh_first;
	while (bkt != (void *)&mp->lqh) {
		(void)fprintf(stderr, "%s%d", delim, bkt->pgno);
		if (bkt->flags & MPOOL_DIRTY)
			(void)fprintf(stderr, "d");
		if (bkt->flags & MPOOL_PINNED)
			(void)fprintf(stderr, "P");

		/* Every tenth entry the next separator is a line break. */
		if (++col == 10)
			col = 0;
		delim = (col == 0) ? "\n" : ", ";

		bkt = bkt->q.cqe_next;
	}
	(void)fprintf(stderr, "\n");
}
#endif
466