mpool.c revision 92905
1/*-
2 * Copyright (c) 1990, 1993, 1994
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 * $FreeBSD: head/lib/libc/db/mpool/mpool.c 92905 2002-03-21 22:49:10Z obrien $
34 */
35
36#if defined(LIBC_SCCS) && !defined(lint)
37static char sccsid[] = "@(#)mpool.c	8.5 (Berkeley) 7/26/94";
38#endif /* LIBC_SCCS and not lint */
39
40#include "namespace.h"
41#include <sys/param.h>
42#include <sys/queue.h>
43#include <sys/stat.h>
44
45#include <errno.h>
46#include <stdio.h>
47#include <stdlib.h>
48#include <string.h>
49#include <unistd.h>
50#include "un-namespace.h"
51
52#include <db.h>
53
54#define	__MPOOLINTERFACE_PRIVATE
55#include <mpool.h>
56
57static BKT *mpool_bkt(MPOOL *);
58static BKT *mpool_look(MPOOL *, pgno_t);
59static int  mpool_write(MPOOL *, BKT *);
60
61/*
62 * mpool_open --
63 *	Initialize a memory pool.
64 */
65MPOOL *
66mpool_open(key, fd, pagesize, maxcache)
67	void *key;
68	int fd;
69	pgno_t pagesize, maxcache;
70{
71	struct stat sb;
72	MPOOL *mp;
73	int entry;
74
75	/*
76	 * Get information about the file.
77	 *
78	 * XXX
79	 * We don't currently handle pipes, although we should.
80	 */
81	if (_fstat(fd, &sb))
82		return (NULL);
83	if (!S_ISREG(sb.st_mode)) {
84		errno = ESPIPE;
85		return (NULL);
86	}
87
88	/* Allocate and initialize the MPOOL cookie. */
89	if ((mp = (MPOOL *)calloc(1, sizeof(MPOOL))) == NULL)
90		return (NULL);
91	TAILQ_INIT(&mp->lqh);
92	for (entry = 0; entry < HASHSIZE; ++entry)
93		TAILQ_INIT(&mp->hqh[entry]);
94	mp->maxcache = maxcache;
95	mp->npages = sb.st_size / pagesize;
96	mp->pagesize = pagesize;
97	mp->fd = fd;
98	return (mp);
99}
100
101/*
102 * mpool_filter --
103 *	Initialize input/output filters.
104 */
105void
106mpool_filter(mp, pgin, pgout, pgcookie)
107	MPOOL *mp;
108	void (*pgin)(void *, pgno_t, void *);
109	void (*pgout)(void *, pgno_t, void *);
110	void *pgcookie;
111{
112	mp->pgin = pgin;
113	mp->pgout = pgout;
114	mp->pgcookie = pgcookie;
115}
116
117/*
118 * mpool_new --
119 *	Get a new page of memory.
120 */
121void *
122mpool_new(mp, pgnoaddr)
123	MPOOL *mp;
124	pgno_t *pgnoaddr;
125{
126	struct _hqh *head;
127	BKT *bp;
128
129	if (mp->npages == MAX_PAGE_NUMBER) {
130		(void)fprintf(stderr, "mpool_new: page allocation overflow.\n");
131		abort();
132	}
133#ifdef STATISTICS
134	++mp->pagenew;
135#endif
136	/*
137	 * Get a BKT from the cache.  Assign a new page number, attach
138	 * it to the head of the hash chain, the tail of the lru chain,
139	 * and return.
140	 */
141	if ((bp = mpool_bkt(mp)) == NULL)
142		return (NULL);
143	*pgnoaddr = bp->pgno = mp->npages++;
144	bp->flags = MPOOL_PINNED;
145
146	head = &mp->hqh[HASHKEY(bp->pgno)];
147	TAILQ_INSERT_HEAD(head, bp, hq);
148	TAILQ_INSERT_TAIL(&mp->lqh, bp, q);
149	return (bp->page);
150}
151
152/*
153 * mpool_get
154 *	Get a page.
155 */
156void *
157mpool_get(mp, pgno, flags)
158	MPOOL *mp;
159	pgno_t pgno;
160	u_int flags;				/* XXX not used? */
161{
162	struct _hqh *head;
163	BKT *bp;
164	off_t off;
165	int nr;
166
167	/* Check for attempt to retrieve a non-existent page. */
168	if (pgno >= mp->npages) {
169		errno = EINVAL;
170		return (NULL);
171	}
172
173#ifdef STATISTICS
174	++mp->pageget;
175#endif
176
177	/* Check for a page that is cached. */
178	if ((bp = mpool_look(mp, pgno)) != NULL) {
179#ifdef DEBUG
180		if (bp->flags & MPOOL_PINNED) {
181			(void)fprintf(stderr,
182			    "mpool_get: page %d already pinned\n", bp->pgno);
183			abort();
184		}
185#endif
186		/*
187		 * Move the page to the head of the hash chain and the tail
188		 * of the lru chain.
189		 */
190		head = &mp->hqh[HASHKEY(bp->pgno)];
191		TAILQ_REMOVE(head, bp, hq);
192		TAILQ_INSERT_HEAD(head, bp, hq);
193		TAILQ_REMOVE(&mp->lqh, bp, q);
194		TAILQ_INSERT_TAIL(&mp->lqh, bp, q);
195
196		/* Return a pinned page. */
197		bp->flags |= MPOOL_PINNED;
198		return (bp->page);
199	}
200
201	/* Get a page from the cache. */
202	if ((bp = mpool_bkt(mp)) == NULL)
203		return (NULL);
204
205	/* Read in the contents. */
206#ifdef STATISTICS
207	++mp->pageread;
208#endif
209	off = mp->pagesize * pgno;
210	if (lseek(mp->fd, off, SEEK_SET) != off)
211		return (NULL);
212	if ((nr = _read(mp->fd, bp->page, mp->pagesize)) != mp->pagesize) {
213		if (nr >= 0)
214			errno = EFTYPE;
215		return (NULL);
216	}
217
218	/* Set the page number, pin the page. */
219	bp->pgno = pgno;
220	bp->flags = MPOOL_PINNED;
221
222	/*
223	 * Add the page to the head of the hash chain and the tail
224	 * of the lru chain.
225	 */
226	head = &mp->hqh[HASHKEY(bp->pgno)];
227	TAILQ_INSERT_HEAD(head, bp, hq);
228	TAILQ_INSERT_TAIL(&mp->lqh, bp, q);
229
230	/* Run through the user's filter. */
231	if (mp->pgin != NULL)
232		(mp->pgin)(mp->pgcookie, bp->pgno, bp->page);
233
234	return (bp->page);
235}
236
237/*
238 * mpool_put
239 *	Return a page.
240 */
241int
242mpool_put(mp, page, flags)
243	MPOOL *mp;
244	void *page;
245	u_int flags;
246{
247	BKT *bp;
248
249#ifdef STATISTICS
250	++mp->pageput;
251#endif
252	bp = (BKT *)((char *)page - sizeof(BKT));
253#ifdef DEBUG
254	if (!(bp->flags & MPOOL_PINNED)) {
255		(void)fprintf(stderr,
256		    "mpool_put: page %d not pinned\n", bp->pgno);
257		abort();
258	}
259#endif
260	bp->flags &= ~MPOOL_PINNED;
261	bp->flags |= flags & MPOOL_DIRTY;
262	return (RET_SUCCESS);
263}
264
265/*
266 * mpool_close
267 *	Close the buffer pool.
268 */
269int
270mpool_close(mp)
271	MPOOL *mp;
272{
273	BKT *bp;
274
275	/* Free up any space allocated to the lru pages. */
276	while (!TAILQ_EMPTY(&mp->lqh)) {
277		bp = TAILQ_FIRST(&mp->lqh);
278		TAILQ_REMOVE(&mp->lqh, bp, q);
279		free(bp);
280	}
281
282	/* Free the MPOOL cookie. */
283	free(mp);
284	return (RET_SUCCESS);
285}
286
287/*
288 * mpool_sync
289 *	Sync the pool to disk.
290 */
291int
292mpool_sync(mp)
293	MPOOL *mp;
294{
295	BKT *bp;
296
297	/* Walk the lru chain, flushing any dirty pages to disk. */
298	TAILQ_FOREACH(bp, &mp->lqh, q)
299		if (bp->flags & MPOOL_DIRTY &&
300		    mpool_write(mp, bp) == RET_ERROR)
301			return (RET_ERROR);
302
303	/* Sync the file descriptor. */
304	return (_fsync(mp->fd) ? RET_ERROR : RET_SUCCESS);
305}
306
307/*
308 * mpool_bkt
309 *	Get a page from the cache (or create one).
310 */
311static BKT *
312mpool_bkt(mp)
313	MPOOL *mp;
314{
315	struct _hqh *head;
316	BKT *bp;
317
318	/* If under the max cached, always create a new page. */
319	if (mp->curcache < mp->maxcache)
320		goto new;
321
322	/*
323	 * If the cache is max'd out, walk the lru list for a buffer we
324	 * can flush.  If we find one, write it (if necessary) and take it
325	 * off any lists.  If we don't find anything we grow the cache anyway.
326	 * The cache never shrinks.
327	 */
328	TAILQ_FOREACH(bp, &mp->lqh, q)
329		if (!(bp->flags & MPOOL_PINNED)) {
330			/* Flush if dirty. */
331			if (bp->flags & MPOOL_DIRTY &&
332			    mpool_write(mp, bp) == RET_ERROR)
333				return (NULL);
334#ifdef STATISTICS
335			++mp->pageflush;
336#endif
337			/* Remove from the hash and lru queues. */
338			head = &mp->hqh[HASHKEY(bp->pgno)];
339			TAILQ_REMOVE(head, bp, hq);
340			TAILQ_REMOVE(&mp->lqh, bp, q);
341#ifdef DEBUG
342			{ void *spage;
343				spage = bp->page;
344				memset(bp, 0xff, sizeof(BKT) + mp->pagesize);
345				bp->page = spage;
346			}
347#endif
348			return (bp);
349		}
350
351new:	if ((bp = (BKT *)malloc(sizeof(BKT) + mp->pagesize)) == NULL)
352		return (NULL);
353#ifdef STATISTICS
354	++mp->pagealloc;
355#endif
356#if defined(DEBUG) || defined(PURIFY)
357	memset(bp, 0xff, sizeof(BKT) + mp->pagesize);
358#endif
359	bp->page = (char *)bp + sizeof(BKT);
360	++mp->curcache;
361	return (bp);
362}
363
364/*
365 * mpool_write
366 *	Write a page to disk.
367 */
368static int
369mpool_write(mp, bp)
370	MPOOL *mp;
371	BKT *bp;
372{
373	off_t off;
374
375#ifdef STATISTICS
376	++mp->pagewrite;
377#endif
378
379	/* Run through the user's filter. */
380	if (mp->pgout)
381		(mp->pgout)(mp->pgcookie, bp->pgno, bp->page);
382
383	off = mp->pagesize * bp->pgno;
384	if (lseek(mp->fd, off, SEEK_SET) != off)
385		return (RET_ERROR);
386	if (_write(mp->fd, bp->page, mp->pagesize) != mp->pagesize)
387		return (RET_ERROR);
388
389	bp->flags &= ~MPOOL_DIRTY;
390	return (RET_SUCCESS);
391}
392
393/*
394 * mpool_look
395 *	Lookup a page in the cache.
396 */
397static BKT *
398mpool_look(mp, pgno)
399	MPOOL *mp;
400	pgno_t pgno;
401{
402	struct _hqh *head;
403	BKT *bp;
404
405	head = &mp->hqh[HASHKEY(pgno)];
406	TAILQ_FOREACH(bp, head, hq)
407		if (bp->pgno == pgno) {
408#ifdef STATISTICS
409			++mp->cachehit;
410#endif
411			return (bp);
412		}
413#ifdef STATISTICS
414	++mp->cachemiss;
415#endif
416	return (NULL);
417}
418
419#ifdef STATISTICS
420/*
421 * mpool_stat
422 *	Print out cache statistics.
423 */
424void
425mpool_stat(mp)
426	MPOOL *mp;
427{
428	BKT *bp;
429	int cnt;
430	char *sep;
431
432	(void)fprintf(stderr, "%lu pages in the file\n", mp->npages);
433	(void)fprintf(stderr,
434	    "page size %lu, cacheing %lu pages of %lu page max cache\n",
435	    mp->pagesize, mp->curcache, mp->maxcache);
436	(void)fprintf(stderr, "%lu page puts, %lu page gets, %lu page new\n",
437	    mp->pageput, mp->pageget, mp->pagenew);
438	(void)fprintf(stderr, "%lu page allocs, %lu page flushes\n",
439	    mp->pagealloc, mp->pageflush);
440	if (mp->cachehit + mp->cachemiss)
441		(void)fprintf(stderr,
442		    "%.0f%% cache hit rate (%lu hits, %lu misses)\n",
443		    ((double)mp->cachehit / (mp->cachehit + mp->cachemiss))
444		    * 100, mp->cachehit, mp->cachemiss);
445	(void)fprintf(stderr, "%lu page reads, %lu page writes\n",
446	    mp->pageread, mp->pagewrite);
447
448	sep = "";
449	cnt = 0;
450	TAILQ_FOREACH(bp, &mp->lqh, q) {
451		(void)fprintf(stderr, "%s%d", sep, bp->pgno);
452		if (bp->flags & MPOOL_DIRTY)
453			(void)fprintf(stderr, "d");
454		if (bp->flags & MPOOL_PINNED)
455			(void)fprintf(stderr, "P");
456		if (++cnt == 10) {
457			sep = "\n";
458			cnt = 0;
459		} else
460			sep = ", ";
461
462	}
463	(void)fprintf(stderr, "\n");
464}
465#endif
466