mpool.c revision 70512
1/*-
2 * Copyright (c) 1990, 1993, 1994
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 * $FreeBSD: head/lib/libc/db/mpool/mpool.c 70512 2000-12-30 16:10:32Z green $
34 */
35
36#if defined(LIBC_SCCS) && !defined(lint)
37static char sccsid[] = "@(#)mpool.c	8.5 (Berkeley) 7/26/94";
38#endif /* LIBC_SCCS and not lint */
39
40#include <sys/param.h>
41#include <sys/queue.h>
42#include <sys/stat.h>
43
44#include <errno.h>
45#include <stdio.h>
46#include <stdlib.h>
47#include <string.h>
48#include <unistd.h>
49
50#include <db.h>
51
52#define	__MPOOLINTERFACE_PRIVATE
53#include <mpool.h>
54
55static BKT *mpool_bkt __P((MPOOL *));
56static BKT *mpool_look __P((MPOOL *, pgno_t));
57static int  mpool_write __P((MPOOL *, BKT *));
58
59/*
60 * mpool_open --
61 *	Initialize a memory pool.
62 */
63MPOOL *
64mpool_open(key, fd, pagesize, maxcache)
65	void *key;
66	int fd;
67	pgno_t pagesize, maxcache;
68{
69	struct stat sb;
70	MPOOL *mp;
71	int entry;
72
73	/*
74	 * Get information about the file.
75	 *
76	 * XXX
77	 * We don't currently handle pipes, although we should.
78	 */
79	if (fstat(fd, &sb))
80		return (NULL);
81	if (!S_ISREG(sb.st_mode)) {
82		errno = ESPIPE;
83		return (NULL);
84	}
85
86	/* Allocate and initialize the MPOOL cookie. */
87	if ((mp = (MPOOL *)calloc(1, sizeof(MPOOL))) == NULL)
88		return (NULL);
89	TAILQ_INIT(&mp->lqh);
90	for (entry = 0; entry < HASHSIZE; ++entry)
91		TAILQ_INIT(&mp->hqh[entry]);
92	mp->maxcache = maxcache;
93	mp->npages = sb.st_size / pagesize;
94	mp->pagesize = pagesize;
95	mp->fd = fd;
96	return (mp);
97}
98
99/*
100 * mpool_filter --
101 *	Initialize input/output filters.
102 */
103void
104mpool_filter(mp, pgin, pgout, pgcookie)
105	MPOOL *mp;
106	void (*pgin) __P((void *, pgno_t, void *));
107	void (*pgout) __P((void *, pgno_t, void *));
108	void *pgcookie;
109{
110	mp->pgin = pgin;
111	mp->pgout = pgout;
112	mp->pgcookie = pgcookie;
113}
114
115/*
116 * mpool_new --
117 *	Get a new page of memory.
118 */
119void *
120mpool_new(mp, pgnoaddr)
121	MPOOL *mp;
122	pgno_t *pgnoaddr;
123{
124	struct _hqh *head;
125	BKT *bp;
126
127	if (mp->npages == MAX_PAGE_NUMBER) {
128		(void)fprintf(stderr, "mpool_new: page allocation overflow.\n");
129		abort();
130	}
131#ifdef STATISTICS
132	++mp->pagenew;
133#endif
134	/*
135	 * Get a BKT from the cache.  Assign a new page number, attach
136	 * it to the head of the hash chain, the tail of the lru chain,
137	 * and return.
138	 */
139	if ((bp = mpool_bkt(mp)) == NULL)
140		return (NULL);
141	*pgnoaddr = bp->pgno = mp->npages++;
142	bp->flags = MPOOL_PINNED;
143
144	head = &mp->hqh[HASHKEY(bp->pgno)];
145	TAILQ_INSERT_HEAD(head, bp, hq);
146	TAILQ_INSERT_TAIL(&mp->lqh, bp, q);
147	return (bp->page);
148}
149
150/*
151 * mpool_get
152 *	Get a page.
153 */
154void *
155mpool_get(mp, pgno, flags)
156	MPOOL *mp;
157	pgno_t pgno;
158	u_int flags;				/* XXX not used? */
159{
160	struct _hqh *head;
161	BKT *bp;
162	off_t off;
163	int nr;
164
165	/* Check for attempt to retrieve a non-existent page. */
166	if (pgno >= mp->npages) {
167		errno = EINVAL;
168		return (NULL);
169	}
170
171#ifdef STATISTICS
172	++mp->pageget;
173#endif
174
175	/* Check for a page that is cached. */
176	if ((bp = mpool_look(mp, pgno)) != NULL) {
177#ifdef DEBUG
178		if (bp->flags & MPOOL_PINNED) {
179			(void)fprintf(stderr,
180			    "mpool_get: page %d already pinned\n", bp->pgno);
181			abort();
182		}
183#endif
184		/*
185		 * Move the page to the head of the hash chain and the tail
186		 * of the lru chain.
187		 */
188		head = &mp->hqh[HASHKEY(bp->pgno)];
189		TAILQ_REMOVE(head, bp, hq);
190		TAILQ_INSERT_HEAD(head, bp, hq);
191		TAILQ_REMOVE(&mp->lqh, bp, q);
192		TAILQ_INSERT_TAIL(&mp->lqh, bp, q);
193
194		/* Return a pinned page. */
195		bp->flags |= MPOOL_PINNED;
196		return (bp->page);
197	}
198
199	/* Get a page from the cache. */
200	if ((bp = mpool_bkt(mp)) == NULL)
201		return (NULL);
202
203	/* Read in the contents. */
204#ifdef STATISTICS
205	++mp->pageread;
206#endif
207	off = mp->pagesize * pgno;
208	if (lseek(mp->fd, off, SEEK_SET) != off)
209		return (NULL);
210	if ((nr = _read(mp->fd, bp->page, mp->pagesize)) != mp->pagesize) {
211		if (nr >= 0)
212			errno = EFTYPE;
213		return (NULL);
214	}
215
216	/* Set the page number, pin the page. */
217	bp->pgno = pgno;
218	bp->flags = MPOOL_PINNED;
219
220	/*
221	 * Add the page to the head of the hash chain and the tail
222	 * of the lru chain.
223	 */
224	head = &mp->hqh[HASHKEY(bp->pgno)];
225	TAILQ_INSERT_HEAD(head, bp, hq);
226	TAILQ_INSERT_TAIL(&mp->lqh, bp, q);
227
228	/* Run through the user's filter. */
229	if (mp->pgin != NULL)
230		(mp->pgin)(mp->pgcookie, bp->pgno, bp->page);
231
232	return (bp->page);
233}
234
235/*
236 * mpool_put
237 *	Return a page.
238 */
239int
240mpool_put(mp, page, flags)
241	MPOOL *mp;
242	void *page;
243	u_int flags;
244{
245	BKT *bp;
246
247#ifdef STATISTICS
248	++mp->pageput;
249#endif
250	bp = (BKT *)((char *)page - sizeof(BKT));
251#ifdef DEBUG
252	if (!(bp->flags & MPOOL_PINNED)) {
253		(void)fprintf(stderr,
254		    "mpool_put: page %d not pinned\n", bp->pgno);
255		abort();
256	}
257#endif
258	bp->flags &= ~MPOOL_PINNED;
259	bp->flags |= flags & MPOOL_DIRTY;
260	return (RET_SUCCESS);
261}
262
263/*
264 * mpool_close
265 *	Close the buffer pool.
266 */
267int
268mpool_close(mp)
269	MPOOL *mp;
270{
271	BKT *bp;
272
273	/* Free up any space allocated to the lru pages. */
274	while (!TAILQ_EMPTY(&mp->lqh)) {
275		bp = TAILQ_FIRST(&mp->lqh);
276		TAILQ_REMOVE(&mp->lqh, bp, q);
277		free(bp);
278	}
279
280	/* Free the MPOOL cookie. */
281	free(mp);
282	return (RET_SUCCESS);
283}
284
285/*
286 * mpool_sync
287 *	Sync the pool to disk.
288 */
289int
290mpool_sync(mp)
291	MPOOL *mp;
292{
293	BKT *bp;
294
295	/* Walk the lru chain, flushing any dirty pages to disk. */
296	TAILQ_FOREACH(bp, &mp->lqh, q)
297		if (bp->flags & MPOOL_DIRTY &&
298		    mpool_write(mp, bp) == RET_ERROR)
299			return (RET_ERROR);
300
301	/* Sync the file descriptor. */
302	return (_fsync(mp->fd) ? RET_ERROR : RET_SUCCESS);
303}
304
305/*
306 * mpool_bkt
307 *	Get a page from the cache (or create one).
308 */
309static BKT *
310mpool_bkt(mp)
311	MPOOL *mp;
312{
313	struct _hqh *head;
314	BKT *bp;
315
316	/* If under the max cached, always create a new page. */
317	if (mp->curcache < mp->maxcache)
318		goto new;
319
320	/*
321	 * If the cache is max'd out, walk the lru list for a buffer we
322	 * can flush.  If we find one, write it (if necessary) and take it
323	 * off any lists.  If we don't find anything we grow the cache anyway.
324	 * The cache never shrinks.
325	 */
326	TAILQ_FOREACH(bp, &mp->lqh, q)
327		if (!(bp->flags & MPOOL_PINNED)) {
328			/* Flush if dirty. */
329			if (bp->flags & MPOOL_DIRTY &&
330			    mpool_write(mp, bp) == RET_ERROR)
331				return (NULL);
332#ifdef STATISTICS
333			++mp->pageflush;
334#endif
335			/* Remove from the hash and lru queues. */
336			head = &mp->hqh[HASHKEY(bp->pgno)];
337			TAILQ_REMOVE(head, bp, hq);
338			TAILQ_REMOVE(&mp->lqh, bp, q);
339#ifdef DEBUG
340			{ void *spage;
341				spage = bp->page;
342				memset(bp, 0xff, sizeof(BKT) + mp->pagesize);
343				bp->page = spage;
344			}
345#endif
346			return (bp);
347		}
348
349new:	if ((bp = (BKT *)malloc(sizeof(BKT) + mp->pagesize)) == NULL)
350		return (NULL);
351#ifdef STATISTICS
352	++mp->pagealloc;
353#endif
354#if defined(DEBUG) || defined(PURIFY)
355	memset(bp, 0xff, sizeof(BKT) + mp->pagesize);
356#endif
357	bp->page = (char *)bp + sizeof(BKT);
358	++mp->curcache;
359	return (bp);
360}
361
362/*
363 * mpool_write
364 *	Write a page to disk.
365 */
366static int
367mpool_write(mp, bp)
368	MPOOL *mp;
369	BKT *bp;
370{
371	off_t off;
372
373#ifdef STATISTICS
374	++mp->pagewrite;
375#endif
376
377	/* Run through the user's filter. */
378	if (mp->pgout)
379		(mp->pgout)(mp->pgcookie, bp->pgno, bp->page);
380
381	off = mp->pagesize * bp->pgno;
382	if (lseek(mp->fd, off, SEEK_SET) != off)
383		return (RET_ERROR);
384	if (_write(mp->fd, bp->page, mp->pagesize) != mp->pagesize)
385		return (RET_ERROR);
386
387	bp->flags &= ~MPOOL_DIRTY;
388	return (RET_SUCCESS);
389}
390
391/*
392 * mpool_look
393 *	Lookup a page in the cache.
394 */
395static BKT *
396mpool_look(mp, pgno)
397	MPOOL *mp;
398	pgno_t pgno;
399{
400	struct _hqh *head;
401	BKT *bp;
402
403	head = &mp->hqh[HASHKEY(pgno)];
404	TAILQ_FOREACH(bp, head, hq)
405		if (bp->pgno == pgno) {
406#ifdef STATISTICS
407			++mp->cachehit;
408#endif
409			return (bp);
410		}
411#ifdef STATISTICS
412	++mp->cachemiss;
413#endif
414	return (NULL);
415}
416
#ifdef STATISTICS
/*
 * mpool_stat --
 *	Print out cache statistics.
 *
 *	Writes the pool's counters to stderr, followed by the page number
 *	of every buffer on the lru chain in chain order, ten per line.  A
 *	trailing "d" marks a dirty page and "P" a pinned one.
 *
 *	Every integer argument is cast to unsigned long so that it matches
 *	its %lu conversion whatever the width of the underlying field.
 */
void
mpool_stat(MPOOL *mp)
{
	BKT *bp;
	int cnt;
	const char *sep;

	(void)fprintf(stderr, "%lu pages in the file\n",
	    (unsigned long)mp->npages);
	(void)fprintf(stderr,
	    "page size %lu, cacheing %lu pages of %lu page max cache\n",
	    (unsigned long)mp->pagesize, (unsigned long)mp->curcache,
	    (unsigned long)mp->maxcache);
	(void)fprintf(stderr, "%lu page puts, %lu page gets, %lu page new\n",
	    (unsigned long)mp->pageput, (unsigned long)mp->pageget,
	    (unsigned long)mp->pagenew);
	(void)fprintf(stderr, "%lu page allocs, %lu page flushes\n",
	    (unsigned long)mp->pagealloc, (unsigned long)mp->pageflush);
	/* Skip the hit rate when there have been no lookups at all. */
	if (mp->cachehit + mp->cachemiss)
		(void)fprintf(stderr,
		    "%.0f%% cache hit rate (%lu hits, %lu misses)\n",
		    ((double)mp->cachehit / (mp->cachehit + mp->cachemiss))
		    * 100, (unsigned long)mp->cachehit,
		    (unsigned long)mp->cachemiss);
	(void)fprintf(stderr, "%lu page reads, %lu page writes\n",
	    (unsigned long)mp->pageread, (unsigned long)mp->pagewrite);

	sep = "";
	cnt = 0;
	TAILQ_FOREACH(bp, &mp->lqh, q) {
		(void)fprintf(stderr, "%s%lu", sep, (unsigned long)bp->pgno);
		if (bp->flags & MPOOL_DIRTY)
			(void)fprintf(stderr, "d");
		if (bp->flags & MPOOL_PINNED)
			(void)fprintf(stderr, "P");
		/* Start a new output line after every tenth entry. */
		if (++cnt == 10) {
			sep = "\n";
			cnt = 0;
		} else
			sep = ", ";

	}
	(void)fprintf(stderr, "\n");
}
#endif
464