1/*	$NetBSD: rec_open.c,v 1.18 2011/06/26 22:16:46 christos Exp $	*/
2
3/*-
4 * Copyright (c) 1990, 1993, 1994
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Mike Olson.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 *    may be used to endorse or promote products derived from this software
20 *    without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 */
34
35#if HAVE_NBTOOL_CONFIG_H
36#include "nbtool_config.h"
37#endif
38
39#include <sys/cdefs.h>
40__RCSID("$NetBSD: rec_open.c,v 1.18 2011/06/26 22:16:46 christos Exp $");
41
42#include "namespace.h"
43#include <sys/types.h>
44#include <sys/mman.h>
45#include <sys/stat.h>
46
47#include <assert.h>
48#include <errno.h>
49#include <fcntl.h>
50#include <limits.h>
51#include <stddef.h>
52#include <stdio.h>
53#include <unistd.h>
54
55#include <db.h>
56#include "recno.h"
57
58DB *
59__rec_open(const char *fname, int flags, mode_t mode, const RECNOINFO *openinfo,
60    int dflags)
61{
62	BTREE *t;
63	BTREEINFO btopeninfo;
64	DB *dbp;
65	PAGE *h;
66	struct stat sb;
67	int rfd = -1;	/* pacify gcc */
68	int sverrno;
69
70	dbp = NULL;
71	/* Open the user's file -- if this fails, we're done. */
72	if (fname != NULL) {
73#ifndef O_CLOEXEC
74#define O_CLOEXEC 0
75#endif
76		if ((rfd = open(fname, flags | O_CLOEXEC, mode)) == -1)
77			return NULL;
78#if O_CLOEXEC == 0
79		if (fcntl(rfd, F_SETFD, FD_CLOEXEC) == -1)
80			goto err;
81#endif
82	}
83
84	/* Create a btree in memory (backed by disk). */
85	if (openinfo) {
86		if (openinfo->flags & ~(R_FIXEDLEN | R_NOKEY | R_SNAPSHOT))
87			goto einval;
88		btopeninfo.flags = 0;
89		btopeninfo.cachesize = openinfo->cachesize;
90		btopeninfo.maxkeypage = 0;
91		btopeninfo.minkeypage = 0;
92		btopeninfo.psize = openinfo->psize;
93		btopeninfo.compare = NULL;
94		btopeninfo.prefix = NULL;
95		btopeninfo.lorder = openinfo->lorder;
96		dbp = __bt_open(openinfo->bfname,
97		    O_RDWR, S_IRUSR | S_IWUSR, &btopeninfo, dflags);
98	} else
99		dbp = __bt_open(NULL, O_RDWR, S_IRUSR | S_IWUSR, NULL, dflags);
100	if (dbp == NULL)
101		goto err;
102
103	/*
104	 * Some fields in the tree structure are recno specific.  Fill them
105	 * in and make the btree structure look like a recno structure.  We
106	 * don't change the bt_ovflsize value, it's close enough and slightly
107	 * bigger.
108	 */
109	t = dbp->internal;
110	if (openinfo) {
111		if (openinfo->flags & R_FIXEDLEN) {
112			F_SET(t, R_FIXLEN);
113			t->bt_reclen = openinfo->reclen;
114			if (t->bt_reclen == 0)
115				goto einval;
116		}
117		t->bt_bval = openinfo->bval;
118	} else
119		t->bt_bval = '\n';
120
121	F_SET(t, R_RECNO);
122	if (fname == NULL)
123		F_SET(t, R_EOF | R_INMEM);
124	else
125		t->bt_rfd = rfd;
126
127	if (fname != NULL) {
128		/*
129		 * In 4.4BSD, stat(2) returns true for ISSOCK on pipes.
130		 * Unfortunately, that's not portable, so we use lseek
131		 * and check the errno values.
132		 */
133		errno = 0;
134		if (lseek(rfd, (off_t)0, SEEK_CUR) == -1 && errno == ESPIPE) {
135			switch (flags & O_ACCMODE) {
136			case O_RDONLY:
137				F_SET(t, R_RDONLY);
138				break;
139			default:
140				goto einval;
141			}
142slow:			if ((t->bt_rfp = fdopen(rfd, "r")) == NULL)
143				goto err;
144			F_SET(t, R_CLOSEFP);
145			t->bt_irec =
146			    F_ISSET(t, R_FIXLEN) ? __rec_fpipe : __rec_vpipe;
147		} else {
148			switch (flags & O_ACCMODE) {
149			case O_RDONLY:
150				F_SET(t, R_RDONLY);
151				break;
152			case O_RDWR:
153				break;
154			default:
155				goto einval;
156			}
157
158			if (fstat(rfd, &sb))
159				goto err;
160			/*
161			 * Kluge -- we'd like to test to see if the file is too
162			 * big to mmap.  Since, we don't know what size or type
163			 * off_t's or size_t's are, what the largest unsigned
164			 * integral type is, or what random insanity the local
165			 * C compiler will perpetrate, doing the comparison in
166			 * a portable way is flatly impossible.  Hope that mmap
167			 * fails if the file is too large.
168			 */
169			if (sb.st_size == 0)
170				F_SET(t, R_EOF);
171			else {
172#ifdef MMAP_NOT_AVAILABLE
173				/*
174				 * XXX
175				 * Mmap doesn't work correctly on many current
176				 * systems.  In particular, it can fail subtly,
177				 * with cache coherency problems.  Don't use it
178				 * for now.
179				 */
180				t->bt_msize = sb.st_size;
181				if ((t->bt_smap = mmap(NULL, t->bt_msize,
182				    PROT_READ, MAP_FILE | MAP_PRIVATE, rfd,
183				    (off_t)0)) == (caddr_t)-1)
184					goto slow;
185				t->bt_cmap = t->bt_smap;
186				t->bt_emap = t->bt_smap + sb.st_size;
187				t->bt_irec = F_ISSET(t, R_FIXLEN) ?
188				    __rec_fmap : __rec_vmap;
189				F_SET(t, R_MEMMAPPED);
190#else
191				goto slow;
192#endif
193			}
194		}
195	}
196
197	/* Use the recno routines. */
198	dbp->close = __rec_close;
199	dbp->del = __rec_delete;
200	dbp->fd = __rec_fd;
201	dbp->get = __rec_get;
202	dbp->put = __rec_put;
203	dbp->seq = __rec_seq;
204	dbp->sync = __rec_sync;
205
206	/* If the root page was created, reset the flags. */
207	if ((h = mpool_get(t->bt_mp, P_ROOT, 0)) == NULL)
208		goto err;
209	if ((h->flags & P_TYPE) == P_BLEAF) {
210		F_CLR(h, P_TYPE);
211		F_SET(h, P_RLEAF);
212		mpool_put(t->bt_mp, h, MPOOL_DIRTY);
213	} else
214		mpool_put(t->bt_mp, h, 0);
215
216	if (openinfo && openinfo->flags & R_SNAPSHOT &&
217	    !F_ISSET(t, R_EOF | R_INMEM) &&
218	    t->bt_irec(t, MAX_REC_NUMBER) == RET_ERROR)
219                goto err;
220	return (dbp);
221
222einval:	errno = EINVAL;
223err:	sverrno = errno;
224	if (dbp != NULL)
225		(void)__bt_close(dbp);
226	if (fname != NULL)
227		(void)close(rfd);
228	errno = sverrno;
229	return (NULL);
230}
231
232int
233__rec_fd(const DB *dbp)
234{
235	BTREE *t;
236
237	t = dbp->internal;
238
239	/* Toss any page pinned across calls. */
240	if (t->bt_pinned != NULL) {
241		mpool_put(t->bt_mp, t->bt_pinned, 0);
242		t->bt_pinned = NULL;
243	}
244
245	/* In-memory database can't have a file descriptor. */
246	if (F_ISSET(t, R_INMEM)) {
247		errno = ENOENT;
248		return (-1);
249	}
250	return (t->bt_rfd);
251}
252