backend.c revision 6035:22dc111db782
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27#pragma ident	"%Z%%M%	%I%	%E% SMI"
28
/*
 * sqlite is not compatible with _FILE_OFFSET_BITS=64, but we need to
 * be able to statvfs(2) possibly large file systems.  This define gives
 * us access to the transitional interfaces.  See lfcompile64(5) for how
 * _LARGEFILE64_SOURCE works.
 */
35#define	_LARGEFILE64_SOURCE
36
37#include <assert.h>
38#include <door.h>
39#include <dirent.h>
40#include <errno.h>
41#include <fcntl.h>
42#include <limits.h>
43#include <pthread.h>
44#include <stdarg.h>
45#include <stdio.h>
46#include <stdlib.h>
47#include <string.h>
48#include <sys/stat.h>
49#include <sys/statvfs.h>
50#include <unistd.h>
51#include <zone.h>
52#include <libscf_priv.h>
53
54#include "configd.h"
55#include "repcache_protocol.h"
56
57#include <sqlite.h>
58#include <sqlite-misc.h>
59
60/*
61 * This file has two purposes:
62 *
63 * 1. It contains the database schema, and the code for setting up our backend
64 *    databases, including installing said schema.
65 *
66 * 2. It provides a simplified interface to the SQL database library, and
67 *    synchronizes MT access to the database.
68 */
69
70typedef struct backend_spent {
71	uint64_t bs_count;
72	hrtime_t bs_time;
73	hrtime_t bs_vtime;
74} backend_spent_t;
75
76typedef struct backend_totals {
77	backend_spent_t	bt_lock;	/* waiting for lock */
78	backend_spent_t	bt_exec;	/* time spent executing SQL */
79} backend_totals_t;
80
81typedef struct sqlite_backend {
82	pthread_mutex_t	be_lock;
83	pthread_t	be_thread;	/* thread holding lock */
84	struct sqlite	*be_db;
85	const char	*be_path;	/* path to db */
86	int		be_readonly;	/* readonly at start, and still is */
87	int		be_writing;	/* held for writing */
88	backend_type_t	be_type;	/* type of db */
89	hrtime_t	be_lastcheck;	/* time of last read-only check */
90	backend_totals_t be_totals[2];	/* one for reading, one for writing */
91} sqlite_backend_t;
92
93struct backend_tx {
94	sqlite_backend_t	*bt_be;
95	int			bt_readonly;
96	int			bt_type;
97	int			bt_full;	/* SQLITE_FULL during tx */
98};
99
/*
 * UPDATE_TOTALS_WR(sb, writing, field, ts, vts) charges one sample to the
 * "field" accumulator (bt_lock or bt_exec) of (sb)->be_totals[]: the count
 * is incremented and the gethrtime()/gethrvtime() time elapsed since ts/vts
 * is added in.  Any nonzero "writing" selects slot 1, zero selects slot 0.
 *
 * The expansion is wrapped in do { } while (0) so that the macro behaves as
 * a single statement (e.g. as the body of an unbraced if with an else), and
 * ts/vts are parenthesized so that expression arguments are subtracted as a
 * whole.  Callers must follow the invocation with a semicolon.
 */
#define	UPDATE_TOTALS_WR(sb, writing, field, ts, vts) do {		\
	backend_spent_t *bsp_ = &(sb)->be_totals[!!(writing)].field;	\
	bsp_->bs_count++;						\
	bsp_->bs_time += (gethrtime() - (ts));				\
	bsp_->bs_vtime += (gethrvtime() - (vts));			\
} while (0)

/*
 * UPDATE_TOTALS() is UPDATE_TOTALS_WR() with the reader/writer slot taken
 * from the backend's current be_writing flag.
 */
#define	UPDATE_TOTALS(sb, field, ts, vts) \
	UPDATE_TOTALS_WR(sb, (sb)->be_writing, field, ts, vts)
109
/*
 * A buffer together with its size.  NOTE(review): presumably holds the SQL
 * text of a query under construction; the users are outside this excerpt.
 */
struct backend_query {
	char	*bq_buf;	/* the buffer */
	size_t	bq_size;	/* its size */
};
114
/*
 * Describes one table of the schema: its name and the SQL text of its
 * column definitions.  Arrays of these end with a { NULL, NULL } entry.
 */
struct backend_tbl_info {
	const char *bti_name;	/* table name */
	const char *bti_cols;	/* column definitions */
};
119
/*
 * Describes one index of the schema: the table it is built on, a name for
 * the index, and the indexed column list.  Arrays of these end with a
 * { NULL, NULL, NULL } entry.
 */
struct backend_idx_info {
	const char *bxi_tbl;	/* table being indexed */
	const char *bxi_idx;	/* index name */
	const char *bxi_cols;	/* comma-separated indexed columns */
};
125
/*
 * Panic state.  backend_panic_thread becomes nonzero once a thread has
 * claimed the panic in backend_panic(); the claim is made under
 * backend_panic_lock, and later arrivals wait on backend_panic_cv.
 */
static pthread_mutex_t backend_panic_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t backend_panic_cv = PTHREAD_COND_INITIALIZER;
pthread_t backend_panic_thread = 0;

/*
 * Debugging knobs.
 */
int backend_do_trace = 0;		/* invoke tracing callback */
int backend_print_trace = 0;		/* tracing callback prints SQL */
int backend_panic_abort = 0;		/* abort when panicking */

/*
 * Interval between read-only checks while starting up.
 */
#define	BACKEND_READONLY_CHECK_INTERVAL	(2 * (hrtime_t)NANOSEC)

/*
 * Version of the schema defined by the tables below.  Any change to that
 * schema should bump this number.
 */
#define	BACKEND_SCHEMA_VERSION		5
141
142static struct backend_tbl_info tbls_normal[] = { /* BACKEND_TYPE_NORMAL */
143	/*
144	 * service_tbl holds all services.  svc_id is the identifier of the
145	 * service.
146	 */
147	{
148		"service_tbl",
149		"svc_id          INTEGER PRIMARY KEY,"
150		"svc_name        CHAR(256) NOT NULL"
151	},
152
153	/*
154	 * instance_tbl holds all of the instances.  The parent service id
155	 * is instance_svc.
156	 */
157	{
158		"instance_tbl",
159		"instance_id     INTEGER PRIMARY KEY,"
160		"instance_name   CHAR(256) NOT NULL,"
161		"instance_svc    INTEGER NOT NULL"
162	},
163
164	/*
165	 * snapshot_lnk_tbl links (instance, snapshot name) with snapshots.
166	 */
167	{
168		"snapshot_lnk_tbl",
169		"lnk_id          INTEGER PRIMARY KEY,"
170		"lnk_inst_id     INTEGER NOT NULL,"
171		"lnk_snap_name   CHAR(256) NOT NULL,"
172		"lnk_snap_id     INTEGER NOT NULL"
173	},
174
175	/*
176	 * snaplevel_tbl maps a snapshot id to a set of named, ordered
177	 * snaplevels.
178	 */
179	{
180		"snaplevel_tbl",
181		"snap_id                 INTEGER NOT NULL,"
182		"snap_level_num          INTEGER NOT NULL,"
183		"snap_level_id           INTEGER NOT NULL,"
184		"snap_level_service_id   INTEGER NOT NULL,"
185		"snap_level_service      CHAR(256) NOT NULL,"
186		"snap_level_instance_id  INTEGER NULL,"
187		"snap_level_instance     CHAR(256) NULL"
188	},
189
190	/*
191	 * snaplevel_lnk_tbl links snaplevels to property groups.
192	 * snaplvl_pg_* is identical to the original property group,
193	 * and snaplvl_gen_id overrides the generation number.
194	 * The service/instance ids are as in the snaplevel.
195	 */
196	{
197		"snaplevel_lnk_tbl",
198		"snaplvl_level_id INTEGER NOT NULL,"
199		"snaplvl_pg_id    INTEGER NOT NULL,"
200		"snaplvl_pg_name  CHAR(256) NOT NULL,"
201		"snaplvl_pg_type  CHAR(256) NOT NULL,"
202		"snaplvl_pg_flags INTEGER NOT NULL,"
203		"snaplvl_gen_id   INTEGER NOT NULL"
204	},
205
206	{ NULL, NULL }
207};
208
209static struct backend_idx_info idxs_normal[] = { /* BACKEND_TYPE_NORMAL */
210	{ "service_tbl",	"name",	"svc_name" },
211	{ "instance_tbl",	"name",	"instance_svc, instance_name" },
212	{ "snapshot_lnk_tbl",	"name",	"lnk_inst_id, lnk_snap_name" },
213	{ "snapshot_lnk_tbl",	"snapid", "lnk_snap_id" },
214	{ "snaplevel_tbl",	"id",	"snap_id" },
215	{ "snaplevel_lnk_tbl",	"id",	"snaplvl_pg_id" },
216	{ "snaplevel_lnk_tbl",	"level", "snaplvl_level_id" },
217	{ NULL, NULL, NULL }
218};
219
220static struct backend_tbl_info tbls_np[] = { /* BACKEND_TYPE_NONPERSIST */
221	{ NULL, NULL }
222};
223
224static struct backend_idx_info idxs_np[] = {	/* BACKEND_TYPE_NONPERSIST */
225	{ NULL, NULL, NULL }
226};
227
228static struct backend_tbl_info tbls_common[] = { /* all backend types */
229	/*
230	 * pg_tbl defines property groups.  They are associated with a single
231	 * service or instance.  The pg_gen_id links them with the latest
232	 * "edited" version of its properties.
233	 */
234	{
235		"pg_tbl",
236		"pg_id           INTEGER PRIMARY KEY,"
237		"pg_parent_id    INTEGER NOT NULL,"
238		"pg_name         CHAR(256) NOT NULL,"
239		"pg_type         CHAR(256) NOT NULL,"
240		"pg_flags        INTEGER NOT NULL,"
241		"pg_gen_id       INTEGER NOT NULL"
242	},
243
244	/*
245	 * prop_lnk_tbl links a particular pg_id and gen_id to a set of
246	 * (prop_name, prop_type, val_id) trios.
247	 */
248	{
249		"prop_lnk_tbl",
250		"lnk_prop_id     INTEGER PRIMARY KEY,"
251		"lnk_pg_id       INTEGER NOT NULL,"
252		"lnk_gen_id      INTEGER NOT NULL,"
253		"lnk_prop_name   CHAR(256) NOT NULL,"
254		"lnk_prop_type   CHAR(2) NOT NULL,"
255		"lnk_val_id      INTEGER"
256	},
257
258	/*
259	 * value_tbl maps a value_id to a set of values.  For any given
260	 * value_id, value_type is constant.
261	 */
262	{
263		"value_tbl",
264		"value_id        INTEGER NOT NULL,"
265		"value_type      CHAR(1) NOT NULL,"
266		"value_value     VARCHAR NOT NULL"
267	},
268
269	/*
270	 * id_tbl has one row per id space
271	 */
272	{
273		"id_tbl",
274		"id_name         STRING NOT NULL,"
275		"id_next         INTEGER NOT NULL"
276	},
277
278	/*
279	 * schema_version has a single row, which contains
280	 * BACKEND_SCHEMA_VERSION at the time of creation.
281	 */
282	{
283		"schema_version",
284		"schema_version  INTEGER"
285	},
286	{ NULL, NULL }
287};
288
289static struct backend_idx_info idxs_common[] = { /* all backend types */
290	{ "pg_tbl",		"parent", "pg_parent_id" },
291	{ "pg_tbl",		"name",	"pg_parent_id, pg_name" },
292	{ "pg_tbl",		"type",	"pg_parent_id, pg_type" },
293	{ "prop_lnk_tbl",	"base",	"lnk_pg_id, lnk_gen_id" },
294	{ "prop_lnk_tbl",	"val",	"lnk_val_id" },
295	{ "value_tbl",		"id",	"value_id" },
296	{ "id_tbl",		"id",	"id_name" },
297	{ NULL, NULL, NULL }
298};
299
/*
 * Argument block for run_single_int_callback(): where to store the parsed
 * integer, and a result code that the callback sets to
 * REP_PROTOCOL_SUCCESS once it has stored one.
 */
struct run_single_int_info {
	uint32_t	*rs_out;	/* destination for the value */
	int		rs_result;	/* REP_PROTOCOL_* status */
};
304
305/*ARGSUSED*/
306static int
307run_single_int_callback(void *arg, int columns, char **vals, char **names)
308{
309	struct run_single_int_info *info = arg;
310	uint32_t val;
311
312	char *endptr = vals[0];
313
314	assert(info->rs_result != REP_PROTOCOL_SUCCESS);
315	assert(columns == 1);
316
317	if (vals[0] == NULL)
318		return (BACKEND_CALLBACK_CONTINUE);
319
320	errno = 0;
321	val = strtoul(vals[0], &endptr, 10);
322	if ((val == 0 && endptr == vals[0]) || *endptr != 0 || errno != 0)
323		backend_panic("malformed integer \"%20s\"", vals[0]);
324
325	*info->rs_out = val;
326	info->rs_result = REP_PROTOCOL_SUCCESS;
327	return (BACKEND_CALLBACK_CONTINUE);
328}
329
330/*ARGSUSED*/
331int
332backend_fail_if_seen(void *arg, int columns, char **vals, char **names)
333{
334	return (BACKEND_CALLBACK_ABORT);
335}
336
337/*
338 * check to see if we can successfully start a transaction;  if not, the
339 * filesystem is mounted read-only.
340 */
341static int
342backend_is_readonly(struct sqlite *db, const char *path)
343{
344	int r;
345	statvfs64_t stat;
346
347	if (statvfs64(path, &stat) == 0 && (stat.f_flag & ST_RDONLY))
348		return (SQLITE_READONLY);
349
350	r = sqlite_exec(db,
351	    "BEGIN TRANSACTION; "
352	    "UPDATE schema_version SET schema_version = schema_version; ",
353	    NULL, NULL, NULL);
354	(void) sqlite_exec(db, "ROLLBACK TRANSACTION", NULL, NULL, NULL);
355	return (r);
356}
357
358static void
359backend_trace_sql(void *arg, const char *sql)
360{
361	sqlite_backend_t *be = arg;
362
363	if (backend_print_trace) {
364		(void) fprintf(stderr, "%d: %s\n", be->be_type, sql);
365	}
366}
367
368static sqlite_backend_t be_info[BACKEND_TYPE_TOTAL];
369static sqlite_backend_t *bes[BACKEND_TYPE_TOTAL];
370
/*
 * How long backend_panic() waits for each backend lock: 50ms.  The value
 * is stored directly into a timespec's tv_nsec and so must be expressed in
 * nanoseconds.  MILLISEC is the number of milliseconds per second (1000),
 * which makes 50 * MILLISEC only 50 microseconds; scaling by the number of
 * nanoseconds per millisecond gives the intended interval.
 */
#define	BACKEND_PANIC_TIMEOUT	(50 * (NANOSEC / MILLISEC))
372/*
373 * backend_panic() -- some kind of database problem or corruption has been hit.
374 * We attempt to quiesce the other database users -- all of the backend sql
375 * entry points will call backend_panic(NULL) if a panic is in progress, as
376 * will any attempt to start a transaction.
377 *
378 * We give threads holding a backend lock 50ms (BACKEND_PANIC_TIMEOUT) to
379 * either drop the lock or call backend_panic().  If they don't respond in
380 * time, we'll just exit anyway.
381 */
382void
383backend_panic(const char *format, ...)
384{
385	int i;
386	va_list args;
387	int failed = 0;
388
389	(void) pthread_mutex_lock(&backend_panic_lock);
390	if (backend_panic_thread != 0) {
391		(void) pthread_mutex_unlock(&backend_panic_lock);
392		/*
393		 * first, drop any backend locks we're holding, then
394		 * sleep forever on the panic_cv.
395		 */
396		for (i = 0; i < BACKEND_TYPE_TOTAL; i++) {
397			if (bes[i] != NULL &&
398			    bes[i]->be_thread == pthread_self())
399				(void) pthread_mutex_unlock(&bes[i]->be_lock);
400		}
401		(void) pthread_mutex_lock(&backend_panic_lock);
402		for (;;)
403			(void) pthread_cond_wait(&backend_panic_cv,
404			    &backend_panic_lock);
405	}
406	backend_panic_thread = pthread_self();
407	(void) pthread_mutex_unlock(&backend_panic_lock);
408
409	for (i = 0; i < BACKEND_TYPE_TOTAL; i++) {
410		if (bes[i] != NULL && bes[i]->be_thread == pthread_self())
411			(void) pthread_mutex_unlock(&bes[i]->be_lock);
412	}
413
414	va_start(args, format);
415	configd_vcritical(format, args);
416	va_end(args);
417
418	for (i = 0; i < BACKEND_TYPE_TOTAL; i++) {
419		timespec_t rel;
420
421		rel.tv_sec = 0;
422		rel.tv_nsec = BACKEND_PANIC_TIMEOUT;
423
424		if (bes[i] != NULL && bes[i]->be_thread != pthread_self()) {
425			if (pthread_mutex_reltimedlock_np(&bes[i]->be_lock,
426			    &rel) != 0)
427				failed++;
428		}
429	}
430	if (failed) {
431		configd_critical("unable to quiesce database\n");
432	}
433
434	if (backend_panic_abort)
435		abort();
436
437	exit(CONFIGD_EXIT_DATABASE_BAD);
438}
439
440/*
441 * Returns
442 *   _SUCCESS
443 *   _DONE - callback aborted query
444 *   _NO_RESOURCES - out of memory (_FULL & _TOOBIG?)
445 */
446static int
447backend_error(sqlite_backend_t *be, int error, char *errmsg)
448{
449	if (error == SQLITE_OK)
450		return (REP_PROTOCOL_SUCCESS);
451
452	switch (error) {
453	case SQLITE_ABORT:
454		free(errmsg);
455		return (REP_PROTOCOL_DONE);
456
457	case SQLITE_NOMEM:
458	case SQLITE_FULL:
459	case SQLITE_TOOBIG:
460		free(errmsg);
461		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
462
463	default:
464		backend_panic("%s: db error: %s", be->be_path, errmsg);
465		/*NOTREACHED*/
466	}
467}
468
/*
 * Free a backup list as produced by backend_backup_get_prev(): the first
 * out_sz strings of out_arg, and then the array itself.  A count of zero
 * or less frees only the array, and a NULL array is fine.
 */
static void
backend_backup_cleanup(const char **out_arg, ssize_t out_sz)
{
	ssize_t i;

	for (i = 0; i < out_sz; i++)
		free((char *)out_arg[i]);

	free(out_arg);
}
478
/*
 * Helper for backend_backup_get_prev(): does stamp consist of exactly
 * YYYYMMDD_HHMMSS -- fourteen digits with an '_' after the eighth -- and
 * nothing more?  Returns nonzero if so.
 */
static int
backend_backup_stamp_ok(const char *stamp)
{
	/* year, month, day, hour, min, sec, plus an '_' */
	const size_t stamplen = 4 + 5*2 + 1;
	const size_t barpos = 4 + 2*2;
	size_t i;

	for (i = 0; i < stamplen; i++) {
		char c = stamp[i];

		if (i == barpos) {
			if (c != '_')
				return (0);
		} else if (c < '0' || c > '9') {
			return (0);
		}
	}
	return (stamp[stamplen] == '\0');
}

/*
 * Builds an inverse-time-sorted (newest first) array of the existing
 * backup files for a backup base name.  The path is a single buffer, and
 * the pointers look like:
 *
 *	/this/is/a/full/path/to/repository-name-YYYYMMDD_HHMMSS
 *	^pathname		^	       ^(pathname+pathlen)
 *				basename
 *
 * The directory searched is everything up to and including the last '/'
 * of the first pathlen bytes, or "." when there is no '/'.  The buffer is
 * modified temporarily while we work, and is put back before returning.
 *
 * A directory entry is taken only if it is exactly
 * basename-YYYYMMDD_HHMMSS.  Each match is copied with malloc() and
 * insertion-sorted by its timestamp suffix.
 *
 * On success *out_arg is the array (NULL when nothing matched) and the
 * number of elements is returned, 0 if there are no previous backups.  The
 * caller releases the result with backend_backup_cleanup().  On error
 * (the directory cannot be opened, or memory runs out) everything is
 * freed, *out_arg is set to NULL, and -1 is returned.
 */
static ssize_t
backend_backup_get_prev(char *pathname, size_t pathlen, const char ***out_arg)
{
	char **list = NULL;
	char **grown;
	size_t nlist = 0;
	size_t pos;

	char *tail = pathname + pathlen;
	char saved_tail, saved_first;
	char *base, *dirpath;
	size_t baselen;

	DIR *dp;
	struct dirent *de;

	/* cut the buffer off after the base name */
	saved_tail = *tail;
	*tail = '\0';

	base = strrchr(pathname, '/');
	if (base == NULL) {
		base = pathname;
		dirpath = ".";
	} else {
		assert(tail > pathname && base < tail);
		base++;
		dirpath = pathname;
	}
	baselen = strlen(base);

	/*
	 * Chop the base name off just long enough for pathname to name
	 * the directory in the opendir(), then restore it.
	 */
	saved_first = base[0];
	base[0] = '\0';
	dp = opendir(dirpath);
	base[0] = saved_first;

	if (dp == NULL)
		goto fail;

	while ((de = readdir(dp)) != NULL) {
		const char *cand = de->d_name;
		char *entry, *estamp;
		size_t candsz;

		/* must be basename-YYYYMMDD_HHMMSS, or we ignore it */
		if (strncmp(cand, base, baselen) != 0)
			continue;
		if (cand[baselen] != '-')
			continue;
		if (!backend_backup_stamp_ok(cand + baselen + 1))
			continue;

		/* we have a match; take a private copy of the name */
		candsz = strlen(cand) + 1;
		entry = malloc(candsz);
		if (entry == NULL)
			goto fail_closedir;
		(void) memcpy(entry, cand, candsz);
		estamp = strrchr(entry, '-');

		/*
		 * Insertion sort: walk the list, and whenever what we are
		 * carrying sorts after the resident element, leave it there
		 * and carry the resident onward instead.
		 */
		for (pos = 0; pos < nlist; pos++) {
			char *resident = list[pos];
			char *rstamp = strrchr(resident, '-');
			int order = strcmp(estamp, rstamp);

			if (order == 0)
				order = strcmp(entry, resident);

			if (order == 0) {
				/* already present; drop the duplicate */
				free(entry);
				entry = NULL;
				break;
			}
			if (order > 0) {
				list[pos] = entry;
				entry = resident;
				estamp = rstamp;
			}
		}

		if (pos != nlist) {
			assert(entry == NULL);
			continue;
		}

		/* whatever we are still carrying goes on the end */
		grown = realloc(list, (nlist + 1) * sizeof (*list));
		if (grown == NULL) {
			free(entry);
			goto fail_closedir;
		}
		list = grown;
		list[nlist++] = entry;
	}
	(void) closedir(dp);

	/*
	 * Undo the truncation.  base + baselen is the terminator we wrote
	 * at pathname + pathlen, provided the path held no earlier NUL.
	 */
	base[baselen] = saved_tail;

	*out_arg = (const char **)list;
	return ((ssize_t)nlist);

fail_closedir:
	(void) closedir(dp);
fail:
	base[0] = saved_first;
	*tail = saved_tail;

	/* throw away whatever had been collected */
	for (pos = 0; pos < nlist; pos++)
		free(list[pos]);
	free(list);

	*out_arg = NULL;
	return (-1);
}
629
630/*
631 * Copies the repository path into out, a buffer of out_len bytes,
632 * removes the ".db" (or whatever) extension, and, if name is non-NULL,
633 * appends "-name" to it.  If name is non-NULL, it can fail with:
634 *
635 *	_TRUNCATED	will not fit in buffer.
636 *	_BAD_REQUEST	name is not a valid identifier
637 */
638static rep_protocol_responseid_t
639backend_backup_base(sqlite_backend_t *be, const char *name,
640    char *out, size_t out_len)
641{
642	char *p, *q;
643	size_t len;
644
645	/*
646	 * for paths of the form /path/to/foo.db, we truncate at the final
647	 * '.'.
648	 */
649	(void) strlcpy(out, be->be_path, out_len);
650
651	p = strrchr(out, '/');
652	q = strrchr(out, '.');
653
654	if (p != NULL && q != NULL && q > p)
655		*q = 0;
656
657	if (name != NULL) {
658		len = strlen(out);
659		assert(len < out_len);
660
661		out += len;
662		out_len -= len;
663
664		len = strlen(name);
665
666		/*
667		 * verify that the name tag is entirely alphabetic,
668		 * non-empty, and not too long.
669		 */
670		if (len == 0 || len >= REP_PROTOCOL_NAME_LEN ||
671		    uu_check_name(name, UU_NAME_DOMAIN) < 0)
672			return (REP_PROTOCOL_FAIL_BAD_REQUEST);
673
674		if (snprintf(out, out_len, "-%s", name) >= out_len)
675			return (REP_PROTOCOL_FAIL_TRUNCATED);
676	}
677
678	return (REP_PROTOCOL_SUCCESS);
679}
680
/*
 * Helper for backend_check_backup_needed(): read up to len bytes from fd
 * into buf, retrying interrupted and short reads.  Returns the number of
 * bytes read, which is less than len only at end of file, or -1 if
 * read(2) fails.
 */
static ssize_t
backend_backup_read_full(int fd, char *buf, size_t len)
{
	size_t total = 0;
	ssize_t n;

	while (total < len) {
		n = read(fd, buf + total, len - total);
		if (n < 0) {
			if (errno == EINTR)
				continue;
			return (-1);
		}
		if (n == 0)
			break;
		total += (size_t)n;
	}
	return ((ssize_t)total);
}

/*
 * See if a backup is needed.  We do a backup unless both files are
 * byte-for-byte identical.
 *
 * Returns 0 only when rep_name and backup_name are distinct files of equal
 * size whose contents compare equal.  Returns 1 in every other case: the
 * contents differ, either file cannot be opened, fstat(2)ed or read, the
 * two names refer to the same file, or memory for the comparison buffer
 * cannot be had.
 *
 * The files are compared by read(2)ing the descriptors directly.  Each
 * descriptor therefore has a single owner and is closed exactly once;
 * wrapping them in stdio streams and then calling both fclose() and
 * close() would close each descriptor number twice, which in a threaded
 * process can hit a descriptor some other thread has just opened.
 */
static int
backend_check_backup_needed(const char *rep_name, const char *backup_name)
{
	const size_t chunk = 8192;
	int repfd = open(rep_name, O_RDONLY);
	int fd = open(backup_name, O_RDONLY);
	struct stat s_rep, s_backup;
	char *buf = NULL;
	ssize_t n_rep, n_backup;
	int needed = 1;

	if (repfd < 0 || fd < 0)
		goto out;

	if (fstat(repfd, &s_rep) < 0 || fstat(fd, &s_backup) < 0)
		goto out;

	/*
	 * if they are the same file, we need to do a backup to break the
	 * hard link or symlink involved.
	 */
	if (s_rep.st_ino == s_backup.st_ino && s_rep.st_dev == s_backup.st_dev)
		goto out;

	if (s_rep.st_size != s_backup.st_size)
		goto out;

	/* one allocation, split into a half for each file */
	if ((buf = malloc(2 * chunk)) == NULL)
		goto out;

	for (;;) {
		n_rep = backend_backup_read_full(repfd, buf, chunk);
		n_backup = backend_backup_read_full(fd, buf + chunk, chunk);

		/* a read error or a length mismatch means "not identical" */
		if (n_rep < 0 || n_backup < 0 || n_rep != n_backup)
			break;

		/* both files ended together without a difference */
		if (n_rep == 0) {
			needed = 0;
			break;
		}

		if (memcmp(buf, buf + chunk, (size_t)n_rep) != 0)
			break;
	}

out:
	free(buf);
	if (repfd >= 0)
		(void) close(repfd);
	if (fd >= 0)
		(void) close(fd);
	return (needed);
}
742
743/*
744 * This interface is called to perform the actual copy
745 *
746 * Return:
747 *	_FAIL_UNKNOWN		read/write fails
748 *	_FAIL_NO_RESOURCES	out of memory
749 *	_SUCCESS		copy succeeds
750 */
751static rep_protocol_responseid_t
752backend_do_copy(const char *src, int srcfd, const char *dst,
753    int dstfd, size_t *sz)
754{
755	char *buf;
756	off_t nrd, nwr, n, r_off = 0, w_off = 0;
757
758	if ((buf = malloc(8192)) == NULL)
759		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
760
761	while ((nrd = read(srcfd, buf, 8192)) != 0) {
762		if (nrd < 0) {
763			if (errno == EINTR)
764				continue;
765
766			configd_critical(
767			    "Backend copy failed: fails to read from %s "
768			    "at offset %d: %s\n", src, r_off, strerror(errno));
769			free(buf);
770			return (REP_PROTOCOL_FAIL_UNKNOWN);
771		}
772
773		r_off += nrd;
774
775		nwr = 0;
776		do {
777			if ((n = write(dstfd, &buf[nwr], nrd - nwr)) < 0) {
778				if (errno == EINTR)
779					continue;
780
781				configd_critical(
782				    "Backend copy failed: fails to write to %s "
783				    "at offset %d: %s\n", dst, w_off,
784				    strerror(errno));
785				free(buf);
786				return (REP_PROTOCOL_FAIL_UNKNOWN);
787			}
788
789			nwr += n;
790			w_off += n;
791
792		} while (nwr < nrd);
793	}
794
795	if (sz)
796		*sz = w_off;
797
798	free(buf);
799	return (REP_PROTOCOL_SUCCESS);
800}
801
802/*
803 * Can return:
804 *	_BAD_REQUEST		name is not valid
805 *	_TRUNCATED		name is too long for current repository path
806 *	_UNKNOWN		failed for unknown reason (details written to
807 *				console)
808 *	_BACKEND_READONLY	backend is not writable
809 *	_NO_RESOURCES		out of memory
810 *	_SUCCESS		Backup completed successfully.
811 */
812static rep_protocol_responseid_t
813backend_create_backup_locked(sqlite_backend_t *be, const char *name)
814{
815	const char **old_list;
816	ssize_t old_sz;
817	ssize_t old_max = max_repository_backups;
818	ssize_t cur;
819	char *finalname;
820	char *finalpath;
821	char *tmppath;
822	int infd, outfd;
823	size_t len;
824	time_t now;
825	struct tm now_tm;
826	rep_protocol_responseid_t result;
827
828	if ((finalpath = malloc(PATH_MAX)) == NULL)
829		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
830
831	if ((tmppath = malloc(PATH_MAX)) == NULL) {
832		free(finalpath);
833		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
834	}
835
836	if (be->be_readonly) {
837		result = REP_PROTOCOL_FAIL_BACKEND_READONLY;
838		goto out;
839	}
840
841	result = backend_backup_base(be, name, finalpath, PATH_MAX);
842	if (result != REP_PROTOCOL_SUCCESS)
843		goto out;
844
845	if (!backend_check_backup_needed(be->be_path, finalpath)) {
846		result = REP_PROTOCOL_SUCCESS;
847		goto out;
848	}
849
850	/*
851	 * remember the original length, and the basename location
852	 */
853	len = strlen(finalpath);
854	finalname = strrchr(finalpath, '/');
855	if (finalname != NULL)
856		finalname++;
857	else
858		finalname = finalpath;
859
860	(void) strlcpy(tmppath, finalpath, PATH_MAX);
861	if (strlcat(tmppath, "-tmpXXXXXX", PATH_MAX) >= PATH_MAX) {
862		result = REP_PROTOCOL_FAIL_TRUNCATED;
863		goto out;
864	}
865
866	now = time(NULL);
867	if (localtime_r(&now, &now_tm) == NULL) {
868		configd_critical(
869		    "\"%s\" backup failed: localtime(3C) failed: %s\n", name,
870		    be->be_path, strerror(errno));
871		result = REP_PROTOCOL_FAIL_UNKNOWN;
872		goto out;
873	}
874
875	if (strftime(finalpath + len, PATH_MAX - len,
876	    "-%Y""%m""%d""_""%H""%M""%S", &now_tm) >= PATH_MAX - len) {
877		result = REP_PROTOCOL_FAIL_TRUNCATED;
878		goto out;
879	}
880
881	infd = open(be->be_path, O_RDONLY);
882	if (infd < 0) {
883		configd_critical("\"%s\" backup failed: opening %s: %s\n", name,
884		    be->be_path, strerror(errno));
885		result = REP_PROTOCOL_FAIL_UNKNOWN;
886		goto out;
887	}
888
889	outfd = mkstemp(tmppath);
890	if (outfd < 0) {
891		configd_critical("\"%s\" backup failed: mkstemp(%s): %s\n",
892		    name, tmppath, strerror(errno));
893		(void) close(infd);
894		result = REP_PROTOCOL_FAIL_UNKNOWN;
895		goto out;
896	}
897
898	if ((result = backend_do_copy((const char *)be->be_path, infd,
899	    (const char *)tmppath, outfd, NULL)) != REP_PROTOCOL_SUCCESS)
900		goto fail;
901
902	/*
903	 * grab the old list before doing our re-name.
904	 */
905	if (old_max > 0)
906		old_sz = backend_backup_get_prev(finalpath, len, &old_list);
907
908	if (rename(tmppath, finalpath) < 0) {
909		configd_critical(
910		    "\"%s\" backup failed: rename(%s, %s): %s\n",
911		    name, tmppath, finalpath, strerror(errno));
912		result = REP_PROTOCOL_FAIL_UNKNOWN;
913		goto fail;
914	}
915
916	tmppath[len] = 0;	/* strip -XXXXXX, for reference symlink */
917
918	(void) unlink(tmppath);
919	if (symlink(finalname, tmppath) < 0) {
920		configd_critical(
921		    "\"%s\" backup completed, but updating "
922		    "\"%s\" symlink to \"%s\" failed: %s\n",
923		    name, tmppath, finalname, strerror(errno));
924	}
925
926	if (old_max > 0 && old_sz > 0) {
927		/* unlink all but the first (old_max - 1) files */
928		for (cur = old_max - 1; cur < old_sz; cur++) {
929			(void) strlcpy(finalname, old_list[cur],
930			    PATH_MAX - (finalname - finalpath));
931			if (unlink(finalpath) < 0)
932				configd_critical(
933				    "\"%s\" backup completed, but removing old "
934				    "file \"%s\" failed: %s\n",
935				    name, finalpath, strerror(errno));
936		}
937
938		backend_backup_cleanup(old_list, old_sz);
939	}
940
941	result = REP_PROTOCOL_SUCCESS;
942
943fail:
944	(void) close(infd);
945	(void) close(outfd);
946	if (result != REP_PROTOCOL_SUCCESS)
947		(void) unlink(tmppath);
948
949out:
950	free(finalpath);
951	free(tmppath);
952
953	return (result);
954}
955
956static int
957backend_check_readonly(sqlite_backend_t *be, int writing, hrtime_t t)
958{
959	char *errp;
960	struct sqlite *new;
961	int r;
962
963	assert(be->be_readonly);
964	assert(be == bes[BACKEND_TYPE_NORMAL]);
965
966	/*
967	 * If we don't *need* to be writable, only check every once in a
968	 * while.
969	 */
970	if (!writing) {
971		if ((uint64_t)(t - be->be_lastcheck) <
972		    BACKEND_READONLY_CHECK_INTERVAL)
973			return (REP_PROTOCOL_SUCCESS);
974		be->be_lastcheck = t;
975	}
976
977	new = sqlite_open(be->be_path, 0600, &errp);
978	if (new == NULL) {
979		backend_panic("reopening %s: %s\n", be->be_path, errp);
980		/*NOTREACHED*/
981	}
982	r = backend_is_readonly(new, be->be_path);
983
984	if (r != SQLITE_OK) {
985		sqlite_close(new);
986		if (writing)
987			return (REP_PROTOCOL_FAIL_BACKEND_READONLY);
988		return (REP_PROTOCOL_SUCCESS);
989	}
990
991	/*
992	 * We can write!  Swap the db handles, mark ourself writable,
993	 * and make a backup.
994	 */
995	sqlite_close(be->be_db);
996	be->be_db = new;
997	be->be_readonly = 0;
998
999	if (backend_create_backup_locked(be, REPOSITORY_BOOT_BACKUP) !=
1000	    REP_PROTOCOL_SUCCESS) {
1001		configd_critical(
1002		    "unable to create \"%s\" backup of \"%s\"\n",
1003		    REPOSITORY_BOOT_BACKUP, be->be_path);
1004	}
1005
1006	return (REP_PROTOCOL_SUCCESS);
1007}
1008
1009/*
1010 * If t is not BACKEND_TYPE_NORMAL, can fail with
1011 *   _BACKEND_ACCESS - backend does not exist
1012 *
1013 * If writing is nonzero, can also fail with
1014 *   _BACKEND_READONLY - backend is read-only
1015 */
1016static int
1017backend_lock(backend_type_t t, int writing, sqlite_backend_t **bep)
1018{
1019	sqlite_backend_t *be = NULL;
1020	hrtime_t ts, vts;
1021
1022	*bep = NULL;
1023
1024	assert(t == BACKEND_TYPE_NORMAL ||
1025	    t == BACKEND_TYPE_NONPERSIST);
1026
1027	be = bes[t];
1028	if (t == BACKEND_TYPE_NORMAL)
1029		assert(be != NULL);		/* should always be there */
1030
1031	if (be == NULL)
1032		return (REP_PROTOCOL_FAIL_BACKEND_ACCESS);
1033
1034	if (backend_panic_thread != 0)
1035		backend_panic(NULL);		/* don't proceed */
1036
1037	ts = gethrtime();
1038	vts = gethrvtime();
1039	(void) pthread_mutex_lock(&be->be_lock);
1040	UPDATE_TOTALS_WR(be, writing, bt_lock, ts, vts);
1041
1042	if (backend_panic_thread != 0) {
1043		(void) pthread_mutex_unlock(&be->be_lock);
1044		backend_panic(NULL);		/* don't proceed */
1045	}
1046	be->be_thread = pthread_self();
1047
1048	if (be->be_readonly) {
1049		int r;
1050		assert(t == BACKEND_TYPE_NORMAL);
1051
1052		r = backend_check_readonly(be, writing, ts);
1053		if (r != REP_PROTOCOL_SUCCESS) {
1054			be->be_thread = 0;
1055			(void) pthread_mutex_unlock(&be->be_lock);
1056			return (r);
1057		}
1058	}
1059
1060	if (backend_do_trace)
1061		(void) sqlite_trace(be->be_db, backend_trace_sql, be);
1062	else
1063		(void) sqlite_trace(be->be_db, NULL, NULL);
1064
1065	be->be_writing = writing;
1066	*bep = be;
1067	return (REP_PROTOCOL_SUCCESS);
1068}
1069
1070static void
1071backend_unlock(sqlite_backend_t *be)
1072{
1073	be->be_writing = 0;
1074	be->be_thread = 0;
1075	(void) pthread_mutex_unlock(&be->be_lock);
1076}
1077
1078static void
1079backend_destroy(sqlite_backend_t *be)
1080{
1081	if (be->be_db != NULL) {
1082		sqlite_close(be->be_db);
1083		be->be_db = NULL;
1084	}
1085	be->be_thread = 0;
1086	(void) pthread_mutex_unlock(&be->be_lock);
1087	(void) pthread_mutex_destroy(&be->be_lock);
1088}
1089
1090static void
1091backend_create_finish(backend_type_t backend_id, sqlite_backend_t *be)
1092{
1093	assert(MUTEX_HELD(&be->be_lock));
1094	assert(be == &be_info[backend_id]);
1095
1096	bes[backend_id] = be;
1097	(void) pthread_mutex_unlock(&be->be_lock);
1098}
1099
/*
 * Write the whole of the string mess (without its terminating NUL) to fd,
 * continuing after short writes and retrying writes interrupted by a
 * signal (EINTR), as backend_do_copy() does.  The remaining length is kept
 * in a size_t so that long messages are not truncated.
 *
 * Returns 0 once everything has been written, or -1 if write(2) fails.
 */
static int
backend_fd_write(int fd, const char *mess)
{
	size_t left = strlen(mess);
	ssize_t written;

	while (left > 0) {
		written = write(fd, mess, left);
		if (written < 0) {
			if (errno == EINTR)
				continue;
			return (-1);
		}
		mess += written;
		left -= (size_t)written;
	}
	return (0);
}
1114
1115/*
1116 * Can return:
1117 *	_BAD_REQUEST		name is not valid
1118 *	_TRUNCATED		name is too long for current repository path
1119 *	_UNKNOWN		failed for unknown reason (details written to
1120 *				console)
1121 *	_BACKEND_READONLY	backend is not writable
1122 *	_NO_RESOURCES		out of memory
1123 *	_SUCCESS		Backup completed successfully.
1124 */
1125rep_protocol_responseid_t
1126backend_create_backup(const char *name)
1127{
1128	rep_protocol_responseid_t result;
1129	sqlite_backend_t *be;
1130
1131	result = backend_lock(BACKEND_TYPE_NORMAL, 0, &be);
1132	assert(result == REP_PROTOCOL_SUCCESS);
1133
1134	result = backend_create_backup_locked(be, name);
1135	backend_unlock(be);
1136
1137	return (result);
1138}
1139
1140/*
1141 * Copy the repository.  If the sw_back flag is not set, we are
1142 * copying the repository from the default location under /etc/svc to
1143 * the tmpfs /etc/svc/volatile location.  If the flag is set, we are
1144 * copying back to the /etc/svc location from the volatile location
1145 * after manifest-import is completed.
1146 *
1147 * Can return:
1148 *
1149 *	REP_PROTOCOL_SUCCESS		successful copy and rename
1150 *	REP_PROTOCOL_FAIL_UNKNOWN	file operation error
1151 *	REP_PROTOCOL_FAIL_NO_RESOURCES	out of memory
1152 */
1153static rep_protocol_responseid_t
1154backend_switch_copy(const char *src, const char *dst, int sw_back)
1155{
1156	int srcfd, dstfd;
1157	char *tmppath = malloc(PATH_MAX);
1158	rep_protocol_responseid_t res = REP_PROTOCOL_SUCCESS;
1159	struct stat s_buf;
1160	size_t cpsz, sz;
1161
1162	if (tmppath == NULL) {
1163		res = REP_PROTOCOL_FAIL_NO_RESOURCES;
1164		goto out;
1165	}
1166
1167	/*
1168	 * Create and open the related db files
1169	 */
1170	(void) strlcpy(tmppath, dst, PATH_MAX);
1171	sz = strlcat(tmppath, "-XXXXXX", PATH_MAX);
1172	assert(sz < PATH_MAX);
1173	if (sz >= PATH_MAX) {
1174		configd_critical(
1175		    "Backend copy failed: strlcat %s: overflow\n", tmppath);
1176		abort();
1177	}
1178
1179	if ((dstfd = mkstemp(tmppath)) < 0) {
1180		configd_critical("Backend copy failed: mkstemp %s: %s\n",
1181		    tmppath, strerror(errno));
1182		res = REP_PROTOCOL_FAIL_UNKNOWN;
1183		goto out;
1184	}
1185
1186	if ((srcfd = open(src, O_RDONLY)) < 0) {
1187		configd_critical("Backend copy failed: opening %s: %s\n",
1188		    src, strerror(errno));
1189		res = REP_PROTOCOL_FAIL_UNKNOWN;
1190		goto errexit;
1191	}
1192
1193	/*
1194	 * fstat the backend before copy for sanity check.
1195	 */
1196	if (fstat(srcfd, &s_buf) < 0) {
1197		configd_critical("Backend copy failed: fstat %s: %s\n",
1198		    src, strerror(errno));
1199		res = REP_PROTOCOL_FAIL_UNKNOWN;
1200		goto errexit;
1201	}
1202
1203	if ((res = backend_do_copy(src, srcfd, dst, dstfd, &cpsz)) !=
1204	    REP_PROTOCOL_SUCCESS)
1205		goto errexit;
1206
1207	if (cpsz != s_buf.st_size) {
1208		configd_critical("Backend copy failed: incomplete copy\n");
1209		res = REP_PROTOCOL_FAIL_UNKNOWN;
1210		goto errexit;
1211	}
1212
1213	/*
1214	 * Rename tmppath to dst
1215	 */
1216	if (rename(tmppath, dst) < 0) {
1217		configd_critical(
1218		    "Backend copy failed: rename %s to %s: %s\n",
1219		    tmppath, dst, strerror(errno));
1220		res = REP_PROTOCOL_FAIL_UNKNOWN;
1221	}
1222
1223errexit:
1224	if (res != REP_PROTOCOL_SUCCESS && unlink(tmppath) < 0)
1225		configd_critical(
1226		    "Backend copy failed: remove %s: %s\n",
1227		    tmppath, strerror(errno));
1228
1229	(void) close(srcfd);
1230	(void) close(dstfd);
1231
1232out:
1233	free(tmppath);
1234	if (sw_back) {
1235		if (unlink(src) < 0)
1236			configd_critical(
1237			    "Backend copy failed: remove %s: %s\n",
1238			    src, strerror(errno));
1239	}
1240
1241	return (res);
1242}
1243
1244/*
1245 * Perform sanity check on the repository.
1246 * Return 0 if check succeeds or -1 if fails.
1247 */
1248static int
1249backend_switch_check(struct sqlite *be_db, char **errp)
1250{
1251	struct run_single_int_info info;
1252	uint32_t val = -1UL;
1253	int r;
1254
1255	info.rs_out = &val;
1256	info.rs_result = REP_PROTOCOL_FAIL_NOT_FOUND;
1257
1258	r = sqlite_exec(be_db,
1259	    "SELECT schema_version FROM schema_version;",
1260	    run_single_int_callback, &info, errp);
1261
1262	if (r == SQLITE_OK &&
1263	    info.rs_result != REP_PROTOCOL_FAIL_NOT_FOUND &&
1264	    val == BACKEND_SCHEMA_VERSION)
1265		return (0);
1266	else
1267		return (-1);
1268}
1269
1270/*
1271 * Backend switch entry point.  It is called to perform the backend copy and
1272 * switch from src to dst.  First, it blocks all other clients from accessing
1273 * the repository by calling backend_lock to lock the repository.  Upon
1274 * successful lock, copying and switching of the repository are performed.
1275 *
1276 * Can return:
1277 *	REP_PROTOCOL_SUCCESS			successful switch
1278 *	REP_PROTOCOL_FAIL_BACKEND_ACCESS	backen access fails
1279 *	REP_PROTOCOL_FAIL_BACKEND_READONLY	backend is not writable
1280 *	REP_PROTOCOL_FAIL_UNKNOWN		file operation error
1281 *	REP_PROTOCOL_FAIL_NO_RESOURCES		out of memory
1282 */
1283rep_protocol_responseid_t
1284backend_switch(int sw_back)
1285{
1286	rep_protocol_responseid_t result;
1287	sqlite_backend_t *be;
1288	struct sqlite *new;
1289	char *errp;
1290	const char *dst;
1291
1292	result = backend_lock(BACKEND_TYPE_NORMAL, 1, &be);
1293	if (result != REP_PROTOCOL_SUCCESS)
1294		return (result);
1295
1296	if (sw_back) {
1297		dst = REPOSITORY_DB;
1298	} else {
1299		dst = FAST_REPOSITORY_DB;
1300	}
1301
1302	/*
1303	 * Do the actual copy and rename
1304	 */
1305	result = backend_switch_copy(be->be_path, dst, sw_back);
1306	if (result != REP_PROTOCOL_SUCCESS) {
1307		goto errout;
1308	}
1309
1310	/*
1311	 * Do the backend sanity check and switch
1312	 */
1313	new = sqlite_open(dst, 0600, &errp);
1314	if (new != NULL) {
1315		/*
1316		 * Sanity check
1317		 */
1318		if (backend_switch_check(new, &errp) == 0) {
1319			free((char *)be->be_path);
1320			be->be_path = strdup(dst);
1321			if (be->be_path == NULL) {
1322				configd_critical(
1323				    "Backend switch failed: strdup %s: %s\n",
1324				    dst, strerror(errno));
1325				result = REP_PROTOCOL_FAIL_NO_RESOURCES;
1326				sqlite_close(new);
1327			} else {
1328				sqlite_close(be->be_db);
1329				be->be_db = new;
1330			}
1331		} else {
1332			configd_critical(
1333			    "Backend switch failed: integrity check %s: %s\n",
1334			    dst, errp);
1335			result = REP_PROTOCOL_FAIL_BACKEND_ACCESS;
1336		}
1337	} else {
1338		configd_critical("Backend switch failed: sqlite_open %s: %s\n",
1339		    dst, errp);
1340		result = REP_PROTOCOL_FAIL_BACKEND_ACCESS;
1341	}
1342
1343errout:
1344	backend_unlock(be);
1345	return (result);
1346}
1347
1348/*
1349 * This routine is called to attempt the recovery of
1350 * the most recent valid repository if possible when configd
1351 * is restarted for some reasons or when system crashes
1352 * during the switch operation.  The repository databases
1353 * referenced here are indicators of successful switch
1354 * operations.
1355 */
1356static void
1357backend_switch_recovery(void)
1358{
1359	const char *fast_db = FAST_REPOSITORY_DB;
1360	char *errp;
1361	struct stat s_buf;
1362	struct sqlite *be_db;
1363
1364
1365	/*
1366	 * A good transient db containing most recent data can
1367	 * exist if system or svc.configd crashes during the
1368	 * switch operation.  If that is the case, check its
1369	 * integrity and use it.
1370	 */
1371	if (stat(fast_db, &s_buf) < 0) {
1372		return;
1373	}
1374
1375	/*
1376	 * Do sanity check on the db
1377	 */
1378	be_db = sqlite_open(fast_db, 0600, &errp);
1379
1380	if (be_db != NULL) {
1381		if (backend_switch_check(be_db, &errp) == 0)
1382			(void) backend_switch_copy(fast_db, REPOSITORY_DB, 1);
1383	}
1384
1385	(void) unlink(fast_db);
1386}
1387
1388/*ARGSUSED*/
1389static int
1390backend_integrity_callback(void *private, int narg, char **vals, char **cols)
1391{
1392	char **out = private;
1393	char *old = *out;
1394	char *new;
1395	const char *info;
1396	size_t len;
1397	int x;
1398
1399	for (x = 0; x < narg; x++) {
1400		if ((info = vals[x]) != NULL &&
1401		    strcmp(info, "ok") != 0) {
1402			len = (old == NULL)? 0 : strlen(old);
1403			len += strlen(info) + 2;	/* '\n' + '\0' */
1404
1405			new = realloc(old, len);
1406			if (new == NULL)
1407				return (BACKEND_CALLBACK_ABORT);
1408			if (old == NULL)
1409				new[0] = 0;
1410			old = *out = new;
1411			(void) strlcat(new, info, len);
1412			(void) strlcat(new, "\n", len);
1413		}
1414	}
1415	return (BACKEND_CALLBACK_CONTINUE);
1416}
1417
1418#define	BACKEND_CREATE_LOCKED		-2
1419#define	BACKEND_CREATE_FAIL		-1
1420#define	BACKEND_CREATE_SUCCESS		0
1421#define	BACKEND_CREATE_READONLY		1
1422#define	BACKEND_CREATE_NEED_INIT	2
1423static int
1424backend_create(backend_type_t backend_id, const char *db_file,
1425    sqlite_backend_t **bep)
1426{
1427	char *errp;
1428	char *integrity_results = NULL;
1429	sqlite_backend_t *be;
1430	int r;
1431	uint32_t val = -1UL;
1432	struct run_single_int_info info;
1433	int fd;
1434
1435	assert(backend_id >= 0 && backend_id < BACKEND_TYPE_TOTAL);
1436
1437	be = &be_info[backend_id];
1438	assert(be->be_db == NULL);
1439
1440	(void) pthread_mutex_init(&be->be_lock, NULL);
1441	(void) pthread_mutex_lock(&be->be_lock);
1442
1443	be->be_type = backend_id;
1444	be->be_path = strdup(db_file);
1445	if (be->be_path == NULL) {
1446		perror("malloc");
1447		goto fail;
1448	}
1449
1450	be->be_db = sqlite_open(be->be_path, 0600, &errp);
1451
1452	if (be->be_db == NULL) {
1453		if (strstr(errp, "out of memory") != NULL) {
1454			configd_critical("%s: %s\n", db_file, errp);
1455			free(errp);
1456
1457			goto fail;
1458		}
1459
1460		/* report it as an integrity failure */
1461		integrity_results = errp;
1462		errp = NULL;
1463		goto integrity_fail;
1464	}
1465
1466	/*
1467	 * check if we are inited and of the correct schema version
1468	 *
1469	 * Eventually, we'll support schema upgrade here.
1470	 */
1471	info.rs_out = &val;
1472	info.rs_result = REP_PROTOCOL_FAIL_NOT_FOUND;
1473
1474	r = sqlite_exec(be->be_db, "SELECT schema_version FROM schema_version;",
1475	    run_single_int_callback, &info, &errp);
1476	if (r == SQLITE_ERROR &&
1477	    strcmp("no such table: schema_version", errp) == 0) {
1478		free(errp);
1479		/*
1480		 * Could be an empty repository, could be pre-schema_version
1481		 * schema.  Check for id_tbl, which has always been there.
1482		 */
1483		r = sqlite_exec(be->be_db, "SELECT count() FROM id_tbl;",
1484		    NULL, NULL, &errp);
1485		if (r == SQLITE_ERROR &&
1486		    strcmp("no such table: id_tbl", errp) == 0) {
1487			free(errp);
1488			*bep = be;
1489			return (BACKEND_CREATE_NEED_INIT);
1490		}
1491
1492		configd_critical("%s: schema version mismatch\n", db_file);
1493		goto fail;
1494	}
1495	if (r == SQLITE_BUSY || r == SQLITE_LOCKED) {
1496		free(errp);
1497		*bep = NULL;
1498		backend_destroy(be);
1499		return (BACKEND_CREATE_LOCKED);
1500	}
1501	if (r == SQLITE_OK) {
1502		if (info.rs_result == REP_PROTOCOL_FAIL_NOT_FOUND ||
1503		    val != BACKEND_SCHEMA_VERSION) {
1504			configd_critical("%s: schema version mismatch\n",
1505			    db_file);
1506			goto fail;
1507		}
1508	}
1509
1510	/*
1511	 * pull in the whole database sequentially.
1512	 */
1513	if ((fd = open(db_file, O_RDONLY)) >= 0) {
1514		size_t sz = 64 * 1024;
1515		char *buffer = malloc(sz);
1516		if (buffer != NULL) {
1517			while (read(fd, buffer, sz) > 0)
1518				;
1519			free(buffer);
1520		}
1521		(void) close(fd);
1522	}
1523
1524	/*
1525	 * run an integrity check
1526	 */
1527	r = sqlite_exec(be->be_db, "PRAGMA integrity_check;",
1528	    backend_integrity_callback, &integrity_results, &errp);
1529
1530	if (r == SQLITE_BUSY || r == SQLITE_LOCKED) {
1531		free(errp);
1532		*bep = NULL;
1533		backend_destroy(be);
1534		return (BACKEND_CREATE_LOCKED);
1535	}
1536	if (r == SQLITE_ABORT) {
1537		free(errp);
1538		errp = NULL;
1539		integrity_results = "out of memory running integrity check\n";
1540	} else if (r != SQLITE_OK && integrity_results == NULL) {
1541		integrity_results = errp;
1542		errp = NULL;
1543	}
1544
1545integrity_fail:
1546	if (integrity_results != NULL) {
1547		const char *fname = "/etc/svc/volatile/db_errors";
1548		if ((fd = open(fname, O_CREAT|O_WRONLY|O_APPEND, 0600)) < 0) {
1549			fname = NULL;
1550		} else {
1551			if (backend_fd_write(fd, "\n\n") < 0 ||
1552			    backend_fd_write(fd, db_file) < 0 ||
1553			    backend_fd_write(fd,
1554			    ": PRAGMA integrity_check; failed.  Results:\n") <
1555			    0 || backend_fd_write(fd, integrity_results) < 0 ||
1556			    backend_fd_write(fd, "\n\n") < 0) {
1557				fname = NULL;
1558			}
1559			(void) close(fd);
1560		}
1561
1562		if (!is_main_repository ||
1563		    backend_id == BACKEND_TYPE_NONPERSIST) {
1564			if (fname != NULL)
1565				configd_critical(
1566				    "%s: integrity check failed. Details in "
1567				    "%s\n", db_file, fname);
1568			else
1569				configd_critical(
1570				    "%s: integrity check failed.\n",
1571				    db_file);
1572		} else {
1573			(void) fprintf(stderr,
1574"\n"
1575"svc.configd: smf(5) database integrity check of:\n"
1576"\n"
1577"    %s\n"
1578"\n"
1579"  failed. The database might be damaged or a media error might have\n"
1580"  prevented it from being verified.  Additional information useful to\n"
1581"  your service provider%s%s\n"
1582"\n"
1583"  The system will not be able to boot until you have restored a working\n"
1584"  database.  svc.startd(1M) will provide a sulogin(1M) prompt for recovery\n"
1585"  purposes.  The command:\n"
1586"\n"
1587"    /lib/svc/bin/restore_repository\n"
1588"\n"
1589"  can be run to restore a backup version of your repository.  See\n"
1590"  http://sun.com/msg/SMF-8000-MY for more information.\n"
1591"\n",
1592			    db_file,
1593			    (fname == NULL)? ":\n\n" : " is in:\n\n    ",
1594			    (fname == NULL)? integrity_results : fname);
1595		}
1596		free(errp);
1597		goto fail;
1598	}
1599
1600	/*
1601	 * check if we are writable
1602	 */
1603	r = backend_is_readonly(be->be_db, be->be_path);
1604
1605	if (r == SQLITE_BUSY || r == SQLITE_LOCKED) {
1606		free(errp);
1607		*bep = NULL;
1608		backend_destroy(be);
1609		return (BACKEND_CREATE_LOCKED);
1610	}
1611	if (r != SQLITE_OK && r != SQLITE_FULL) {
1612		free(errp);
1613		be->be_readonly = 1;
1614		*bep = be;
1615		return (BACKEND_CREATE_READONLY);
1616	}
1617	*bep = be;
1618	return (BACKEND_CREATE_SUCCESS);
1619
1620fail:
1621	*bep = NULL;
1622	backend_destroy(be);
1623	return (BACKEND_CREATE_FAIL);
1624}
1625
/*
 * Round arg up to the nearest power of two (a power of two is returned
 * unchanged).
 *
 * The lowest set bit of arg is repeatedly added to it; each addition
 * carries that bit upward, so the loop ends once a single bit remains.
 * In twos-complement arithmetic the lowest set bit of x is (x & -x),
 * written here as (x & (~x + 1)).
 */
static size_t
round_up_to_p2(size_t arg)
{
	size_t lowest;

	/*
	 * Don't allow a zero result.
	 */
	assert(arg > 0 && ((ssize_t)arg > 0));

	for (;;) {
		lowest = arg & (~arg + 1);
		if (arg == lowest)
			break;		/* exactly one bit set */
		arg += lowest;
	}

	return (arg);
}
1643
1644/*
1645 * Returns
1646 *   _NO_RESOURCES - out of memory
1647 *   _BACKEND_ACCESS - backend type t (other than _NORMAL) doesn't exist
1648 *   _DONE - callback aborted query
1649 *   _SUCCESS
1650 */
1651int
1652backend_run(backend_type_t t, backend_query_t *q,
1653    backend_run_callback_f *cb, void *data)
1654{
1655	char *errmsg = NULL;
1656	int ret;
1657	sqlite_backend_t *be;
1658	hrtime_t ts, vts;
1659
1660	if (q == NULL || q->bq_buf == NULL)
1661		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
1662
1663	if ((ret = backend_lock(t, 0, &be)) != REP_PROTOCOL_SUCCESS)
1664		return (ret);
1665
1666	ts = gethrtime();
1667	vts = gethrvtime();
1668	ret = sqlite_exec(be->be_db, q->bq_buf, cb, data, &errmsg);
1669	UPDATE_TOTALS(be, bt_exec, ts, vts);
1670	ret = backend_error(be, ret, errmsg);
1671	backend_unlock(be);
1672
1673	return (ret);
1674}
1675
1676/*
1677 * Starts a "read-only" transaction -- i.e., locks out writers as long
1678 * as it is active.
1679 *
1680 * Fails with
1681 *   _NO_RESOURCES - out of memory
1682 *
1683 * If t is not _NORMAL, can also fail with
1684 *   _BACKEND_ACCESS - backend does not exist
1685 *
1686 * If writable is true, can also fail with
1687 *   _BACKEND_READONLY
1688 */
1689static int
1690backend_tx_begin_common(backend_type_t t, backend_tx_t **txp, int writable)
1691{
1692	backend_tx_t *ret;
1693	sqlite_backend_t *be;
1694	int r;
1695
1696	*txp = NULL;
1697
1698	ret = uu_zalloc(sizeof (*ret));
1699	if (ret == NULL)
1700		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
1701
1702	if ((r = backend_lock(t, writable, &be)) != REP_PROTOCOL_SUCCESS) {
1703		uu_free(ret);
1704		return (r);
1705	}
1706
1707	ret->bt_be = be;
1708	ret->bt_readonly = !writable;
1709	ret->bt_type = t;
1710	ret->bt_full = 0;
1711
1712	*txp = ret;
1713	return (REP_PROTOCOL_SUCCESS);
1714}
1715
1716int
1717backend_tx_begin_ro(backend_type_t t, backend_tx_t **txp)
1718{
1719	return (backend_tx_begin_common(t, txp, 0));
1720}
1721
1722static void
1723backend_tx_end(backend_tx_t *tx)
1724{
1725	sqlite_backend_t *be;
1726
1727	be = tx->bt_be;
1728
1729	if (tx->bt_full) {
1730		struct sqlite *new;
1731
1732		/*
1733		 * sqlite tends to be sticky with SQLITE_FULL, so we try
1734		 * to get a fresh database handle if we got a FULL warning
1735		 * along the way.  If that fails, no harm done.
1736		 */
1737		new = sqlite_open(be->be_path, 0600, NULL);
1738		if (new != NULL) {
1739			sqlite_close(be->be_db);
1740			be->be_db = new;
1741		}
1742	}
1743	backend_unlock(be);
1744	tx->bt_be = NULL;
1745	uu_free(tx);
1746}
1747
1748void
1749backend_tx_end_ro(backend_tx_t *tx)
1750{
1751	assert(tx->bt_readonly);
1752	backend_tx_end(tx);
1753}
1754
1755/*
1756 * Fails with
1757 *   _NO_RESOURCES - out of memory
1758 *   _BACKEND_ACCESS
1759 *   _BACKEND_READONLY
1760 */
1761int
1762backend_tx_begin(backend_type_t t, backend_tx_t **txp)
1763{
1764	int r;
1765	char *errmsg;
1766	hrtime_t ts, vts;
1767
1768	r = backend_tx_begin_common(t, txp, 1);
1769	if (r != REP_PROTOCOL_SUCCESS)
1770		return (r);
1771
1772	ts = gethrtime();
1773	vts = gethrvtime();
1774	r = sqlite_exec((*txp)->bt_be->be_db, "BEGIN TRANSACTION", NULL, NULL,
1775	    &errmsg);
1776	UPDATE_TOTALS((*txp)->bt_be, bt_exec, ts, vts);
1777	if (r == SQLITE_FULL)
1778		(*txp)->bt_full = 1;
1779	r = backend_error((*txp)->bt_be, r, errmsg);
1780
1781	if (r != REP_PROTOCOL_SUCCESS) {
1782		assert(r != REP_PROTOCOL_DONE);
1783		(void) sqlite_exec((*txp)->bt_be->be_db,
1784		    "ROLLBACK TRANSACTION", NULL, NULL, NULL);
1785		backend_tx_end(*txp);
1786		*txp = NULL;
1787		return (r);
1788	}
1789
1790	(*txp)->bt_readonly = 0;
1791
1792	return (REP_PROTOCOL_SUCCESS);
1793}
1794
1795void
1796backend_tx_rollback(backend_tx_t *tx)
1797{
1798	int r;
1799	char *errmsg;
1800	sqlite_backend_t *be;
1801	hrtime_t ts, vts;
1802
1803	assert(tx != NULL && tx->bt_be != NULL && !tx->bt_readonly);
1804	be = tx->bt_be;
1805
1806	ts = gethrtime();
1807	vts = gethrvtime();
1808	r = sqlite_exec(be->be_db, "ROLLBACK TRANSACTION", NULL, NULL,
1809	    &errmsg);
1810	UPDATE_TOTALS(be, bt_exec, ts, vts);
1811	if (r == SQLITE_FULL)
1812		tx->bt_full = 1;
1813	(void) backend_error(be, r, errmsg);
1814
1815	backend_tx_end(tx);
1816}
1817
1818/*
1819 * Fails with
1820 *   _NO_RESOURCES - out of memory
1821 */
1822int
1823backend_tx_commit(backend_tx_t *tx)
1824{
1825	int r, r2;
1826	char *errmsg;
1827	sqlite_backend_t *be;
1828	hrtime_t ts, vts;
1829
1830	assert(tx != NULL && tx->bt_be != NULL && !tx->bt_readonly);
1831	be = tx->bt_be;
1832	ts = gethrtime();
1833	vts = gethrvtime();
1834	r = sqlite_exec(be->be_db, "COMMIT TRANSACTION", NULL, NULL,
1835	    &errmsg);
1836	UPDATE_TOTALS(be, bt_exec, ts, vts);
1837	if (r == SQLITE_FULL)
1838		tx->bt_full = 1;
1839
1840	r = backend_error(be, r, errmsg);
1841	assert(r != REP_PROTOCOL_DONE);
1842
1843	if (r != REP_PROTOCOL_SUCCESS) {
1844		r2 = sqlite_exec(be->be_db, "ROLLBACK TRANSACTION", NULL, NULL,
1845		    &errmsg);
1846		r2 = backend_error(be, r2, errmsg);
1847		if (r2 != REP_PROTOCOL_SUCCESS)
1848			backend_panic("cannot rollback failed commit");
1849
1850		backend_tx_end(tx);
1851		return (r);
1852	}
1853	backend_tx_end(tx);
1854	return (REP_PROTOCOL_SUCCESS);
1855}
1856
1857static const char *
1858id_space_to_name(enum id_space id)
1859{
1860	switch (id) {
1861	case BACKEND_ID_SERVICE_INSTANCE:
1862		return ("SI");
1863	case BACKEND_ID_PROPERTYGRP:
1864		return ("PG");
1865	case BACKEND_ID_GENERATION:
1866		return ("GEN");
1867	case BACKEND_ID_PROPERTY:
1868		return ("PROP");
1869	case BACKEND_ID_VALUE:
1870		return ("VAL");
1871	case BACKEND_ID_SNAPNAME:
1872		return ("SNAME");
1873	case BACKEND_ID_SNAPSHOT:
1874		return ("SHOT");
1875	case BACKEND_ID_SNAPLEVEL:
1876		return ("SLVL");
1877	default:
1878		abort();
1879		/*NOTREACHED*/
1880	}
1881}
1882
1883/*
1884 * Returns a new id or 0 if the id argument is invalid or the query fails.
1885 */
1886uint32_t
1887backend_new_id(backend_tx_t *tx, enum id_space id)
1888{
1889	struct run_single_int_info info;
1890	uint32_t new_id = 0;
1891	const char *name = id_space_to_name(id);
1892	char *errmsg;
1893	int ret;
1894	sqlite_backend_t *be;
1895	hrtime_t ts, vts;
1896
1897	assert(tx != NULL && tx->bt_be != NULL && !tx->bt_readonly);
1898	be = tx->bt_be;
1899
1900	info.rs_out = &new_id;
1901	info.rs_result = REP_PROTOCOL_FAIL_NOT_FOUND;
1902
1903	ts = gethrtime();
1904	vts = gethrvtime();
1905	ret = sqlite_exec_printf(be->be_db,
1906	    "SELECT id_next FROM id_tbl WHERE (id_name = '%q');"
1907	    "UPDATE id_tbl SET id_next = id_next + 1 WHERE (id_name = '%q');",
1908	    run_single_int_callback, &info, &errmsg, name, name);
1909	UPDATE_TOTALS(be, bt_exec, ts, vts);
1910	if (ret == SQLITE_FULL)
1911		tx->bt_full = 1;
1912
1913	ret = backend_error(be, ret, errmsg);
1914
1915	if (ret != REP_PROTOCOL_SUCCESS) {
1916		return (0);
1917	}
1918
1919	return (new_id);
1920}
1921
1922/*
1923 * Returns
1924 *   _NO_RESOURCES - out of memory
1925 *   _DONE - callback aborted query
1926 *   _SUCCESS
1927 */
1928int
1929backend_tx_run(backend_tx_t *tx, backend_query_t *q,
1930    backend_run_callback_f *cb, void *data)
1931{
1932	char *errmsg = NULL;
1933	int ret;
1934	sqlite_backend_t *be;
1935	hrtime_t ts, vts;
1936
1937	assert(tx != NULL && tx->bt_be != NULL);
1938	be = tx->bt_be;
1939
1940	if (q == NULL || q->bq_buf == NULL)
1941		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
1942
1943	ts = gethrtime();
1944	vts = gethrvtime();
1945	ret = sqlite_exec(be->be_db, q->bq_buf, cb, data, &errmsg);
1946	UPDATE_TOTALS(be, bt_exec, ts, vts);
1947	if (ret == SQLITE_FULL)
1948		tx->bt_full = 1;
1949	ret = backend_error(be, ret, errmsg);
1950
1951	return (ret);
1952}
1953
1954/*
1955 * Returns
1956 *   _NO_RESOURCES - out of memory
1957 *   _NOT_FOUND - the query returned no results
1958 *   _SUCCESS - the query returned a single integer
1959 */
1960int
1961backend_tx_run_single_int(backend_tx_t *tx, backend_query_t *q, uint32_t *buf)
1962{
1963	struct run_single_int_info info;
1964	int ret;
1965
1966	info.rs_out = buf;
1967	info.rs_result = REP_PROTOCOL_FAIL_NOT_FOUND;
1968
1969	ret = backend_tx_run(tx, q, run_single_int_callback, &info);
1970	assert(ret != REP_PROTOCOL_DONE);
1971
1972	if (ret != REP_PROTOCOL_SUCCESS)
1973		return (ret);
1974
1975	return (info.rs_result);
1976}
1977
1978/*
1979 * Fails with
1980 *   _NO_RESOURCES - out of memory
1981 */
1982int
1983backend_tx_run_update(backend_tx_t *tx, const char *format, ...)
1984{
1985	va_list a;
1986	char *errmsg;
1987	int ret;
1988	sqlite_backend_t *be;
1989	hrtime_t ts, vts;
1990
1991	assert(tx != NULL && tx->bt_be != NULL && !tx->bt_readonly);
1992	be = tx->bt_be;
1993
1994	va_start(a, format);
1995	ts = gethrtime();
1996	vts = gethrvtime();
1997	ret = sqlite_exec_vprintf(be->be_db, format, NULL, NULL, &errmsg, a);
1998	UPDATE_TOTALS(be, bt_exec, ts, vts);
1999	if (ret == SQLITE_FULL)
2000		tx->bt_full = 1;
2001	va_end(a);
2002	ret = backend_error(be, ret, errmsg);
2003	assert(ret != REP_PROTOCOL_DONE);
2004
2005	return (ret);
2006}
2007
2008/*
2009 * returns REP_PROTOCOL_FAIL_NOT_FOUND if no changes occured
2010 */
2011int
2012backend_tx_run_update_changed(backend_tx_t *tx, const char *format, ...)
2013{
2014	va_list a;
2015	char *errmsg;
2016	int ret;
2017	sqlite_backend_t *be;
2018	hrtime_t ts, vts;
2019
2020	assert(tx != NULL && tx->bt_be != NULL && !tx->bt_readonly);
2021	be = tx->bt_be;
2022
2023	va_start(a, format);
2024	ts = gethrtime();
2025	vts = gethrvtime();
2026	ret = sqlite_exec_vprintf(be->be_db, format, NULL, NULL, &errmsg, a);
2027	UPDATE_TOTALS(be, bt_exec, ts, vts);
2028	if (ret == SQLITE_FULL)
2029		tx->bt_full = 1;
2030	va_end(a);
2031
2032	ret = backend_error(be, ret, errmsg);
2033
2034	return (ret);
2035}
2036
2037#define	BACKEND_ADD_SCHEMA(be, file, tbls, idxs) \
2038	(backend_add_schema((be), (file), \
2039	    (tbls), sizeof (tbls) / sizeof (*(tbls)), \
2040	    (idxs), sizeof (idxs) / sizeof (*(idxs))))
2041
2042static int
2043backend_add_schema(sqlite_backend_t *be, const char *file,
2044    struct backend_tbl_info *tbls, int tbl_count,
2045    struct backend_idx_info *idxs, int idx_count)
2046{
2047	int i;
2048	char *errmsg;
2049	int ret;
2050
2051	/*
2052	 * Create the tables.
2053	 */
2054	for (i = 0; i < tbl_count; i++) {
2055		if (tbls[i].bti_name == NULL) {
2056			assert(i + 1 == tbl_count);
2057			break;
2058		}
2059		ret = sqlite_exec_printf(be->be_db,
2060		    "CREATE TABLE %s (%s);\n",
2061		    NULL, NULL, &errmsg, tbls[i].bti_name, tbls[i].bti_cols);
2062
2063		if (ret != SQLITE_OK) {
2064			configd_critical(
2065			    "%s: %s table creation fails: %s\n", file,
2066			    tbls[i].bti_name, errmsg);
2067			free(errmsg);
2068			return (-1);
2069		}
2070	}
2071
2072	/*
2073	 * Make indices on key tables and columns.
2074	 */
2075	for (i = 0; i < idx_count; i++) {
2076		if (idxs[i].bxi_tbl == NULL) {
2077			assert(i + 1 == idx_count);
2078			break;
2079		}
2080
2081		ret = sqlite_exec_printf(be->be_db,
2082		    "CREATE INDEX %s_%s ON %s (%s);\n",
2083		    NULL, NULL, &errmsg, idxs[i].bxi_tbl, idxs[i].bxi_idx,
2084		    idxs[i].bxi_tbl, idxs[i].bxi_cols);
2085
2086		if (ret != SQLITE_OK) {
2087			configd_critical(
2088			    "%s: %s_%s index creation fails: %s\n", file,
2089			    idxs[i].bxi_tbl, idxs[i].bxi_idx, errmsg);
2090			free(errmsg);
2091			return (-1);
2092		}
2093	}
2094	return (0);
2095}
2096
2097static int
2098backend_init_schema(sqlite_backend_t *be, const char *db_file, backend_type_t t)
2099{
2100	int i;
2101	char *errmsg;
2102	int ret;
2103
2104	assert(t == BACKEND_TYPE_NORMAL || t == BACKEND_TYPE_NONPERSIST);
2105
2106	if (t == BACKEND_TYPE_NORMAL) {
2107		ret = BACKEND_ADD_SCHEMA(be, db_file, tbls_normal, idxs_normal);
2108	} else if (t == BACKEND_TYPE_NONPERSIST) {
2109		ret = BACKEND_ADD_SCHEMA(be, db_file, tbls_np, idxs_np);
2110	} else {
2111		abort();		/* can't happen */
2112	}
2113
2114	if (ret < 0) {
2115		return (ret);
2116	}
2117
2118	ret = BACKEND_ADD_SCHEMA(be, db_file, tbls_common, idxs_common);
2119	if (ret < 0) {
2120		return (ret);
2121	}
2122
2123	/*
2124	 * Add the schema version to the table
2125	 */
2126	ret = sqlite_exec_printf(be->be_db,
2127	    "INSERT INTO schema_version (schema_version) VALUES (%d)",
2128	    NULL, NULL, &errmsg, BACKEND_SCHEMA_VERSION);
2129	if (ret != SQLITE_OK) {
2130		configd_critical(
2131		    "setting schema version fails: %s\n", errmsg);
2132		free(errmsg);
2133	}
2134
2135	/*
2136	 * Populate id_tbl with initial IDs.
2137	 */
2138	for (i = 0; i < BACKEND_ID_INVALID; i++) {
2139		const char *name = id_space_to_name(i);
2140
2141		ret = sqlite_exec_printf(be->be_db,
2142		    "INSERT INTO id_tbl (id_name, id_next) "
2143		    "VALUES ('%q', %d);", NULL, NULL, &errmsg, name, 1);
2144		if (ret != SQLITE_OK) {
2145			configd_critical(
2146			    "id insertion for %s fails: %s\n", name, errmsg);
2147			free(errmsg);
2148			return (-1);
2149		}
2150	}
2151	/*
2152	 * Set the persistance of the database.  The normal database is marked
2153	 * "synchronous", so that all writes are synchronized to stable storage
2154	 * before proceeding.
2155	 */
2156	ret = sqlite_exec_printf(be->be_db,
2157	    "PRAGMA default_synchronous = %s; PRAGMA synchronous = %s;",
2158	    NULL, NULL, &errmsg,
2159	    (t == BACKEND_TYPE_NORMAL)? "ON" : "OFF",
2160	    (t == BACKEND_TYPE_NORMAL)? "ON" : "OFF");
2161	if (ret != SQLITE_OK) {
2162		configd_critical("pragma setting fails: %s\n", errmsg);
2163		free(errmsg);
2164		return (-1);
2165	}
2166
2167	return (0);
2168}
2169
2170int
2171backend_init(const char *db_file, const char *npdb_file, int have_np)
2172{
2173	sqlite_backend_t *be;
2174	int r;
2175	int writable_persist = 1;
2176
2177	/* set up our temporary directory */
2178	sqlite_temp_directory = "/etc/svc/volatile";
2179
2180	if (strcmp(SQLITE_VERSION, sqlite_version) != 0) {
2181		configd_critical("Mismatched link!  (%s should be %s)\n",
2182		    sqlite_version, SQLITE_VERSION);
2183		return (CONFIGD_EXIT_DATABASE_INIT_FAILED);
2184	}
2185
2186	/*
2187	 * If the system crashed during a backend switch, there might
2188	 * be a leftover transient database which contains useful
2189	 * information which can be used for recovery.
2190	 */
2191	backend_switch_recovery();
2192
2193	if (db_file == NULL)
2194		db_file = REPOSITORY_DB;
2195	if (strcmp(db_file, REPOSITORY_DB) != 0) {
2196		is_main_repository = 0;
2197	}
2198
2199	r = backend_create(BACKEND_TYPE_NORMAL, db_file, &be);
2200	switch (r) {
2201	case BACKEND_CREATE_FAIL:
2202		return (CONFIGD_EXIT_DATABASE_INIT_FAILED);
2203	case BACKEND_CREATE_LOCKED:
2204		return (CONFIGD_EXIT_DATABASE_LOCKED);
2205	case BACKEND_CREATE_SUCCESS:
2206		break;		/* success */
2207	case BACKEND_CREATE_READONLY:
2208		writable_persist = 0;
2209		break;
2210	case BACKEND_CREATE_NEED_INIT:
2211		if (backend_init_schema(be, db_file, BACKEND_TYPE_NORMAL)) {
2212			backend_destroy(be);
2213			return (CONFIGD_EXIT_DATABASE_INIT_FAILED);
2214		}
2215		break;
2216	default:
2217		abort();
2218		/*NOTREACHED*/
2219	}
2220	backend_create_finish(BACKEND_TYPE_NORMAL, be);
2221
2222	if (have_np) {
2223		if (npdb_file == NULL)
2224			npdb_file = NONPERSIST_DB;
2225
2226		r = backend_create(BACKEND_TYPE_NONPERSIST, npdb_file, &be);
2227		switch (r) {
2228		case BACKEND_CREATE_SUCCESS:
2229			break;		/* success */
2230		case BACKEND_CREATE_FAIL:
2231			return (CONFIGD_EXIT_DATABASE_INIT_FAILED);
2232		case BACKEND_CREATE_LOCKED:
2233			return (CONFIGD_EXIT_DATABASE_LOCKED);
2234		case BACKEND_CREATE_READONLY:
2235			configd_critical("%s: unable to write\n", npdb_file);
2236			return (CONFIGD_EXIT_DATABASE_INIT_FAILED);
2237		case BACKEND_CREATE_NEED_INIT:
2238			if (backend_init_schema(be, db_file,
2239			    BACKEND_TYPE_NONPERSIST)) {
2240				backend_destroy(be);
2241				return (CONFIGD_EXIT_DATABASE_INIT_FAILED);
2242			}
2243			break;
2244		default:
2245			abort();
2246			/*NOTREACHED*/
2247		}
2248		backend_create_finish(BACKEND_TYPE_NONPERSIST, be);
2249
2250		/*
2251		 * If we started up with a writable filesystem, but the
2252		 * non-persistent database needed initialization, we
2253		 * are booting a non-global zone, so do a backup.
2254		 */
2255		if (r == BACKEND_CREATE_NEED_INIT && writable_persist &&
2256		    backend_lock(BACKEND_TYPE_NORMAL, 0, &be) ==
2257		    REP_PROTOCOL_SUCCESS) {
2258			if (backend_create_backup_locked(be,
2259			    REPOSITORY_BOOT_BACKUP) != REP_PROTOCOL_SUCCESS) {
2260				configd_critical(
2261				    "unable to create \"%s\" backup of "
2262				    "\"%s\"\n", REPOSITORY_BOOT_BACKUP,
2263				    be->be_path);
2264			}
2265			backend_unlock(be);
2266		}
2267	}
2268	return (CONFIGD_EXIT_OKAY);
2269}
2270
2271/*
2272 * quiesce all database activity prior to exiting
2273 */
2274void
2275backend_fini(void)
2276{
2277	sqlite_backend_t *be_normal, *be_np;
2278
2279	(void) backend_lock(BACKEND_TYPE_NORMAL, 1, &be_normal);
2280	(void) backend_lock(BACKEND_TYPE_NONPERSIST, 1, &be_np);
2281}
2282
2283#define	QUERY_BASE	128
2284backend_query_t *
2285backend_query_alloc(void)
2286{
2287	backend_query_t *q;
2288	q = calloc(1, sizeof (backend_query_t));
2289	if (q != NULL) {
2290		q->bq_size = QUERY_BASE;
2291		q->bq_buf = calloc(1, q->bq_size);
2292		if (q->bq_buf == NULL) {
2293			q->bq_size = 0;
2294		}
2295
2296	}
2297	return (q);
2298}
2299
2300void
2301backend_query_append(backend_query_t *q, const char *value)
2302{
2303	char *alloc;
2304	int count;
2305	size_t size, old_len;
2306
2307	if (q == NULL) {
2308		/* We'll discover the error when we try to run the query. */
2309		return;
2310	}
2311
2312	while (q->bq_buf != NULL) {
2313		old_len = strlen(q->bq_buf);
2314		size = q->bq_size;
2315		count = strlcat(q->bq_buf, value, size);
2316
2317		if (count < size)
2318			break;				/* success */
2319
2320		q->bq_buf[old_len] = 0;
2321		size = round_up_to_p2(count + 1);
2322
2323		assert(size > q->bq_size);
2324		alloc = realloc(q->bq_buf, size);
2325		if (alloc == NULL) {
2326			free(q->bq_buf);
2327			q->bq_buf = NULL;
2328			break;				/* can't grow */
2329		}
2330
2331		q->bq_buf = alloc;
2332		q->bq_size = size;
2333	}
2334}
2335
2336void
2337backend_query_add(backend_query_t *q, const char *format, ...)
2338{
2339	va_list args;
2340	char *new;
2341
2342	if (q == NULL || q->bq_buf == NULL)
2343		return;
2344
2345	va_start(args, format);
2346	new = sqlite_vmprintf(format, args);
2347	va_end(args);
2348
2349	if (new == NULL) {
2350		free(q->bq_buf);
2351		q->bq_buf = NULL;
2352		return;
2353	}
2354
2355	backend_query_append(q, new);
2356
2357	free(new);
2358}
2359
2360void
2361backend_query_free(backend_query_t *q)
2362{
2363	if (q != NULL) {
2364		if (q->bq_buf != NULL) {
2365			free(q->bq_buf);
2366		}
2367		free(q);
2368	}
2369}
2370