backend.c revision 11996:91b62f7b8186
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27/*
28 * sqlite is not compatible with _FILE_OFFSET_BITS=64, but we need to
29 * be able to statvfs(2) possibly large systems.  This define gives us
30 * access to the transitional interfaces.  See lfcompile64(5) for how
31 * _LARGEFILE64_SOURCE works.
32 */
33#define	_LARGEFILE64_SOURCE
34
35#include <assert.h>
36#include <atomic.h>
37#include <door.h>
38#include <dirent.h>
39#include <errno.h>
40#include <fcntl.h>
41#include <limits.h>
42#include <pthread.h>
43#include <stdarg.h>
44#include <stdio.h>
45#include <stdlib.h>
46#include <string.h>
47#include <strings.h>
48#include <sys/stat.h>
49#include <sys/statvfs.h>
50#include <time.h>
51#include <unistd.h>
52#include <zone.h>
53#include <libscf_priv.h>
54
55#include "configd.h"
56#include "repcache_protocol.h"
57
58#include <sqlite.h>
59#include <sqlite-misc.h>
60
61/*
62 * This file has two purposes:
63 *
64 * 1. It contains the database schema, and the code for setting up our backend
65 *    databases, including installing said schema.
66 *
67 * 2. It provides a simplified interface to the SQL database library, and
68 *    synchronizes MT access to the database.
69 */
70
/*
 * IS_VOLATILE(be) is true when be_ppath is non-NULL, i.e. when a saved
 * path to the permanent repository is being held for this backend.
 */
#define	IS_VOLATILE(be)		((be)->be_ppath != NULL)
/* Number of slots in the flight_recorder[] event array. */
#define	MAX_FLIGHT_RECORDER_EVENTS	100
73
/*
 * Result codes for a repository switch.  The consumer of these values is
 * not visible in this part of the file (presumably backend_switch() --
 * TODO confirm).  BACKEND_SWITCH_RO has no explicit initializer, so it
 * takes the value following BACKEND_SWITCH_OK, i.e. 1.
 */
typedef enum backend_switch_results {
	BACKEND_SWITCH_FATAL =	-1,
	BACKEND_SWITCH_OK =	0,
	BACKEND_SWITCH_RO
} backend_switch_results_t;
79
/*
 * Accumulated cost of one kind of backend activity.  UPDATE_TOTALS_WR()
 * maintains these fields: it bumps bs_count once per call and adds the
 * elapsed gethrtime() and gethrvtime() deltas to bs_time and bs_vtime.
 */
typedef struct backend_spent {
	uint64_t bs_count;	/* number of intervals recorded */
	hrtime_t bs_time;	/* sum of gethrtime() deltas */
	hrtime_t bs_vtime;	/* sum of gethrvtime() deltas */
} backend_spent_t;

/*
 * The two activities that are accounted for separately on each backend.
 */
typedef struct backend_totals {
	backend_spent_t	bt_lock;	/* waiting for lock */
	backend_spent_t	bt_exec;	/* time spent executing SQL */
} backend_totals_t;
90
/*
 * Per-backend state.
 *
 * There are times when svcadm asks configd to move the BACKEND_TYPE_NORMAL
 * repository to volatile storage.  See backend_switch().  When the
 * repository is on volatile storage, we save the location of the permanent
 * repository in be_ppath.  We use the saved path when the time comes to
 * move the repository back.  When the repository is on permanent storage,
 * be_ppath is set to NULL.  Also see the definition of IS_VOLATILE() above
 * for testing if the repository is on volatile storage.
 *
 * be_totals[] is indexed by !!(writing) in UPDATE_TOTALS_WR(): slot 0
 * accumulates read-side statistics and slot 1 write-side statistics.
 */
typedef struct sqlite_backend {
	pthread_mutex_t	be_lock;
	pthread_t	be_thread;	/* thread holding lock */
	struct sqlite	*be_db;
	const char	*be_path;	/* path to db */
	const char	*be_ppath;	/* saved path to persistent db when */
					/* backend is volatile */
	const char	*be_checkpoint;	/* path to repository checkpoint */
	int		be_readonly;	/* readonly at start, and still is */
	int		be_writing;	/* held for writing */
	backend_type_t	be_type;	/* type of db */
	hrtime_t	be_lastcheck;	/* time of last read-only check */
	backend_totals_t be_totals[2];	/* one for reading, one for writing */
} sqlite_backend_t;
114
/*
 * State of one backend transaction.  backend_is_upgraded() compares
 * bt_type against BACKEND_TYPE_NONPERSIST, so bt_type holds a backend type
 * value.
 */
struct backend_tx {
	sqlite_backend_t	*bt_be;		/* backend the tx runs on */
	int			bt_readonly;
	int			bt_type;	/* backend type of bt_be */
	int			bt_full;	/* SQLITE_FULL during tx */
};
121
/*
 * UPDATE_TOTALS_WR(sb, writing, field, ts, vts) charges one finished
 * interval to backend sb.  "writing" selects the read (0) or write
 * (non-zero) slot of be_totals[], "field" names the backend_totals_t member
 * to update (bt_lock or bt_exec), and ts / vts are the gethrtime() and
 * gethrvtime() values sampled when the interval began.
 *
 * ts and vts are parenthesized in the expansion so that an expression
 * argument (e.g. "a + b") is subtracted as a whole.  The expansion is a
 * brace block rather than do { } while (0); that form is kept so existing
 * call sites continue to compile unchanged.
 */
#define	UPDATE_TOTALS_WR(sb, writing, field, ts, vts) { \
	backend_spent_t *__bsp = &(sb)->be_totals[!!(writing)].field; \
	__bsp->bs_count++;						\
	__bsp->bs_time += (gethrtime() - (ts));				\
	__bsp->bs_vtime += (gethrvtime() - (vts));			\
}

/*
 * UPDATE_TOTALS() is UPDATE_TOTALS_WR() with the read/write slot taken from
 * the backend's current be_writing flag.
 */
#define	UPDATE_TOTALS(sb, field, ts, vts) \
	UPDATE_TOTALS_WR(sb, (sb)->be_writing, field, ts, vts)
131
/*
 * A query under construction.  NOTE(review): bq_buf / bq_size look like a
 * text buffer and its size; the code that fills them in is not visible
 * here -- confirm.
 */
struct backend_query {
	char	*bq_buf;
	size_t	bq_size;
};

/*
 * One table of the schema: its name and its column definitions.  Arrays of
 * these are terminated by an entry whose members are all NULL.
 */
struct backend_tbl_info {
	const char *bti_name;
	const char *bti_cols;
};

/*
 * One index of the schema: the table it belongs to, the index's short
 * name, and the indexed column list.  Arrays of these are terminated by an
 * entry whose members are all NULL.
 */
struct backend_idx_info {
	const char *bxi_tbl;
	const char *bxi_idx;
	const char *bxi_cols;
};
147
/* Definitions for the flight recorder: */

/*
 * What happened.  Stored in be_flight_event_t.bfe_type.  Since recorder
 * slots are zero-filled static storage, an unused slot reads as
 * BE_FLIGHT_EV_NOEVENT.
 */
typedef enum be_flight_type {
	BE_FLIGHT_EV_NOEVENT = 0,	/* No event yet recorded. */
	BE_FLIGHT_EV_BACKUP,		/* Information about repo. backup */
	BE_FLIGHT_EV_BACKUP_ENTER,	/* Enter */
					/* backend_create_backup_locked() */
	BE_FLIGHT_EV_CHECKPOINT,	/* Request to checkpoint repository */
					/* for boot time backup */
	BE_FLIGHT_EV_CHECKPOINT_EXISTS,	/* Existing checkpoint detected on */
					/* restart */
	BE_FLIGHT_EV_LINGERING_FAST,	/* Use lingering fast repository */
	BE_FLIGHT_EV_NO_BACKUP,		/* Requested backup not made */
	BE_FLIGHT_EV_REPO_CREATE,	/* Main repository created */
	BE_FLIGHT_EV_RESTART,		/* This is a restart of configd */
	BE_FLIGHT_EV_SWITCH,		/* Switch repositories */
	BE_FLIGHT_EV_TRANS_RW		/* Root transitioned to read/write */
} be_flight_type_t;
166
/*
 * Qualifier recorded alongside a flight recorder event.  Stored in
 * be_flight_event_t.bfe_status.
 */
typedef enum be_flight_status {
	BE_FLIGHT_ST_INFO = 0,		/* No status.  Event is informative */
	BE_FLIGHT_ST_BOOT_BACKUP,	/* Boot time backup */
	BE_FLIGHT_ST_CHECKPOINT_BACKUP,	/* Backup from checkpoint */
	BE_FLIGHT_ST_CLIENT,		/* Request from client as opposed to */
					/* internal call */
	BE_FLIGHT_ST_DUPLICATE,		/* Backup duplicates existing one */
	BE_FLIGHT_ST_FAIL,		/* Operation failed. */
	BE_FLIGHT_ST_FAST,		/* Fast repository (tmpfs) */
	BE_FLIGHT_ST_MI_BACKUP,		/* Manifest-import backup */
	BE_FLIGHT_ST_NO_SWITCH,		/* Don't switch repositories */
	BE_FLIGHT_ST_OTHER_BACKUP,	/* Other type of backup */
	BE_FLIGHT_ST_PERMANENT,		/* Repository on permanent storage */
	BE_FLIGHT_ST_REPO_BACKUP,	/* Backup from repository */
	BE_FLIGHT_ST_RO,		/* Main repository is read-only */
	BE_FLIGHT_ST_RW,		/* Main repository is read/write */
	BE_FLIGHT_ST_SUCCESS,		/* Operation was successful */
	BE_FLIGHT_ST_SWITCH		/* Switch repository */
} be_flight_status_t;
186
/*
 * One slot of the flight recorder.  flight_recorder_event() fills in all
 * four members; bfe_time comes from time(NULL) and bfe_sequence from the
 * flight_recorder_sequence counter.
 */
typedef struct be_flight_event {
	be_flight_type_t	bfe_type;	/* Type of event. */
	be_flight_status_t	bfe_status;	/* Result of the event. */
	time_t			bfe_time;	/* Time of the event. */
	uint_t			bfe_sequence;	/* Sequence number. */
} be_flight_event_t;
193
/*
 * Panic state.  backend_panic_thread is 0 until backend_panic() is first
 * called; the first caller stores its own thread id there while holding
 * backend_panic_lock.  Later callers see the non-zero value and block
 * forever on backend_panic_cv.
 */
static pthread_mutex_t backend_panic_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t backend_panic_cv = PTHREAD_COND_INITIALIZER;
pthread_t backend_panic_thread = 0;

int backend_do_trace = 0;		/* invoke tracing callback */
int backend_print_trace = 0;		/* tracing callback prints SQL */
int backend_panic_abort = 0;		/* abort when panicking */

/* Data for the flight_recorder. */

/*
 * backend_flight_recorder_lock protects flight_recorder_next and
 * flight_recorder_sequence.  flight_recorder_missed is updated with
 * atomic_inc_uint() and counts events that could not be stored.
 */
static pthread_mutex_t backend_flight_recorder_lock = PTHREAD_MUTEX_INITIALIZER;
static be_flight_event_t flight_recorder[MAX_FLIGHT_RECORDER_EVENTS];
static uint_t flight_recorder_next = 0;
static uint_t flight_recorder_missed = 0;
static uint_t flight_recorder_sequence = 0;
209
/*
 * Interval between read-only checks while starting up.  The value is
 * 2 * NANOSEC as an hrtime_t (two seconds, assuming NANOSEC is the number
 * of nanoseconds per second -- confirm against <sys/time.h>).
 */
#define	BACKEND_READONLY_CHECK_INTERVAL	(2 * (hrtime_t)NANOSEC)

/*
 * Any incompatible change to the below schema should bump the version number.
 * The schema has been changed to support value ordering, but this change
 * is backwards-compatible - i.e. a previous svc.configd can use a
 * repository database with the new schema perfectly well.  As a result,
 * the schema version has not been updated, allowing downgrade of systems
 * without losing repository data.
 */
#define	BACKEND_SCHEMA_VERSION		5
222
/*
 * Tables that exist only in the BACKEND_TYPE_NORMAL repository.  Each entry
 * is { table name, column definitions }; the list ends with a
 * { NULL, NULL } entry.
 */
static struct backend_tbl_info tbls_normal[] = { /* BACKEND_TYPE_NORMAL */
	/*
	 * service_tbl holds all services.  svc_id is the identifier of the
	 * service.
	 */
	{
		"service_tbl",
		"svc_id          INTEGER PRIMARY KEY,"
		"svc_name        CHAR(256) NOT NULL"
	},

	/*
	 * instance_tbl holds all of the instances.  The parent service id
	 * is instance_svc.
	 */
	{
		"instance_tbl",
		"instance_id     INTEGER PRIMARY KEY,"
		"instance_name   CHAR(256) NOT NULL,"
		"instance_svc    INTEGER NOT NULL"
	},

	/*
	 * snapshot_lnk_tbl links (instance, snapshot name) with snapshots.
	 */
	{
		"snapshot_lnk_tbl",
		"lnk_id          INTEGER PRIMARY KEY,"
		"lnk_inst_id     INTEGER NOT NULL,"
		"lnk_snap_name   CHAR(256) NOT NULL,"
		"lnk_snap_id     INTEGER NOT NULL"
	},

	/*
	 * snaplevel_tbl maps a snapshot id to a set of named, ordered
	 * snaplevels.
	 */
	{
		"snaplevel_tbl",
		"snap_id                 INTEGER NOT NULL,"
		"snap_level_num          INTEGER NOT NULL,"
		"snap_level_id           INTEGER NOT NULL,"
		"snap_level_service_id   INTEGER NOT NULL,"
		"snap_level_service      CHAR(256) NOT NULL,"
		"snap_level_instance_id  INTEGER NULL,"
		"snap_level_instance     CHAR(256) NULL"
	},

	/*
	 * snaplevel_lnk_tbl links snaplevels to property groups.
	 * snaplvl_pg_* is identical to the original property group,
	 * and snaplvl_gen_id overrides the generation number.
	 * The service/instance ids are as in the snaplevel.
	 */
	{
		"snaplevel_lnk_tbl",
		"snaplvl_level_id INTEGER NOT NULL,"
		"snaplvl_pg_id    INTEGER NOT NULL,"
		"snaplvl_pg_name  CHAR(256) NOT NULL,"
		"snaplvl_pg_type  CHAR(256) NOT NULL,"
		"snaplvl_pg_flags INTEGER NOT NULL,"
		"snaplvl_gen_id   INTEGER NOT NULL"
	},

	{ NULL, NULL }
};
289
/*
 * Indexes on the BACKEND_TYPE_NORMAL-only tables.  Each entry is
 * { table, index name, indexed columns }; the list ends with an all-NULL
 * entry.
 */
static struct backend_idx_info idxs_normal[] = { /* BACKEND_TYPE_NORMAL */
	{ "service_tbl",	"name",	"svc_name" },
	{ "instance_tbl",	"name",	"instance_svc, instance_name" },
	{ "snapshot_lnk_tbl",	"name",	"lnk_inst_id, lnk_snap_name" },
	{ "snapshot_lnk_tbl",	"snapid", "lnk_snap_id" },
	{ "snaplevel_tbl",	"id",	"snap_id" },
	{ "snaplevel_lnk_tbl",	"id",	"snaplvl_pg_id" },
	{ "snaplevel_lnk_tbl",	"level", "snaplvl_level_id" },
	{ NULL, NULL, NULL }
};
300
/*
 * The non-persistent backend has no tables or indexes of its own: both
 * lists consist solely of the terminating all-NULL entry.
 */
static struct backend_tbl_info tbls_np[] = { /* BACKEND_TYPE_NONPERSIST */
	{ NULL, NULL }
};

static struct backend_idx_info idxs_np[] = {	/* BACKEND_TYPE_NONPERSIST */
	{ NULL, NULL, NULL }
};
308
/*
 * Tables present in every backend type.  Each entry is
 * { table name, column definitions }; the list ends with a { NULL, NULL }
 * entry.
 */
static struct backend_tbl_info tbls_common[] = { /* all backend types */
	/*
	 * pg_tbl defines property groups.  They are associated with a single
	 * service or instance.  The pg_gen_id links them with the latest
	 * "edited" version of its properties.
	 */
	{
		"pg_tbl",
		"pg_id           INTEGER PRIMARY KEY,"
		"pg_parent_id    INTEGER NOT NULL,"
		"pg_name         CHAR(256) NOT NULL,"
		"pg_type         CHAR(256) NOT NULL,"
		"pg_flags        INTEGER NOT NULL,"
		"pg_gen_id       INTEGER NOT NULL"
	},

	/*
	 * prop_lnk_tbl links a particular pg_id and gen_id to a set of
	 * (prop_name, prop_type, val_id) trios.
	 */
	{
		"prop_lnk_tbl",
		"lnk_prop_id     INTEGER PRIMARY KEY,"
		"lnk_pg_id       INTEGER NOT NULL,"
		"lnk_gen_id      INTEGER NOT NULL,"
		"lnk_prop_name   CHAR(256) NOT NULL,"
		"lnk_prop_type   CHAR(2) NOT NULL,"
		"lnk_val_id      INTEGER"
	},

	/*
	 * value_tbl maps a value_id to a set of values.  For any given
	 * value_id, value_type is constant.  The table definition here
	 * is repeated in backend_check_upgrade(), and must be kept in-sync.
	 */
	{
		"value_tbl",
		"value_id        INTEGER NOT NULL,"
		"value_type      CHAR(1) NOT NULL,"
		"value_value     VARCHAR NOT NULL,"
		"value_order     INTEGER DEFAULT 0"
	},

	/*
	 * id_tbl has one row per id space
	 */
	{
		"id_tbl",
		"id_name         STRING NOT NULL,"
		"id_next         INTEGER NOT NULL"
	},

	/*
	 * schema_version has a single row, which contains
	 * BACKEND_SCHEMA_VERSION at the time of creation.
	 */
	{
		"schema_version",
		"schema_version  INTEGER"
	},
	{ NULL, NULL }
};
371
/*
 * Indexes on the tables shared by all backend types.  Each entry is
 * { table, index name, indexed columns }; the list ends with an all-NULL
 * entry.
 *
 * The indexing of value_tbl is repeated in backend_check_upgrade() and
 * must be kept in sync with the indexing specification here.
 */
static struct backend_idx_info idxs_common[] = { /* all backend types */
	{ "pg_tbl",		"parent", "pg_parent_id" },
	{ "pg_tbl",		"name",	"pg_parent_id, pg_name" },
	{ "pg_tbl",		"type",	"pg_parent_id, pg_type" },
	{ "prop_lnk_tbl",	"base",	"lnk_pg_id, lnk_gen_id" },
	{ "prop_lnk_tbl",	"val",	"lnk_val_id" },
	{ "value_tbl",		"id",	"value_id" },
	{ "id_tbl",		"id",	"id_name" },
	{ NULL, NULL, NULL }
};
386
/*
 * Argument block for run_single_int_callback().  The callback stores the
 * parsed integer through rs_out and sets rs_result to REP_PROTOCOL_SUCCESS;
 * it asserts that rs_result is not already REP_PROTOCOL_SUCCESS on entry.
 */
struct run_single_int_info {
	uint32_t	*rs_out;
	int		rs_result;
};

/*
 * Forward declarations for the repository copy routines, which are used
 * before they are defined.
 */
static rep_protocol_responseid_t backend_copy_repository(const char *,
    const char *, int);
static rep_protocol_responseid_t backend_do_copy(const char *, int,
    const char *, int, size_t *);
396
397/*
398 * The flight recorder keeps track of events that happen primarily while
399 * the system is booting.  Once the system is up an running, one can take a
400 * gcore(1) of configd and examine the events with mdb.  Since we're most
401 * interested in early boot events, we stop recording events when the
402 * recorder is full.
403 */
404static void
405flight_recorder_event(be_flight_type_t type, be_flight_status_t res)
406{
407	be_flight_event_t *data;
408	uint_t item;
409	uint_t sequence;
410
411	if (pthread_mutex_lock(&backend_flight_recorder_lock) != 0) {
412		atomic_inc_uint(&flight_recorder_missed);
413		return;
414	}
415	if (flight_recorder_next >= MAX_FLIGHT_RECORDER_EVENTS) {
416		/* Hit end of the array.  No more event recording. */
417		item = flight_recorder_next;
418	} else {
419		item = flight_recorder_next++;
420		sequence = flight_recorder_sequence++;
421	}
422	(void) pthread_mutex_unlock(&backend_flight_recorder_lock);
423
424	if (item >= MAX_FLIGHT_RECORDER_EVENTS) {
425		/* Array is filled.  Stop recording events */
426		atomic_inc_uint(&flight_recorder_missed);
427		return;
428	}
429	data = &flight_recorder[item];
430	(void) memset(data, 0, sizeof (*data));
431	data->bfe_type = type;
432	data->bfe_status = res;
433	data->bfe_sequence = sequence;
434	data->bfe_time = time(NULL);
435}
436
437/*ARGSUSED*/
438static int
439run_single_int_callback(void *arg, int columns, char **vals, char **names)
440{
441	struct run_single_int_info *info = arg;
442	uint32_t val;
443
444	char *endptr = vals[0];
445
446	assert(info->rs_result != REP_PROTOCOL_SUCCESS);
447	assert(columns == 1);
448
449	if (vals[0] == NULL)
450		return (BACKEND_CALLBACK_CONTINUE);
451
452	errno = 0;
453	val = strtoul(vals[0], &endptr, 10);
454	if ((val == 0 && endptr == vals[0]) || *endptr != 0 || errno != 0)
455		backend_panic("malformed integer \"%20s\"", vals[0]);
456
457	*info->rs_out = val;
458	info->rs_result = REP_PROTOCOL_SUCCESS;
459	return (BACKEND_CALLBACK_CONTINUE);
460}
461
462/*ARGSUSED*/
463int
464backend_fail_if_seen(void *arg, int columns, char **vals, char **names)
465{
466	return (BACKEND_CALLBACK_ABORT);
467}
468
469/*
470 * check to see if we can successfully start a transaction;  if not, the
471 * filesystem is mounted read-only.
472 */
473static int
474backend_is_readonly(struct sqlite *db, const char *path)
475{
476	int r;
477	statvfs64_t stat;
478
479	if (statvfs64(path, &stat) == 0 && (stat.f_flag & ST_RDONLY))
480		return (SQLITE_READONLY);
481
482	r = sqlite_exec(db,
483	    "BEGIN TRANSACTION; "
484	    "UPDATE schema_version SET schema_version = schema_version; ",
485	    NULL, NULL, NULL);
486	(void) sqlite_exec(db, "ROLLBACK TRANSACTION", NULL, NULL, NULL);
487	return (r);
488}
489
490static void
491backend_trace_sql(void *arg, const char *sql)
492{
493	sqlite_backend_t *be = arg;
494
495	if (backend_print_trace) {
496		(void) fprintf(stderr, "%d: %s\n", be->be_type, sql);
497	}
498}
499
/*
 * Backend bookkeeping, one slot per backend type.  bes[] entries are tested
 * against NULL before use in backend_panic(), so a NULL entry means that
 * backend is not available.  NOTE(review): be_info[] looks like the storage
 * that bes[] entries point into; the code that sets this up is not visible
 * here -- confirm.
 */
static sqlite_backend_t be_info[BACKEND_TYPE_TOTAL];
static sqlite_backend_t *bes[BACKEND_TYPE_TOTAL];

/*
 * For a native build, repositories are created from scratch, so upgrade
 * is not an issue.  This variable is implicitly protected by
 * bes[BACKEND_TYPE_NORMAL]->be_lock.
 */
#ifdef NATIVE_BUILD
static boolean_t be_normal_upgraded = B_TRUE;
#else
static boolean_t be_normal_upgraded = B_FALSE;
#endif	/* NATIVE_BUILD */
513
514/*
515 * Has backend been upgraded? In nonpersistent case, answer is always
516 * yes.
517 */
518boolean_t
519backend_is_upgraded(backend_tx_t *bt)
520{
521	if (bt->bt_type == BACKEND_TYPE_NONPERSIST)
522		return (B_TRUE);
523	return (be_normal_upgraded);
524}
525
526#define	BACKEND_PANIC_TIMEOUT	(50 * MILLISEC)
527/*
528 * backend_panic() -- some kind of database problem or corruption has been hit.
529 * We attempt to quiesce the other database users -- all of the backend sql
530 * entry points will call backend_panic(NULL) if a panic is in progress, as
531 * will any attempt to start a transaction.
532 *
533 * We give threads holding a backend lock 50ms (BACKEND_PANIC_TIMEOUT) to
534 * either drop the lock or call backend_panic().  If they don't respond in
535 * time, we'll just exit anyway.
536 */
537void
538backend_panic(const char *format, ...)
539{
540	int i;
541	va_list args;
542	int failed = 0;
543
544	(void) pthread_mutex_lock(&backend_panic_lock);
545	if (backend_panic_thread != 0) {
546		(void) pthread_mutex_unlock(&backend_panic_lock);
547		/*
548		 * first, drop any backend locks we're holding, then
549		 * sleep forever on the panic_cv.
550		 */
551		for (i = 0; i < BACKEND_TYPE_TOTAL; i++) {
552			if (bes[i] != NULL &&
553			    bes[i]->be_thread == pthread_self())
554				(void) pthread_mutex_unlock(&bes[i]->be_lock);
555		}
556		(void) pthread_mutex_lock(&backend_panic_lock);
557		for (;;)
558			(void) pthread_cond_wait(&backend_panic_cv,
559			    &backend_panic_lock);
560	}
561	backend_panic_thread = pthread_self();
562	(void) pthread_mutex_unlock(&backend_panic_lock);
563
564	for (i = 0; i < BACKEND_TYPE_TOTAL; i++) {
565		if (bes[i] != NULL && bes[i]->be_thread == pthread_self())
566			(void) pthread_mutex_unlock(&bes[i]->be_lock);
567	}
568
569	va_start(args, format);
570	configd_vcritical(format, args);
571	va_end(args);
572
573	for (i = 0; i < BACKEND_TYPE_TOTAL; i++) {
574		timespec_t rel;
575
576		rel.tv_sec = 0;
577		rel.tv_nsec = BACKEND_PANIC_TIMEOUT;
578
579		if (bes[i] != NULL && bes[i]->be_thread != pthread_self()) {
580			if (pthread_mutex_reltimedlock_np(&bes[i]->be_lock,
581			    &rel) != 0)
582				failed++;
583		}
584	}
585	if (failed) {
586		configd_critical("unable to quiesce database\n");
587	}
588
589	if (backend_panic_abort)
590		abort();
591
592	exit(CONFIGD_EXIT_DATABASE_BAD);
593}
594
595/*
596 * Returns
597 *   _SUCCESS
598 *   _DONE - callback aborted query
599 *   _NO_RESOURCES - out of memory (_FULL & _TOOBIG?)
600 */
601static int
602backend_error(sqlite_backend_t *be, int error, char *errmsg)
603{
604	if (error == SQLITE_OK)
605		return (REP_PROTOCOL_SUCCESS);
606
607	switch (error) {
608	case SQLITE_ABORT:
609		free(errmsg);
610		return (REP_PROTOCOL_DONE);
611
612	case SQLITE_NOMEM:
613	case SQLITE_FULL:
614	case SQLITE_TOOBIG:
615		free(errmsg);
616		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
617
618	default:
619		backend_panic("%s: db error: %s", be->be_path, errmsg);
620		/*NOTREACHED*/
621	}
622}
623
624static void
625backend_backup_cleanup(const char **out_arg, ssize_t out_sz)
626{
627	char **out = (char **)out_arg;
628
629	while (out_sz-- > 0)
630		free(*out++);
631	free(out_arg);
632}
633
/*
 * Builds an inverse-time-sorted array of backup files.  The path is a
 * single buffer, and the pointers look like:
 *
 *	/this/is/a/full/path/to/repository-name-YYYYMMDDHHMMSS
 *	^pathname		^	       ^(pathname+pathlen)
 *				basename
 *
 * dirname will either be pathname, or ".".
 *
 * The buffer is modified temporarily (a NUL is written at pathname+pathlen,
 * and at the start of basename while the directory is opened) and is
 * restored before returning, on both the success and the failure path.
 *
 * On success *out_arg is set to a malloc()ed array of strdup()ed directory
 * entry names (file names only, no directory part), ordered so that names
 * with the greatest timestamp suffix come first; it is NULL if nothing
 * matched.  backend_backup_cleanup() frees such an array.  On failure
 * *out_arg is set to NULL and anything collected so far is freed.
 *
 * Returns the number of elements in the array, 0 if there are no previous
 * backups, or -1 on error.
 */
static ssize_t
backend_backup_get_prev(char *pathname, size_t pathlen, const char ***out_arg)
{
	char b_start, b_end;	/* saved bytes overwritten by '\0' */
	DIR *dir;
	char **out = NULL;
	char *name, *p;
	char *dirname, *basename;
	char *pathend;
	struct dirent *ent;

	size_t count = 0;
	size_t baselen;

	/*
	 * year, month, day, hour, min, sec, plus an '_'.
	 */
	const size_t ndigits = 4 + 5*2 + 1;
	const size_t baroffset = 4 + 2*2;	/* position of the '_' */

	size_t idx;

	/* Terminate the path at pathlen, remembering what was there. */
	pathend = pathname + pathlen;
	b_end = *pathend;
	*pathend = '\0';

	basename = strrchr(pathname, '/');

	if (basename != NULL) {
		assert(pathend > pathname && basename < pathend);
		basename++;
		dirname = pathname;
	} else {
		basename = pathname;
		dirname = ".";
	}

	baselen = strlen(basename);

	/*
	 * munge the string temporarily for the opendir(), then restore it.
	 */
	b_start = basename[0];

	basename[0] = '\0';
	dir = opendir(dirname);
	basename[0] = b_start;		/* restore path */

	if (dir == NULL)
		goto fail;


	while ((ent = readdir(dir)) != NULL) {
		/*
		 * Must match:
		 *	basename-YYYYMMDD_HHMMSS
		 * or we ignore it.
		 */
		if (strncmp(ent->d_name, basename, baselen) != 0)
			continue;

		name = ent->d_name;
		if (name[baselen] != '-')
			continue;

		p = name + baselen + 1;

		/* Every position is a digit except baroffset, which is '_'. */
		for (idx = 0; idx < ndigits; idx++) {
			char c = p[idx];
			if (idx == baroffset && c != '_')
				break;
			if (idx != baroffset && (c < '0' || c > '9'))
				break;
		}
		if (idx != ndigits || p[idx] != '\0')
			continue;

		/*
		 * We have a match.  insertion-sort it into our list.
		 */
		name = strdup(name);
		if (name == NULL)
			goto fail_closedir;
		p = strrchr(name, '-');

		/*
		 * Walk the list carrying one "in hand" entry (name, with p
		 * at its timestamp suffix).  Whenever the entry in hand
		 * sorts after the one in the list, swap them and carry the
		 * displaced entry onward.  An exact duplicate is dropped.
		 */
		for (idx = 0; idx < count; idx++) {
			char *tmp = out[idx];
			char *tp = strrchr(tmp, '-');

			int cmp = strcmp(p, tp);
			if (cmp == 0)
				cmp = strcmp(name, tmp);

			if (cmp == 0) {
				free(name);
				name = NULL;
				break;
			} else if (cmp > 0) {
				out[idx] = name;
				name = tmp;
				p = tp;
			}
		}

		/* Reached the end: the entry in hand becomes the new tail. */
		if (idx == count) {
			char **new_out = realloc(out,
			    (count + 1) * sizeof (*out));

			if (new_out == NULL) {
				free(name);
				goto fail_closedir;
			}

			out = new_out;
			out[count++] = name;
		} else {
			assert(name == NULL);
		}
	}
	(void) closedir(dir);

	/* basename + baselen is pathend; put back the byte saved above. */
	basename[baselen] = b_end;

	*out_arg = (const char **)out;
	return (count);

fail_closedir:
	(void) closedir(dir);
fail:
	basename[0] = b_start;
	*pathend = b_end;

	backend_backup_cleanup((const char **)out, count);

	*out_arg = NULL;
	return (-1);
}
784
785/*
786 * Copies the repository path into out, a buffer of out_len bytes,
787 * removes the ".db" (or whatever) extension, and, if name is non-NULL,
788 * appends "-name" to it.  If name is non-NULL, it can fail with:
789 *
790 *	_TRUNCATED	will not fit in buffer.
791 *	_BAD_REQUEST	name is not a valid identifier
792 */
793static rep_protocol_responseid_t
794backend_backup_base(sqlite_backend_t *be, const char *name,
795    char *out, size_t out_len)
796{
797	char *p, *q;
798	size_t len;
799
800	/*
801	 * for paths of the form /path/to/foo.db, we truncate at the final
802	 * '.'.
803	 */
804	(void) strlcpy(out, IS_VOLATILE(be) ? be->be_ppath : be->be_path,
805	    out_len);
806
807	p = strrchr(out, '/');
808	q = strrchr(out, '.');
809
810	if (p != NULL && q != NULL && q > p)
811		*q = 0;
812
813	if (name != NULL) {
814		len = strlen(out);
815		assert(len < out_len);
816
817		out += len;
818		out_len -= len;
819
820		len = strlen(name);
821
822		/*
823		 * verify that the name tag is entirely alphabetic,
824		 * non-empty, and not too long.
825		 */
826		if (len == 0 || len >= REP_PROTOCOL_NAME_LEN ||
827		    uu_check_name(name, UU_NAME_DOMAIN) < 0)
828			return (REP_PROTOCOL_FAIL_BAD_REQUEST);
829
830		if (snprintf(out, out_len, "-%s", name) >= out_len)
831			return (REP_PROTOCOL_FAIL_TRUNCATED);
832	}
833
834	return (REP_PROTOCOL_SUCCESS);
835}
836
837/*
838 * Make a checkpoint of the repository, so that we can use it for a backup
839 * when the root file system becomes read/write.  We'll first copy the
840 * repository into a temporary file and then rename it to
841 * REPOSITORY_CHECKPOINT.  This is protection against configd crashing in
842 * the middle of the copy and leaving a partial copy at
843 * REPOSITORY_CHECKPOINT.  Renames are atomic.
844 */
845static rep_protocol_responseid_t
846backend_checkpoint_repository(sqlite_backend_t *be)
847{
848	rep_protocol_responseid_t r;
849
850	assert(be->be_readonly);	/* Only need a checkpoint if / is ro */
851	assert(be->be_type == BACKEND_TYPE_NORMAL);
852	assert(be->be_checkpoint == NULL); /* Only 1 checkpoint */
853
854	r = backend_copy_repository(be->be_path, REPOSITORY_CHECKPOINT, 0);
855	if (r == REP_PROTOCOL_SUCCESS)
856		be->be_checkpoint = REPOSITORY_CHECKPOINT;
857
858	flight_recorder_event(BE_FLIGHT_EV_CHECKPOINT,
859	    r == REP_PROTOCOL_SUCCESS ? BE_FLIGHT_ST_SUCCESS :
860	    BE_FLIGHT_ST_FAIL);
861
862	return (r);
863}
864
/*
 * See if a backup is needed.  We do a backup unless both files are
 * byte-for-byte identical.
 *
 * rep_name is the file that would be backed up and backup_name the
 * existing backup to compare it with.  Returns 0 only if both files could
 * be opened and read without error, are distinct files, and have identical
 * contents.  Returns 1 (backup needed) in every other case, including any
 * failure to open, stat or read either file.
 *
 * Descriptor ownership: once fdopen() succeeds the stream owns the
 * descriptor and fclose() closes it, so the local descriptor variable is
 * set to -1 at that point.  Calling close() on it again afterwards could,
 * in a multi-threaded process, close a descriptor that another thread has
 * opened in the meantime.
 */
static int
backend_check_backup_needed(const char *rep_name, const char *backup_name)
{
	int repfd = open(rep_name, O_RDONLY);
	int fd = open(backup_name, O_RDONLY);
	struct stat s_rep, s_backup;
	int c1, c2;
	int needed = 1;

	FILE *f_rep = NULL;
	FILE *f_backup = NULL;

	if (repfd < 0 || fd < 0)
		goto out;

	if (fstat(repfd, &s_rep) < 0 || fstat(fd, &s_backup) < 0)
		goto out;

	/*
	 * if they are the same file, we need to do a backup to break the
	 * hard link or symlink involved.
	 */
	if (s_rep.st_ino == s_backup.st_ino && s_rep.st_dev == s_backup.st_dev)
		goto out;

	if (s_rep.st_size != s_backup.st_size)
		goto out;

	if ((f_rep = fdopen(repfd, "r")) == NULL)
		goto out;
	repfd = -1;		/* now owned by f_rep */

	if ((f_backup = fdopen(fd, "r")) == NULL)
		goto out;
	fd = -1;		/* now owned by f_backup */

	do {
		c1 = getc(f_rep);
		c2 = getc(f_backup);
		if (c1 != c2)
			goto out;
	} while (c1 != EOF);

	/* Both hit EOF together; only trust that if neither read failed. */
	if (!ferror(f_rep) && !ferror(f_backup))
		needed = 0;

out:
	if (f_rep != NULL)
		(void) fclose(f_rep);
	if (f_backup != NULL)
		(void) fclose(f_backup);
	if (repfd >= 0)
		(void) close(repfd);
	if (fd >= 0)
		(void) close(fd);
	return (needed);
}
926
927/*
928 * This interface is called to perform the actual copy
929 *
930 * Return:
931 *	_FAIL_UNKNOWN		read/write fails
932 *	_FAIL_NO_RESOURCES	out of memory
933 *	_SUCCESS		copy succeeds
934 */
935static rep_protocol_responseid_t
936backend_do_copy(const char *src, int srcfd, const char *dst,
937    int dstfd, size_t *sz)
938{
939	char *buf;
940	off_t nrd, nwr, n, r_off = 0, w_off = 0;
941
942	if ((buf = malloc(8192)) == NULL)
943		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
944
945	while ((nrd = read(srcfd, buf, 8192)) != 0) {
946		if (nrd < 0) {
947			if (errno == EINTR)
948				continue;
949
950			configd_critical(
951			    "Backend copy failed: fails to read from %s "
952			    "at offset %d: %s\n", src, r_off, strerror(errno));
953			free(buf);
954			return (REP_PROTOCOL_FAIL_UNKNOWN);
955		}
956
957		r_off += nrd;
958
959		nwr = 0;
960		do {
961			if ((n = write(dstfd, &buf[nwr], nrd - nwr)) < 0) {
962				if (errno == EINTR)
963					continue;
964
965				configd_critical(
966				    "Backend copy failed: fails to write to %s "
967				    "at offset %d: %s\n", dst, w_off,
968				    strerror(errno));
969				free(buf);
970				return (REP_PROTOCOL_FAIL_UNKNOWN);
971			}
972
973			nwr += n;
974			w_off += n;
975
976		} while (nwr < nrd);
977	}
978
979	if (sz)
980		*sz = w_off;
981
982	free(buf);
983	return (REP_PROTOCOL_SUCCESS);
984}
985
986/*
987 * Can return:
988 *	_BAD_REQUEST		name is not valid
989 *	_TRUNCATED		name is too long for current repository path
990 *	_UNKNOWN		failed for unknown reason (details written to
991 *				console)
992 *	_BACKEND_READONLY	backend is not writable
993 *	_NO_RESOURCES		out of memory
994 *	_SUCCESS		Backup completed successfully.
995 */
996static rep_protocol_responseid_t
997backend_create_backup_locked(sqlite_backend_t *be, const char *name)
998{
999	const char **old_list;
1000	ssize_t old_sz;
1001	ssize_t old_max = max_repository_backups;
1002	ssize_t cur;
1003	char *finalname;
1004	char *finalpath;
1005	char *tmppath;
1006	int infd, outfd;
1007	size_t len;
1008	time_t now;
1009	struct tm now_tm;
1010	be_flight_status_t backup_type;
1011	rep_protocol_responseid_t result;
1012	const char *src;
1013	int use_checkpoint;
1014
1015	if (strcmp(name, REPOSITORY_BOOT_BACKUP) == 0) {
1016		backup_type = BE_FLIGHT_ST_BOOT_BACKUP;
1017	} else if (strcmp(name, "manifest_import") ==  0) {
1018		backup_type = BE_FLIGHT_ST_MI_BACKUP;
1019	} else {
1020		backup_type = BE_FLIGHT_ST_OTHER_BACKUP;
1021	}
1022	flight_recorder_event(BE_FLIGHT_EV_BACKUP_ENTER, backup_type);
1023
1024	if ((finalpath = malloc(PATH_MAX)) == NULL)
1025		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
1026
1027	if ((tmppath = malloc(PATH_MAX)) == NULL) {
1028		free(finalpath);
1029		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
1030	}
1031
1032	if (be->be_readonly) {
1033		flight_recorder_event(BE_FLIGHT_EV_NO_BACKUP, BE_FLIGHT_ST_RO);
1034		result = REP_PROTOCOL_FAIL_BACKEND_READONLY;
1035		goto out;
1036	}
1037
1038	result = backend_backup_base(be, name, finalpath, PATH_MAX);
1039	if (result != REP_PROTOCOL_SUCCESS)
1040		goto out;
1041
1042	/*
1043	 * If this is a boot backup and if we made a checkpoint before the
1044	 * root file system became read/write, then we should use the
1045	 * checkpoint as the source.  Otherwise, we'll use the actual
1046	 * repository as the source.
1047	 */
1048	if (be->be_checkpoint && name &&
1049	    strcmp(REPOSITORY_BOOT_BACKUP, name) == 0) {
1050		backup_type = BE_FLIGHT_ST_CHECKPOINT_BACKUP;
1051		use_checkpoint = 1;
1052		src = be->be_checkpoint;
1053	} else {
1054		backup_type = BE_FLIGHT_ST_REPO_BACKUP;
1055		use_checkpoint = 0;
1056		src = be->be_path;
1057	}
1058	flight_recorder_event(BE_FLIGHT_EV_BACKUP, backup_type);
1059	if (!backend_check_backup_needed(src, finalpath)) {
1060		/*
1061		 * No changes, so there is no need for a backup.
1062		 */
1063		flight_recorder_event(BE_FLIGHT_EV_NO_BACKUP,
1064		    BE_FLIGHT_ST_DUPLICATE);
1065		result = REP_PROTOCOL_SUCCESS;
1066		goto out;
1067	}
1068
1069	/*
1070	 * remember the original length, and the basename location
1071	 */
1072	len = strlen(finalpath);
1073	finalname = strrchr(finalpath, '/');
1074	if (finalname != NULL)
1075		finalname++;
1076	else
1077		finalname = finalpath;
1078
1079	(void) strlcpy(tmppath, finalpath, PATH_MAX);
1080	if (strlcat(tmppath, "-tmpXXXXXX", PATH_MAX) >= PATH_MAX) {
1081		result = REP_PROTOCOL_FAIL_TRUNCATED;
1082		goto out;
1083	}
1084
1085	now = time(NULL);
1086	if (localtime_r(&now, &now_tm) == NULL) {
1087		configd_critical(
1088		    "\"%s\" backup failed: localtime(3C) failed: %s\n", name,
1089		    strerror(errno));
1090		result = REP_PROTOCOL_FAIL_UNKNOWN;
1091		goto out;
1092	}
1093
1094	if (strftime(finalpath + len, PATH_MAX - len,
1095	    "-%Y""%m""%d""_""%H""%M""%S", &now_tm) >= PATH_MAX - len) {
1096		result = REP_PROTOCOL_FAIL_TRUNCATED;
1097		goto out;
1098	}
1099
1100	infd = open(src, O_RDONLY);
1101	if (infd < 0) {
1102		configd_critical("\"%s\" backup failed: opening %s: %s\n", name,
1103		    src, strerror(errno));
1104		result = REP_PROTOCOL_FAIL_UNKNOWN;
1105		goto out;
1106	}
1107
1108	outfd = mkstemp(tmppath);
1109	if (outfd < 0) {
1110		configd_critical("\"%s\" backup failed: mkstemp(%s): %s\n",
1111		    name, tmppath, strerror(errno));
1112		(void) close(infd);
1113		result = REP_PROTOCOL_FAIL_UNKNOWN;
1114		goto out;
1115	}
1116
1117	if ((result = backend_do_copy(src, infd, (const char *)tmppath,
1118	    outfd, NULL)) != REP_PROTOCOL_SUCCESS)
1119		goto fail;
1120
1121	/*
1122	 * grab the old list before doing our re-name.
1123	 */
1124	if (old_max > 0)
1125		old_sz = backend_backup_get_prev(finalpath, len, &old_list);
1126
1127	if (rename(tmppath, finalpath) < 0) {
1128		configd_critical(
1129		    "\"%s\" backup failed: rename(%s, %s): %s\n",
1130		    name, tmppath, finalpath, strerror(errno));
1131		result = REP_PROTOCOL_FAIL_UNKNOWN;
1132		goto fail;
1133	}
1134
1135	tmppath[len] = 0;	/* strip -XXXXXX, for reference symlink */
1136
1137	(void) unlink(tmppath);
1138	if (symlink(finalname, tmppath) < 0) {
1139		configd_critical(
1140		    "\"%s\" backup completed, but updating "
1141		    "\"%s\" symlink to \"%s\" failed: %s\n",
1142		    name, tmppath, finalname, strerror(errno));
1143	}
1144
1145	if (old_max > 0 && old_sz > 0) {
1146		/* unlink all but the first (old_max - 1) files */
1147		for (cur = old_max - 1; cur < old_sz; cur++) {
1148			(void) strlcpy(finalname, old_list[cur],
1149			    PATH_MAX - (finalname - finalpath));
1150			if (unlink(finalpath) < 0)
1151				configd_critical(
1152				    "\"%s\" backup completed, but removing old "
1153				    "file \"%s\" failed: %s\n",
1154				    name, finalpath, strerror(errno));
1155		}
1156
1157		backend_backup_cleanup(old_list, old_sz);
1158	}
1159
1160	result = REP_PROTOCOL_SUCCESS;
1161	flight_recorder_event(BE_FLIGHT_EV_BACKUP, BE_FLIGHT_ST_SUCCESS);
1162
1163fail:
1164	(void) close(infd);
1165	(void) close(outfd);
1166	if (result != REP_PROTOCOL_SUCCESS) {
1167		flight_recorder_event(BE_FLIGHT_EV_BACKUP, BE_FLIGHT_ST_FAIL);
1168		(void) unlink(tmppath);
1169	}
1170
1171out:
1172	/* Get rid of the checkpoint file now that we've used it. */
1173	if (use_checkpoint && (result == REP_PROTOCOL_SUCCESS)) {
1174		(void) unlink(be->be_checkpoint);
1175		be->be_checkpoint = NULL;
1176	}
1177	free(finalpath);
1178	free(tmppath);
1179
1180	return (result);
1181}
1182
1183/*
1184 * Check if value_tbl has been upgraded in the main database,  and
1185 * if not (if the value_order column is not present),  and do_upgrade is true,
1186 * upgrade value_tbl in repository to contain the additional value_order
1187 * column. The version of sqlite used means ALTER TABLE is not
1188 * available, so we cannot simply use "ALTER TABLE value_tbl ADD COLUMN".
1189 * Rather we need to create a temporary table with the additional column,
1190 * import the value_tbl, drop the original value_tbl, recreate the value_tbl
1191 * with the additional column, import the values from value_tbl_tmp,
1192 * reindex and finally drop value_tbl_tmp.  During boot, we wish to check
1193 * if the repository has been upgraded before it is writable,  so that
1194 * property value retrieval can use the appropriate form of the SELECT
1195 * statement that retrieves property values.  As a result, we need to check
1196 * if the repository has been upgraded prior to the point when we can
1197 * actually carry out the update.
1198 */
1199void
1200backend_check_upgrade(sqlite_backend_t *be, boolean_t do_upgrade)
1201{
1202	char *errp;
1203	int r;
1204
1205	if (be_normal_upgraded)
1206		return;
1207	/*
1208	 * Test if upgrade is needed. If value_order column does not exist,
1209	 * we need to upgrade the schema.
1210	 */
1211	r = sqlite_exec(be->be_db, "SELECT value_order FROM value_tbl LIMIT 1;",
1212	    NULL, NULL, NULL);
1213	if (r == SQLITE_ERROR && do_upgrade) {
1214		/* No value_order column - needs upgrade */
1215		configd_info("Upgrading SMF repository format...");
1216		r = sqlite_exec(be->be_db,
1217		    "BEGIN TRANSACTION; "
1218		    "CREATE TABLE value_tbl_tmp ( "
1219		    "value_id   INTEGER NOT NULL, "
1220		    "value_type CHAR(1) NOT NULL, "
1221		    "value_value VARCHAR NOT NULL, "
1222		    "value_order INTEGER DEFAULT 0); "
1223		    "INSERT INTO value_tbl_tmp "
1224		    "(value_id, value_type, value_value) "
1225		    "SELECT value_id, value_type, value_value FROM value_tbl; "
1226		    "DROP TABLE value_tbl; "
1227		    "CREATE TABLE value_tbl( "
1228		    "value_id   INTEGER NOT NULL, "
1229		    "value_type CHAR(1) NOT NULL, "
1230		    "value_value VARCHAR NOT NULL, "
1231		    "value_order INTEGER DEFAULT 0); "
1232		    "INSERT INTO value_tbl SELECT * FROM value_tbl_tmp; "
1233		    "CREATE INDEX value_tbl_id ON value_tbl (value_id); "
1234		    "DROP TABLE value_tbl_tmp; "
1235		    "COMMIT TRANSACTION; "
1236		    "VACUUM; ",
1237		    NULL, NULL, &errp);
1238		if (r == SQLITE_OK) {
1239			configd_info("SMF repository upgrade is complete.");
1240		} else {
1241			backend_panic("%s: repository upgrade failed: %s",
1242			    be->be_path, errp);
1243			/* NOTREACHED */
1244		}
1245	}
1246	if (r == SQLITE_OK)
1247		be_normal_upgraded = B_TRUE;
1248	else
1249		be_normal_upgraded = B_FALSE;
1250}
1251
1252static int
1253backend_check_readonly(sqlite_backend_t *be, int writing, hrtime_t t)
1254{
1255	const char *check_path;
1256	char *errp;
1257	struct sqlite *new;
1258	int r;
1259
1260	assert(be->be_readonly);
1261	assert(be == bes[BACKEND_TYPE_NORMAL]);
1262
1263	/*
1264	 * If we don't *need* to be writable, only check every once in a
1265	 * while.
1266	 */
1267	if (!writing) {
1268		if ((uint64_t)(t - be->be_lastcheck) <
1269		    BACKEND_READONLY_CHECK_INTERVAL)
1270			return (REP_PROTOCOL_SUCCESS);
1271		be->be_lastcheck = t;
1272	}
1273
1274	/*
1275	 * It could be that the repository has been moved to non-persistent
1276	 * storage for performance reasons.  In this case we need to check
1277	 * the persistent path to see if it is writable.  The
1278	 * non-persistent path will always be writable.
1279	 */
1280	check_path = IS_VOLATILE(be) ? be->be_ppath : be->be_path;
1281
1282	new = sqlite_open(check_path, 0600, &errp);
1283	if (new == NULL) {
1284		backend_panic("reopening %s: %s\n", check_path, errp);
1285		/*NOTREACHED*/
1286	}
1287	r = backend_is_readonly(new, check_path);
1288
1289	if (r != SQLITE_OK) {
1290		/*
1291		 * The underlying storage for the permanent repository is
1292		 * still read-only, so we don't want to change the state or
1293		 * move the checkpointed backup if it exists.  On the other
1294		 * hand if the repository has been copied to volatile
1295		 * storage, we'll let our caller go ahead and write to the
1296		 * database.
1297		 */
1298		sqlite_close(new);
1299		if (writing && (IS_VOLATILE(be) == 0))
1300			return (REP_PROTOCOL_FAIL_BACKEND_READONLY);
1301		return (REP_PROTOCOL_SUCCESS);
1302	}
1303
1304	/*
1305	 * We can write!  If the repository is not on volatile storage,
1306	 * swap the db handles.  Mark ourself as writable, upgrade the
1307	 * repository if necessary and make a backup.
1308	 */
1309	be->be_readonly = 0;
1310	flight_recorder_event(BE_FLIGHT_EV_TRANS_RW, BE_FLIGHT_ST_RW);
1311	if (IS_VOLATILE(be)) {
1312		/*
1313		 * If the repository is on volatile storage, don't switch
1314		 * the handles.  We'll continue to use the repository that
1315		 * is on tmpfs until we're told to move it back by one of
1316		 * our clients.  Clients, specifically manifest_import,
1317		 * move the repository to tmpfs for performance reasons,
1318		 * and that is the reason to not switch it back until we're
1319		 * told to do so.
1320		 */
1321		flight_recorder_event(BE_FLIGHT_EV_TRANS_RW,
1322		    BE_FLIGHT_ST_NO_SWITCH);
1323		sqlite_close(new);
1324	} else {
1325		flight_recorder_event(BE_FLIGHT_EV_TRANS_RW,
1326		    BE_FLIGHT_ST_SWITCH);
1327		sqlite_close(be->be_db);
1328		be->be_db = new;
1329	}
1330
1331	if (be->be_type == BACKEND_TYPE_NORMAL)
1332		backend_check_upgrade(be, B_TRUE);
1333
1334	if (backend_create_backup_locked(be, REPOSITORY_BOOT_BACKUP) !=
1335	    REP_PROTOCOL_SUCCESS) {
1336		configd_critical(
1337		    "unable to create \"%s\" backup of \"%s\"\n",
1338		    REPOSITORY_BOOT_BACKUP, be->be_path);
1339	}
1340
1341	return (REP_PROTOCOL_SUCCESS);
1342}
1343
1344/*
1345 * If t is not BACKEND_TYPE_NORMAL, can fail with
1346 *   _BACKEND_ACCESS - backend does not exist
1347 *
1348 * If writing is nonzero, can also fail with
1349 *   _BACKEND_READONLY - backend is read-only
1350 */
1351static int
1352backend_lock(backend_type_t t, int writing, sqlite_backend_t **bep)
1353{
1354	sqlite_backend_t *be = NULL;
1355	hrtime_t ts, vts;
1356
1357	*bep = NULL;
1358
1359	assert(t == BACKEND_TYPE_NORMAL ||
1360	    t == BACKEND_TYPE_NONPERSIST);
1361
1362	be = bes[t];
1363	if (t == BACKEND_TYPE_NORMAL)
1364		assert(be != NULL);		/* should always be there */
1365
1366	if (be == NULL)
1367		return (REP_PROTOCOL_FAIL_BACKEND_ACCESS);
1368
1369	if (backend_panic_thread != 0)
1370		backend_panic(NULL);		/* don't proceed */
1371
1372	ts = gethrtime();
1373	vts = gethrvtime();
1374	(void) pthread_mutex_lock(&be->be_lock);
1375	UPDATE_TOTALS_WR(be, writing, bt_lock, ts, vts);
1376
1377	if (backend_panic_thread != 0) {
1378		(void) pthread_mutex_unlock(&be->be_lock);
1379		backend_panic(NULL);		/* don't proceed */
1380	}
1381	be->be_thread = pthread_self();
1382
1383	if (be->be_readonly) {
1384		int r;
1385		assert(t == BACKEND_TYPE_NORMAL);
1386
1387		r = backend_check_readonly(be, writing, ts);
1388		if (r != REP_PROTOCOL_SUCCESS) {
1389			be->be_thread = 0;
1390			(void) pthread_mutex_unlock(&be->be_lock);
1391			return (r);
1392		}
1393	}
1394
1395	if (backend_do_trace)
1396		(void) sqlite_trace(be->be_db, backend_trace_sql, be);
1397	else
1398		(void) sqlite_trace(be->be_db, NULL, NULL);
1399
1400	be->be_writing = writing;
1401	*bep = be;
1402	return (REP_PROTOCOL_SUCCESS);
1403}
1404
1405static void
1406backend_unlock(sqlite_backend_t *be)
1407{
1408	be->be_writing = 0;
1409	be->be_thread = 0;
1410	(void) pthread_mutex_unlock(&be->be_lock);
1411}
1412
1413static void
1414backend_destroy(sqlite_backend_t *be)
1415{
1416	if (be->be_db != NULL) {
1417		sqlite_close(be->be_db);
1418		be->be_db = NULL;
1419	}
1420	be->be_thread = 0;
1421	(void) pthread_mutex_unlock(&be->be_lock);
1422	(void) pthread_mutex_destroy(&be->be_lock);
1423}
1424
1425static void
1426backend_create_finish(backend_type_t backend_id, sqlite_backend_t *be)
1427{
1428	assert(MUTEX_HELD(&be->be_lock));
1429	assert(be == &be_info[backend_id]);
1430
1431	bes[backend_id] = be;
1432	(void) pthread_mutex_unlock(&be->be_lock);
1433}
1434
/*
 * Write the whole NUL-terminated string mess to fd, calling write(2)
 * repeatedly until every byte has been accepted.  Short writes are
 * continued from where they stopped, and a write interrupted by a signal
 * (EINTR) is retried.
 *
 * Returns 0 on success and -1 if write(2) fails, with errno as left by
 * write(2).  An empty string succeeds without touching fd.
 */
static int
backend_fd_write(int fd, const char *mess)
{
	size_t remaining = strlen(mess);
	ssize_t written;

	while (remaining > 0) {
		written = write(fd, mess, remaining);
		if (written < 0) {
			if (errno == EINTR)
				continue;	/* nothing was written; retry */
			return (-1);
		}
		mess += written;
		remaining -= (size_t)written;
	}
	return (0);
}
1449
1450/*
1451 * Can return:
1452 *	_BAD_REQUEST		name is not valid
1453 *	_TRUNCATED		name is too long for current repository path
1454 *	_UNKNOWN		failed for unknown reason (details written to
1455 *				console)
1456 *	_BACKEND_READONLY	backend is not writable
1457 *	_NO_RESOURCES		out of memory
1458 *	_SUCCESS		Backup completed successfully.
1459 */
1460rep_protocol_responseid_t
1461backend_create_backup(const char *name)
1462{
1463	rep_protocol_responseid_t result;
1464	sqlite_backend_t *be;
1465
1466	flight_recorder_event(BE_FLIGHT_EV_BACKUP, BE_FLIGHT_ST_CLIENT);
1467	result = backend_lock(BACKEND_TYPE_NORMAL, 0, &be);
1468	assert(result == REP_PROTOCOL_SUCCESS);
1469
1470	result = backend_create_backup_locked(be, name);
1471	backend_unlock(be);
1472
1473	return (result);
1474}
1475
1476/*
1477 * This function makes a copy of the repository at src, placing the copy at
1478 * dst.  It is used to copy a repository on permanent storage to volatile
1479 * storage or vice versa.  If the source file is on volatile storage, it is
1480 * often times desirable to delete it after the copy has been made and
1481 * verified.  To remove the source repository, set remove_src to 1.
1482 *
1483 * Can return:
1484 *
1485 *	REP_PROTOCOL_SUCCESS		successful copy and rename
1486 *	REP_PROTOCOL_FAIL_UNKNOWN	file operation error
1487 *	REP_PROTOCOL_FAIL_NO_RESOURCES	out of memory
1488 */
1489static rep_protocol_responseid_t
1490backend_copy_repository(const char *src, const char *dst, int remove_src)
1491{
1492	int srcfd, dstfd;
1493	char *tmppath = malloc(PATH_MAX);
1494	rep_protocol_responseid_t res = REP_PROTOCOL_SUCCESS;
1495	struct stat s_buf;
1496	size_t cpsz, sz;
1497
1498	if (tmppath == NULL) {
1499		res = REP_PROTOCOL_FAIL_NO_RESOURCES;
1500		goto out;
1501	}
1502
1503	/*
1504	 * Create and open the related db files
1505	 */
1506	(void) strlcpy(tmppath, dst, PATH_MAX);
1507	sz = strlcat(tmppath, "-XXXXXX", PATH_MAX);
1508	assert(sz < PATH_MAX);
1509	if (sz >= PATH_MAX) {
1510		configd_critical(
1511		    "Backend copy failed: strlcat %s: overflow\n", tmppath);
1512		abort();
1513	}
1514
1515	if ((dstfd = mkstemp(tmppath)) < 0) {
1516		configd_critical("Backend copy failed: mkstemp %s: %s\n",
1517		    tmppath, strerror(errno));
1518		res = REP_PROTOCOL_FAIL_UNKNOWN;
1519		goto out;
1520	}
1521
1522	if ((srcfd = open(src, O_RDONLY)) < 0) {
1523		configd_critical("Backend copy failed: opening %s: %s\n",
1524		    src, strerror(errno));
1525		res = REP_PROTOCOL_FAIL_UNKNOWN;
1526		goto errexit;
1527	}
1528
1529	/*
1530	 * fstat the backend before copy for sanity check.
1531	 */
1532	if (fstat(srcfd, &s_buf) < 0) {
1533		configd_critical("Backend copy failed: fstat %s: %s\n",
1534		    src, strerror(errno));
1535		res = REP_PROTOCOL_FAIL_UNKNOWN;
1536		goto errexit;
1537	}
1538
1539	if ((res = backend_do_copy(src, srcfd, dst, dstfd, &cpsz)) !=
1540	    REP_PROTOCOL_SUCCESS)
1541		goto errexit;
1542
1543	if (cpsz != s_buf.st_size) {
1544		configd_critical("Backend copy failed: incomplete copy\n");
1545		res = REP_PROTOCOL_FAIL_UNKNOWN;
1546		goto errexit;
1547	}
1548
1549	/*
1550	 * Rename tmppath to dst
1551	 */
1552	if (rename(tmppath, dst) < 0) {
1553		configd_critical(
1554		    "Backend copy failed: rename %s to %s: %s\n",
1555		    tmppath, dst, strerror(errno));
1556		res = REP_PROTOCOL_FAIL_UNKNOWN;
1557	}
1558
1559errexit:
1560	if (res != REP_PROTOCOL_SUCCESS && unlink(tmppath) < 0)
1561		configd_critical(
1562		    "Backend copy failed: remove %s: %s\n",
1563		    tmppath, strerror(errno));
1564
1565	(void) close(srcfd);
1566	(void) close(dstfd);
1567
1568out:
1569	free(tmppath);
1570	if (remove_src) {
1571		if (unlink(src) < 0)
1572			configd_critical(
1573			    "Backend copy failed: remove %s: %s\n",
1574			    src, strerror(errno));
1575	}
1576
1577	return (res);
1578}
1579
1580/*
1581 * Perform sanity check on the repository.
1582 * Return 0 if check succeeds or -1 if fails.
1583 */
1584static int
1585backend_switch_check(struct sqlite *be_db, char **errp)
1586{
1587	struct run_single_int_info info;
1588	uint32_t val = -1UL;
1589	int r;
1590
1591	info.rs_out = &val;
1592	info.rs_result = REP_PROTOCOL_FAIL_NOT_FOUND;
1593
1594	r = sqlite_exec(be_db,
1595	    "SELECT schema_version FROM schema_version;",
1596	    run_single_int_callback, &info, errp);
1597
1598	if (r == SQLITE_OK &&
1599	    info.rs_result != REP_PROTOCOL_FAIL_NOT_FOUND &&
1600	    val == BACKEND_SCHEMA_VERSION)
1601		return (0);
1602	else
1603		return (-1);
1604}
1605
1606/*
1607 * backend_switch() implements the REP_PROTOCOL_SWITCH request from
1608 * clients.  First, it blocks all other clients from accessing the
1609 * repository by calling backend_lock to lock the repository.  It either
1610 * copies the repository from it's permanent storage location
1611 * (REPOSITORY_DB) to its fast volatile location (FAST_REPOSITORY_DB), or
1612 * vice versa.  dir determines the direction of the copy.
1613 *
1614 *	dir = 0	Copy from permanent location to volatile location.
1615 *	dir = 1	Copy from volatile location to permanent location.
1616 *
1617 * Can return:
1618 *	REP_PROTOCOL_SUCCESS			successful switch
1619 *	REP_PROTOCOL_FAIL_BACKEND_ACCESS	backen access fails
1620 *	REP_PROTOCOL_FAIL_BACKEND_READONLY	backend is not writable
1621 *	REP_PROTOCOL_FAIL_UNKNOWN		file operation error
1622 *	REP_PROTOCOL_FAIL_NO_RESOURCES		out of memory
1623 */
1624rep_protocol_responseid_t
1625backend_switch(int dir)
1626{
1627	rep_protocol_responseid_t result;
1628	sqlite_backend_t *be;
1629	struct sqlite *new;
1630	char *errp;
1631	const char *dst;
1632
1633	flight_recorder_event(BE_FLIGHT_EV_SWITCH, BE_FLIGHT_ST_CLIENT);
1634
1635	/*
1636	 * If switching back to the main repository, lock for writing.
1637	 * Otherwise, lock for reading.
1638	 */
1639	result = backend_lock(BACKEND_TYPE_NORMAL, dir ? 1 : 0,
1640	    &be);
1641	if (result != REP_PROTOCOL_SUCCESS)
1642		return (result);
1643
1644	if (dir) {
1645		flight_recorder_event(BE_FLIGHT_EV_SWITCH,
1646		    BE_FLIGHT_ST_PERMANENT);
1647		dst = REPOSITORY_DB;
1648	} else {
1649		flight_recorder_event(BE_FLIGHT_EV_SWITCH,
1650		    BE_FLIGHT_ST_FAST);
1651		dst = FAST_REPOSITORY_DB;
1652	}
1653
1654	/*
1655	 * Do the actual copy and rename
1656	 */
1657	if (strcmp(be->be_path, dst) == 0) {
1658		flight_recorder_event(BE_FLIGHT_EV_SWITCH,
1659		    BE_FLIGHT_ST_DUPLICATE);
1660		result = REP_PROTOCOL_SUCCESS;
1661		goto errout;
1662	}
1663
1664	result = backend_copy_repository(be->be_path, dst, dir);
1665	if (result != REP_PROTOCOL_SUCCESS) {
1666		goto errout;
1667	}
1668
1669	/*
1670	 * Do the backend sanity check and switch
1671	 */
1672	new = sqlite_open(dst, 0600, &errp);
1673	if (new != NULL) {
1674		/*
1675		 * Sanity check
1676		 */
1677		if (backend_switch_check(new, &errp) == 0) {
1678			free((char *)be->be_path);
1679			be->be_path = strdup(dst);
1680			if (be->be_path == NULL) {
1681				configd_critical(
1682				    "Backend switch failed: strdup %s: %s\n",
1683				    dst, strerror(errno));
1684				result = REP_PROTOCOL_FAIL_NO_RESOURCES;
1685				sqlite_close(new);
1686			} else {
1687				sqlite_close(be->be_db);
1688				be->be_db = new;
1689				if (dir) {
1690					/* We're back on permanent storage. */
1691					be->be_ppath = NULL;
1692				} else {
1693					/*
1694					 * Repository is now on volatile
1695					 * storage.  Save the location of
1696					 * the persistent repository.
1697					 */
1698					be->be_ppath = REPOSITORY_DB;
1699				}
1700			}
1701		} else {
1702			configd_critical(
1703			    "Backend switch failed: integrity check %s: %s\n",
1704			    dst, errp);
1705			result = REP_PROTOCOL_FAIL_BACKEND_ACCESS;
1706		}
1707	} else {
1708		configd_critical("Backend switch failed: sqlite_open %s: %s\n",
1709		    dst, errp);
1710		result = REP_PROTOCOL_FAIL_BACKEND_ACCESS;
1711	}
1712
1713errout:
1714	if (result == REP_PROTOCOL_SUCCESS) {
1715		flight_recorder_event(BE_FLIGHT_EV_SWITCH,
1716		    BE_FLIGHT_ST_SUCCESS);
1717	} else {
1718		flight_recorder_event(BE_FLIGHT_EV_SWITCH, BE_FLIGHT_ST_FAIL);
1719	}
1720	backend_unlock(be);
1721	return (result);
1722}
1723
1724/*
1725 * This routine is called to attempt the recovery of
1726 * the most recent valid repository if possible when configd
1727 * is restarted for some reasons or when system crashes
1728 * during the switch operation.  The repository databases
1729 * referenced here are indicators of successful switch
1730 * operations.
1731 */
1732static backend_switch_results_t
1733backend_switch_recovery(void)
1734{
1735	const char *fast_db = FAST_REPOSITORY_DB;
1736	char *errp = NULL;
1737	struct stat s_buf;
1738	struct sqlite *be_db;
1739	int r;
1740	backend_switch_results_t res = BACKEND_SWITCH_OK;
1741
1742	/*
1743	 * A good transient db containing most recent data can
1744	 * exist if svc.configd crashes during the
1745	 * switch operation.  If that is the case, check its
1746	 * integrity and use it.
1747	 */
1748	if (stat(fast_db, &s_buf) < 0) {
1749		return (BACKEND_SWITCH_OK);
1750	}
1751
1752	/* Determine if persistent repository is read-only */
1753	be_db = sqlite_open(REPOSITORY_DB, 0600, &errp);
1754	if (be_db == NULL) {
1755		configd_critical("Unable to open \"%s\".  %s\n",
1756		    REPOSITORY_DB, errp == NULL ? "" : errp);
1757		free(errp);
1758		return (BACKEND_SWITCH_FATAL);
1759	}
1760	r = backend_is_readonly(be_db, REPOSITORY_DB);
1761	sqlite_close(be_db);
1762	if (r != SQLITE_OK) {
1763		if (r == SQLITE_READONLY) {
1764			return (BACKEND_SWITCH_RO);
1765		}
1766		return (BACKEND_SWITCH_FATAL);
1767	}
1768
1769	/*
1770	 * Do sanity check on the db
1771	 */
1772	be_db = sqlite_open(fast_db, 0600, &errp);
1773
1774	if (be_db != NULL) {
1775		if (backend_switch_check(be_db, &errp) == 0) {
1776			if (backend_copy_repository(fast_db,
1777			    REPOSITORY_DB, 1) != REP_PROTOCOL_SUCCESS) {
1778				res = BACKEND_SWITCH_FATAL;
1779			}
1780		}
1781		sqlite_close(be_db);
1782	}
1783	free(errp);
1784
1785	/*
1786	 * If we get to this point, the fast_db has either been copied or
1787	 * it is useless.  Either way, get rid of it.
1788	 */
1789	(void) unlink(fast_db);
1790
1791	return (res);
1792}
1793
1794/*ARGSUSED*/
1795static int
1796backend_integrity_callback(void *private, int narg, char **vals, char **cols)
1797{
1798	char **out = private;
1799	char *old = *out;
1800	char *new;
1801	const char *info;
1802	size_t len;
1803	int x;
1804
1805	for (x = 0; x < narg; x++) {
1806		if ((info = vals[x]) != NULL &&
1807		    strcmp(info, "ok") != 0) {
1808			len = (old == NULL)? 0 : strlen(old);
1809			len += strlen(info) + 2;	/* '\n' + '\0' */
1810
1811			new = realloc(old, len);
1812			if (new == NULL)
1813				return (BACKEND_CALLBACK_ABORT);
1814			if (old == NULL)
1815				new[0] = 0;
1816			old = *out = new;
1817			(void) strlcat(new, info, len);
1818			(void) strlcat(new, "\n", len);
1819		}
1820	}
1821	return (BACKEND_CALLBACK_CONTINUE);
1822}
1823
1824#define	BACKEND_CREATE_LOCKED		-2
1825#define	BACKEND_CREATE_FAIL		-1
1826#define	BACKEND_CREATE_SUCCESS		0
1827#define	BACKEND_CREATE_READONLY		1
1828#define	BACKEND_CREATE_NEED_INIT	2
1829static int
1830backend_create(backend_type_t backend_id, const char *db_file,
1831    sqlite_backend_t **bep)
1832{
1833	char *errp;
1834	char *integrity_results = NULL;
1835	sqlite_backend_t *be;
1836	int r;
1837	uint32_t val = -1UL;
1838	struct run_single_int_info info;
1839	int fd;
1840
1841	assert(backend_id >= 0 && backend_id < BACKEND_TYPE_TOTAL);
1842
1843	be = &be_info[backend_id];
1844
1845	assert(be->be_db == NULL);
1846
1847	(void) pthread_mutex_init(&be->be_lock, NULL);
1848	(void) pthread_mutex_lock(&be->be_lock);
1849
1850	be->be_type = backend_id;
1851	be->be_path = strdup(db_file);
1852	if (be->be_path == NULL) {
1853		perror("malloc");
1854		goto fail;
1855	}
1856
1857	be->be_db = sqlite_open(be->be_path, 0600, &errp);
1858
1859	if (be->be_db == NULL) {
1860		if (strstr(errp, "out of memory") != NULL) {
1861			configd_critical("%s: %s\n", db_file, errp);
1862			free(errp);
1863
1864			goto fail;
1865		}
1866
1867		/* report it as an integrity failure */
1868		integrity_results = errp;
1869		errp = NULL;
1870		goto integrity_fail;
1871	}
1872
1873	/*
1874	 * check if we are inited and of the correct schema version
1875	 *
1876	 */
1877	info.rs_out = &val;
1878	info.rs_result = REP_PROTOCOL_FAIL_NOT_FOUND;
1879
1880	r = sqlite_exec(be->be_db, "SELECT schema_version FROM schema_version;",
1881	    run_single_int_callback, &info, &errp);
1882	if (r == SQLITE_ERROR &&
1883	    strcmp("no such table: schema_version", errp) == 0) {
1884		free(errp);
1885		/*
1886		 * Could be an empty repository, could be pre-schema_version
1887		 * schema.  Check for id_tbl, which has always been there.
1888		 */
1889		r = sqlite_exec(be->be_db, "SELECT count() FROM id_tbl;",
1890		    NULL, NULL, &errp);
1891		if (r == SQLITE_ERROR &&
1892		    strcmp("no such table: id_tbl", errp) == 0) {
1893			free(errp);
1894			*bep = be;
1895			return (BACKEND_CREATE_NEED_INIT);
1896		}
1897
1898		configd_critical("%s: schema version mismatch\n", db_file);
1899		goto fail;
1900	}
1901	if (r == SQLITE_BUSY || r == SQLITE_LOCKED) {
1902		free(errp);
1903		*bep = NULL;
1904		backend_destroy(be);
1905		return (BACKEND_CREATE_LOCKED);
1906	}
1907	if (r == SQLITE_OK) {
1908		if (info.rs_result == REP_PROTOCOL_FAIL_NOT_FOUND ||
1909		    val != BACKEND_SCHEMA_VERSION) {
1910			configd_critical("%s: schema version mismatch\n",
1911			    db_file);
1912			goto fail;
1913		}
1914	}
1915
1916	/*
1917	 * pull in the whole database sequentially.
1918	 */
1919	if ((fd = open(db_file, O_RDONLY)) >= 0) {
1920		size_t sz = 64 * 1024;
1921		char *buffer = malloc(sz);
1922		if (buffer != NULL) {
1923			while (read(fd, buffer, sz) > 0)
1924				;
1925			free(buffer);
1926		}
1927		(void) close(fd);
1928	}
1929
1930	/*
1931	 * run an integrity check
1932	 */
1933	r = sqlite_exec(be->be_db, "PRAGMA integrity_check;",
1934	    backend_integrity_callback, &integrity_results, &errp);
1935
1936	if (r == SQLITE_BUSY || r == SQLITE_LOCKED) {
1937		free(errp);
1938		*bep = NULL;
1939		backend_destroy(be);
1940		return (BACKEND_CREATE_LOCKED);
1941	}
1942	if (r == SQLITE_ABORT) {
1943		free(errp);
1944		errp = NULL;
1945		integrity_results = "out of memory running integrity check\n";
1946	} else if (r != SQLITE_OK && integrity_results == NULL) {
1947		integrity_results = errp;
1948		errp = NULL;
1949	}
1950
1951integrity_fail:
1952	if (integrity_results != NULL) {
1953		const char *fname = "/etc/svc/volatile/db_errors";
1954		if ((fd = open(fname, O_CREAT|O_WRONLY|O_APPEND, 0600)) < 0) {
1955			fname = NULL;
1956		} else {
1957			if (backend_fd_write(fd, "\n\n") < 0 ||
1958			    backend_fd_write(fd, db_file) < 0 ||
1959			    backend_fd_write(fd,
1960			    ": PRAGMA integrity_check; failed.  Results:\n") <
1961			    0 || backend_fd_write(fd, integrity_results) < 0 ||
1962			    backend_fd_write(fd, "\n\n") < 0) {
1963				fname = NULL;
1964			}
1965			(void) close(fd);
1966		}
1967
1968		if (!is_main_repository ||
1969		    backend_id == BACKEND_TYPE_NONPERSIST) {
1970			if (fname != NULL)
1971				configd_critical(
1972				    "%s: integrity check failed. Details in "
1973				    "%s\n", db_file, fname);
1974			else
1975				configd_critical(
1976				    "%s: integrity check failed.\n",
1977				    db_file);
1978		} else {
1979			(void) fprintf(stderr,
1980"\n"
1981"svc.configd: smf(5) database integrity check of:\n"
1982"\n"
1983"    %s\n"
1984"\n"
1985"  failed. The database might be damaged or a media error might have\n"
1986"  prevented it from being verified.  Additional information useful to\n"
1987"  your service provider%s%s\n"
1988"\n"
1989"  The system will not be able to boot until you have restored a working\n"
1990"  database.  svc.startd(1M) will provide a sulogin(1M) prompt for recovery\n"
1991"  purposes.  The command:\n"
1992"\n"
1993"    /lib/svc/bin/restore_repository\n"
1994"\n"
1995"  can be run to restore a backup version of your repository.  See\n"
1996"  http://sun.com/msg/SMF-8000-MY for more information.\n"
1997"\n",
1998			    db_file,
1999			    (fname == NULL)? ":\n\n" : " is in:\n\n    ",
2000			    (fname == NULL)? integrity_results : fname);
2001		}
2002		free(errp);
2003		goto fail;
2004	}
2005
2006	/*
2007	 * Simply do check if backend has been upgraded.  We do not wish
2008	 * to actually carry out upgrade here - the main repository may
2009	 * not be writable at this point.  Actual upgrade is carried out
2010	 * via backend_check_readonly().  This check is done so that
2011	 * we determine repository state - upgraded or not - and then
2012	 * the appropriate SELECT statement (value-ordered or not)
2013	 * can be used when retrieving property values early in boot.
2014	 */
2015	if (backend_id == BACKEND_TYPE_NORMAL)
2016		backend_check_upgrade(be, B_FALSE);
2017	/*
2018	 * check if we are writable
2019	 */
2020	r = backend_is_readonly(be->be_db, be->be_path);
2021
2022	if (r == SQLITE_BUSY || r == SQLITE_LOCKED) {
2023		free(errp);
2024		*bep = NULL;
2025		backend_destroy(be);
2026		return (BACKEND_CREATE_LOCKED);
2027	}
2028	if (r != SQLITE_OK && r != SQLITE_FULL) {
2029		free(errp);
2030		be->be_readonly = 1;
2031		*bep = be;
2032		return (BACKEND_CREATE_READONLY);
2033	}
2034
2035	*bep = be;
2036	return (BACKEND_CREATE_SUCCESS);
2037
2038fail:
2039	*bep = NULL;
2040	backend_destroy(be);
2041	return (BACKEND_CREATE_FAIL);
2042}
2043
/*
 * Round arg up to the nearest power of two; a value that already is a
 * power of two is returned unchanged.
 *
 * (arg & -arg) is, through the magic of twos-complement arithmetic, the
 * lowest set bit in arg.  Adding it to arg carries that bit upwards, so
 * repeating the step until only one bit is left gives the answer.
 */
static size_t
round_up_to_p2(size_t arg)
{
	size_t lowbit;

	/*
	 * Don't allow a zero result.
	 */
	assert(arg > 0 && ((ssize_t)arg > 0));

	for (;;) {
		lowbit = arg & -arg;
		if (lowbit == arg)	/* a single bit is set: done */
			break;
		arg += lowbit;
	}

	return (arg);
}
2061
2062/*
2063 * Returns
2064 *   _NO_RESOURCES - out of memory
2065 *   _BACKEND_ACCESS - backend type t (other than _NORMAL) doesn't exist
2066 *   _DONE - callback aborted query
2067 *   _SUCCESS
2068 */
2069int
2070backend_run(backend_type_t t, backend_query_t *q,
2071    backend_run_callback_f *cb, void *data)
2072{
2073	char *errmsg = NULL;
2074	int ret;
2075	sqlite_backend_t *be;
2076	hrtime_t ts, vts;
2077
2078	if (q == NULL || q->bq_buf == NULL)
2079		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
2080
2081	if ((ret = backend_lock(t, 0, &be)) != REP_PROTOCOL_SUCCESS)
2082		return (ret);
2083
2084	ts = gethrtime();
2085	vts = gethrvtime();
2086	ret = sqlite_exec(be->be_db, q->bq_buf, cb, data, &errmsg);
2087	UPDATE_TOTALS(be, bt_exec, ts, vts);
2088	ret = backend_error(be, ret, errmsg);
2089	backend_unlock(be);
2090
2091	return (ret);
2092}
2093
2094/*
2095 * Starts a "read-only" transaction -- i.e., locks out writers as long
2096 * as it is active.
2097 *
2098 * Fails with
2099 *   _NO_RESOURCES - out of memory
2100 *
2101 * If t is not _NORMAL, can also fail with
2102 *   _BACKEND_ACCESS - backend does not exist
2103 *
2104 * If writable is true, can also fail with
2105 *   _BACKEND_READONLY
2106 */
2107static int
2108backend_tx_begin_common(backend_type_t t, backend_tx_t **txp, int writable)
2109{
2110	backend_tx_t *ret;
2111	sqlite_backend_t *be;
2112	int r;
2113
2114	*txp = NULL;
2115
2116	ret = uu_zalloc(sizeof (*ret));
2117	if (ret == NULL)
2118		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
2119
2120	if ((r = backend_lock(t, writable, &be)) != REP_PROTOCOL_SUCCESS) {
2121		uu_free(ret);
2122		return (r);
2123	}
2124
2125	ret->bt_be = be;
2126	ret->bt_readonly = !writable;
2127	ret->bt_type = t;
2128	ret->bt_full = 0;
2129
2130	*txp = ret;
2131	return (REP_PROTOCOL_SUCCESS);
2132}
2133
2134int
2135backend_tx_begin_ro(backend_type_t t, backend_tx_t **txp)
2136{
2137	return (backend_tx_begin_common(t, txp, 0));
2138}
2139
2140static void
2141backend_tx_end(backend_tx_t *tx)
2142{
2143	sqlite_backend_t *be;
2144
2145	be = tx->bt_be;
2146
2147	if (tx->bt_full) {
2148		struct sqlite *new;
2149
2150		/*
2151		 * sqlite tends to be sticky with SQLITE_FULL, so we try
2152		 * to get a fresh database handle if we got a FULL warning
2153		 * along the way.  If that fails, no harm done.
2154		 */
2155		new = sqlite_open(be->be_path, 0600, NULL);
2156		if (new != NULL) {
2157			sqlite_close(be->be_db);
2158			be->be_db = new;
2159		}
2160	}
2161	backend_unlock(be);
2162	tx->bt_be = NULL;
2163	uu_free(tx);
2164}
2165
2166void
2167backend_tx_end_ro(backend_tx_t *tx)
2168{
2169	assert(tx->bt_readonly);
2170	backend_tx_end(tx);
2171}
2172
2173/*
2174 * Fails with
2175 *   _NO_RESOURCES - out of memory
2176 *   _BACKEND_ACCESS
2177 *   _BACKEND_READONLY
2178 */
2179int
2180backend_tx_begin(backend_type_t t, backend_tx_t **txp)
2181{
2182	int r;
2183	char *errmsg;
2184	hrtime_t ts, vts;
2185
2186	r = backend_tx_begin_common(t, txp, 1);
2187	if (r != REP_PROTOCOL_SUCCESS)
2188		return (r);
2189
2190	ts = gethrtime();
2191	vts = gethrvtime();
2192	r = sqlite_exec((*txp)->bt_be->be_db, "BEGIN TRANSACTION", NULL, NULL,
2193	    &errmsg);
2194	UPDATE_TOTALS((*txp)->bt_be, bt_exec, ts, vts);
2195	if (r == SQLITE_FULL)
2196		(*txp)->bt_full = 1;
2197	r = backend_error((*txp)->bt_be, r, errmsg);
2198
2199	if (r != REP_PROTOCOL_SUCCESS) {
2200		assert(r != REP_PROTOCOL_DONE);
2201		(void) sqlite_exec((*txp)->bt_be->be_db,
2202		    "ROLLBACK TRANSACTION", NULL, NULL, NULL);
2203		backend_tx_end(*txp);
2204		*txp = NULL;
2205		return (r);
2206	}
2207
2208	(*txp)->bt_readonly = 0;
2209
2210	return (REP_PROTOCOL_SUCCESS);
2211}
2212
2213void
2214backend_tx_rollback(backend_tx_t *tx)
2215{
2216	int r;
2217	char *errmsg;
2218	sqlite_backend_t *be;
2219	hrtime_t ts, vts;
2220
2221	assert(tx != NULL && tx->bt_be != NULL && !tx->bt_readonly);
2222	be = tx->bt_be;
2223
2224	ts = gethrtime();
2225	vts = gethrvtime();
2226	r = sqlite_exec(be->be_db, "ROLLBACK TRANSACTION", NULL, NULL,
2227	    &errmsg);
2228	UPDATE_TOTALS(be, bt_exec, ts, vts);
2229	if (r == SQLITE_FULL)
2230		tx->bt_full = 1;
2231	(void) backend_error(be, r, errmsg);
2232
2233	backend_tx_end(tx);
2234}
2235
2236/*
2237 * Fails with
2238 *   _NO_RESOURCES - out of memory
2239 */
2240int
2241backend_tx_commit(backend_tx_t *tx)
2242{
2243	int r, r2;
2244	char *errmsg;
2245	sqlite_backend_t *be;
2246	hrtime_t ts, vts;
2247
2248	assert(tx != NULL && tx->bt_be != NULL && !tx->bt_readonly);
2249	be = tx->bt_be;
2250	ts = gethrtime();
2251	vts = gethrvtime();
2252	r = sqlite_exec(be->be_db, "COMMIT TRANSACTION", NULL, NULL,
2253	    &errmsg);
2254	UPDATE_TOTALS(be, bt_exec, ts, vts);
2255	if (r == SQLITE_FULL)
2256		tx->bt_full = 1;
2257
2258	r = backend_error(be, r, errmsg);
2259	assert(r != REP_PROTOCOL_DONE);
2260
2261	if (r != REP_PROTOCOL_SUCCESS) {
2262		r2 = sqlite_exec(be->be_db, "ROLLBACK TRANSACTION", NULL, NULL,
2263		    &errmsg);
2264		r2 = backend_error(be, r2, errmsg);
2265		if (r2 != REP_PROTOCOL_SUCCESS)
2266			backend_panic("cannot rollback failed commit");
2267
2268		backend_tx_end(tx);
2269		return (r);
2270	}
2271	backend_tx_end(tx);
2272	return (REP_PROTOCOL_SUCCESS);
2273}
2274
2275static const char *
2276id_space_to_name(enum id_space id)
2277{
2278	switch (id) {
2279	case BACKEND_ID_SERVICE_INSTANCE:
2280		return ("SI");
2281	case BACKEND_ID_PROPERTYGRP:
2282		return ("PG");
2283	case BACKEND_ID_GENERATION:
2284		return ("GEN");
2285	case BACKEND_ID_PROPERTY:
2286		return ("PROP");
2287	case BACKEND_ID_VALUE:
2288		return ("VAL");
2289	case BACKEND_ID_SNAPNAME:
2290		return ("SNAME");
2291	case BACKEND_ID_SNAPSHOT:
2292		return ("SHOT");
2293	case BACKEND_ID_SNAPLEVEL:
2294		return ("SLVL");
2295	default:
2296		abort();
2297		/*NOTREACHED*/
2298	}
2299}
2300
2301/*
2302 * Returns a new id or 0 if the id argument is invalid or the query fails.
2303 */
2304uint32_t
2305backend_new_id(backend_tx_t *tx, enum id_space id)
2306{
2307	struct run_single_int_info info;
2308	uint32_t new_id = 0;
2309	const char *name = id_space_to_name(id);
2310	char *errmsg;
2311	int ret;
2312	sqlite_backend_t *be;
2313	hrtime_t ts, vts;
2314
2315	assert(tx != NULL && tx->bt_be != NULL && !tx->bt_readonly);
2316	be = tx->bt_be;
2317
2318	info.rs_out = &new_id;
2319	info.rs_result = REP_PROTOCOL_FAIL_NOT_FOUND;
2320
2321	ts = gethrtime();
2322	vts = gethrvtime();
2323	ret = sqlite_exec_printf(be->be_db,
2324	    "SELECT id_next FROM id_tbl WHERE (id_name = '%q');"
2325	    "UPDATE id_tbl SET id_next = id_next + 1 WHERE (id_name = '%q');",
2326	    run_single_int_callback, &info, &errmsg, name, name);
2327	UPDATE_TOTALS(be, bt_exec, ts, vts);
2328	if (ret == SQLITE_FULL)
2329		tx->bt_full = 1;
2330
2331	ret = backend_error(be, ret, errmsg);
2332
2333	if (ret != REP_PROTOCOL_SUCCESS) {
2334		return (0);
2335	}
2336
2337	return (new_id);
2338}
2339
2340/*
2341 * Returns
2342 *   _NO_RESOURCES - out of memory
2343 *   _DONE - callback aborted query
2344 *   _SUCCESS
2345 */
2346int
2347backend_tx_run(backend_tx_t *tx, backend_query_t *q,
2348    backend_run_callback_f *cb, void *data)
2349{
2350	char *errmsg = NULL;
2351	int ret;
2352	sqlite_backend_t *be;
2353	hrtime_t ts, vts;
2354
2355	assert(tx != NULL && tx->bt_be != NULL);
2356	be = tx->bt_be;
2357
2358	if (q == NULL || q->bq_buf == NULL)
2359		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
2360
2361	ts = gethrtime();
2362	vts = gethrvtime();
2363	ret = sqlite_exec(be->be_db, q->bq_buf, cb, data, &errmsg);
2364	UPDATE_TOTALS(be, bt_exec, ts, vts);
2365	if (ret == SQLITE_FULL)
2366		tx->bt_full = 1;
2367	ret = backend_error(be, ret, errmsg);
2368
2369	return (ret);
2370}
2371
2372/*
2373 * Returns
2374 *   _NO_RESOURCES - out of memory
2375 *   _NOT_FOUND - the query returned no results
2376 *   _SUCCESS - the query returned a single integer
2377 */
2378int
2379backend_tx_run_single_int(backend_tx_t *tx, backend_query_t *q, uint32_t *buf)
2380{
2381	struct run_single_int_info info;
2382	int ret;
2383
2384	info.rs_out = buf;
2385	info.rs_result = REP_PROTOCOL_FAIL_NOT_FOUND;
2386
2387	ret = backend_tx_run(tx, q, run_single_int_callback, &info);
2388	assert(ret != REP_PROTOCOL_DONE);
2389
2390	if (ret != REP_PROTOCOL_SUCCESS)
2391		return (ret);
2392
2393	return (info.rs_result);
2394}
2395
2396/*
2397 * Fails with
2398 *   _NO_RESOURCES - out of memory
2399 */
2400int
2401backend_tx_run_update(backend_tx_t *tx, const char *format, ...)
2402{
2403	va_list a;
2404	char *errmsg;
2405	int ret;
2406	sqlite_backend_t *be;
2407	hrtime_t ts, vts;
2408
2409	assert(tx != NULL && tx->bt_be != NULL && !tx->bt_readonly);
2410	be = tx->bt_be;
2411
2412	va_start(a, format);
2413	ts = gethrtime();
2414	vts = gethrvtime();
2415	ret = sqlite_exec_vprintf(be->be_db, format, NULL, NULL, &errmsg, a);
2416	UPDATE_TOTALS(be, bt_exec, ts, vts);
2417	if (ret == SQLITE_FULL)
2418		tx->bt_full = 1;
2419	va_end(a);
2420	ret = backend_error(be, ret, errmsg);
2421	assert(ret != REP_PROTOCOL_DONE);
2422
2423	return (ret);
2424}
2425
2426/*
2427 * returns REP_PROTOCOL_FAIL_NOT_FOUND if no changes occured
2428 */
2429int
2430backend_tx_run_update_changed(backend_tx_t *tx, const char *format, ...)
2431{
2432	va_list a;
2433	char *errmsg;
2434	int ret;
2435	sqlite_backend_t *be;
2436	hrtime_t ts, vts;
2437
2438	assert(tx != NULL && tx->bt_be != NULL && !tx->bt_readonly);
2439	be = tx->bt_be;
2440
2441	va_start(a, format);
2442	ts = gethrtime();
2443	vts = gethrvtime();
2444	ret = sqlite_exec_vprintf(be->be_db, format, NULL, NULL, &errmsg, a);
2445	UPDATE_TOTALS(be, bt_exec, ts, vts);
2446	if (ret == SQLITE_FULL)
2447		tx->bt_full = 1;
2448	va_end(a);
2449
2450	ret = backend_error(be, ret, errmsg);
2451
2452	return (ret);
2453}
2454
2455#define	BACKEND_ADD_SCHEMA(be, file, tbls, idxs) \
2456	(backend_add_schema((be), (file), \
2457	    (tbls), sizeof (tbls) / sizeof (*(tbls)), \
2458	    (idxs), sizeof (idxs) / sizeof (*(idxs))))
2459
2460static int
2461backend_add_schema(sqlite_backend_t *be, const char *file,
2462    struct backend_tbl_info *tbls, int tbl_count,
2463    struct backend_idx_info *idxs, int idx_count)
2464{
2465	int i;
2466	char *errmsg;
2467	int ret;
2468
2469	/*
2470	 * Create the tables.
2471	 */
2472	for (i = 0; i < tbl_count; i++) {
2473		if (tbls[i].bti_name == NULL) {
2474			assert(i + 1 == tbl_count);
2475			break;
2476		}
2477		ret = sqlite_exec_printf(be->be_db,
2478		    "CREATE TABLE %s (%s);\n",
2479		    NULL, NULL, &errmsg, tbls[i].bti_name, tbls[i].bti_cols);
2480
2481		if (ret != SQLITE_OK) {
2482			configd_critical(
2483			    "%s: %s table creation fails: %s\n", file,
2484			    tbls[i].bti_name, errmsg);
2485			free(errmsg);
2486			return (-1);
2487		}
2488	}
2489
2490	/*
2491	 * Make indices on key tables and columns.
2492	 */
2493	for (i = 0; i < idx_count; i++) {
2494		if (idxs[i].bxi_tbl == NULL) {
2495			assert(i + 1 == idx_count);
2496			break;
2497		}
2498
2499		ret = sqlite_exec_printf(be->be_db,
2500		    "CREATE INDEX %s_%s ON %s (%s);\n",
2501		    NULL, NULL, &errmsg, idxs[i].bxi_tbl, idxs[i].bxi_idx,
2502		    idxs[i].bxi_tbl, idxs[i].bxi_cols);
2503
2504		if (ret != SQLITE_OK) {
2505			configd_critical(
2506			    "%s: %s_%s index creation fails: %s\n", file,
2507			    idxs[i].bxi_tbl, idxs[i].bxi_idx, errmsg);
2508			free(errmsg);
2509			return (-1);
2510		}
2511	}
2512	return (0);
2513}
2514
2515static int
2516backend_init_schema(sqlite_backend_t *be, const char *db_file, backend_type_t t)
2517{
2518	int i;
2519	char *errmsg;
2520	int ret;
2521
2522	assert(t == BACKEND_TYPE_NORMAL || t == BACKEND_TYPE_NONPERSIST);
2523
2524	if (t == BACKEND_TYPE_NORMAL) {
2525		ret = BACKEND_ADD_SCHEMA(be, db_file, tbls_normal, idxs_normal);
2526	} else if (t == BACKEND_TYPE_NONPERSIST) {
2527		ret = BACKEND_ADD_SCHEMA(be, db_file, tbls_np, idxs_np);
2528	} else {
2529		abort();		/* can't happen */
2530	}
2531
2532	if (ret < 0) {
2533		return (ret);
2534	}
2535
2536	ret = BACKEND_ADD_SCHEMA(be, db_file, tbls_common, idxs_common);
2537	if (ret < 0) {
2538		return (ret);
2539	}
2540
2541	/*
2542	 * Add the schema version to the table
2543	 */
2544	ret = sqlite_exec_printf(be->be_db,
2545	    "INSERT INTO schema_version (schema_version) VALUES (%d)",
2546	    NULL, NULL, &errmsg, BACKEND_SCHEMA_VERSION);
2547	if (ret != SQLITE_OK) {
2548		configd_critical(
2549		    "setting schema version fails: %s\n", errmsg);
2550		free(errmsg);
2551	}
2552
2553	/*
2554	 * Populate id_tbl with initial IDs.
2555	 */
2556	for (i = 0; i < BACKEND_ID_INVALID; i++) {
2557		const char *name = id_space_to_name(i);
2558
2559		ret = sqlite_exec_printf(be->be_db,
2560		    "INSERT INTO id_tbl (id_name, id_next) "
2561		    "VALUES ('%q', %d);", NULL, NULL, &errmsg, name, 1);
2562		if (ret != SQLITE_OK) {
2563			configd_critical(
2564			    "id insertion for %s fails: %s\n", name, errmsg);
2565			free(errmsg);
2566			return (-1);
2567		}
2568	}
2569	/*
2570	 * Set the persistance of the database.  The normal database is marked
2571	 * "synchronous", so that all writes are synchronized to stable storage
2572	 * before proceeding.
2573	 */
2574	ret = sqlite_exec_printf(be->be_db,
2575	    "PRAGMA default_synchronous = %s; PRAGMA synchronous = %s;",
2576	    NULL, NULL, &errmsg,
2577	    (t == BACKEND_TYPE_NORMAL)? "ON" : "OFF",
2578	    (t == BACKEND_TYPE_NORMAL)? "ON" : "OFF");
2579	if (ret != SQLITE_OK) {
2580		configd_critical("pragma setting fails: %s\n", errmsg);
2581		free(errmsg);
2582		return (-1);
2583	}
2584
2585	return (0);
2586}
2587
2588int
2589backend_init(const char *db_file, const char *npdb_file, int have_np)
2590{
2591	sqlite_backend_t *be;
2592	char *errp;
2593	struct sqlite *fast_db;
2594	int r;
2595	backend_switch_results_t switch_result = BACKEND_SWITCH_OK;
2596	int writable_persist = 1;
2597
2598	/* set up our temporary directory */
2599	sqlite_temp_directory = "/etc/svc/volatile";
2600
2601	if (strcmp(SQLITE_VERSION, sqlite_version) != 0) {
2602		configd_critical("Mismatched link!  (%s should be %s)\n",
2603		    sqlite_version, SQLITE_VERSION);
2604		return (CONFIGD_EXIT_DATABASE_INIT_FAILED);
2605	}
2606
2607	if (db_file == NULL)
2608		db_file = REPOSITORY_DB;
2609	if (strcmp(db_file, REPOSITORY_DB) != 0) {
2610		is_main_repository = 0;
2611	}
2612
2613	/*
2614	 * If the svc.configd crashed, there might be a leftover transient
2615	 * database at FAST_REPOSITORY_DB,which contains useful
2616	 * information.  Both early manifest import and late manifest
2617	 * import use svcadm to copy the repository to FAST_REPOSITORY_DB.
2618	 * One reason for doing this is that it improves the performance of
2619	 * manifest import.  The other reason is that the repository may be
2620	 * on read-only root in the case of early manifest import.
2621	 *
2622	 * If FAST_REPOSITORY_DB exists, it is an indication that
2623	 * svc.configd has been restarted for some reason.  Since we have
2624	 * no way of knowing where we are in the boot process, the safe
2625	 * thing to do is to move the repository back to it's non-transient
2626	 * location, REPOSITORY_DB.  This may slow manifest import
2627	 * performance, but it avoids the problem of missing the command to
2628	 * move the repository to permanent storage.
2629	 *
2630	 * There is a caveat, though.  If root is read-only, we'll need to
2631	 * leave the repository at FAST_REPOSITORY_DB.  If root is
2632	 * read-only, late manifest import has not yet run, so it will move
2633	 * the repository back to permanent storage when it runs.
2634	 */
2635	if (is_main_repository)
2636		switch_result = backend_switch_recovery();
2637
2638	r = backend_create(BACKEND_TYPE_NORMAL, db_file, &be);
2639	switch (r) {
2640	case BACKEND_CREATE_FAIL:
2641		return (CONFIGD_EXIT_DATABASE_INIT_FAILED);
2642	case BACKEND_CREATE_LOCKED:
2643		return (CONFIGD_EXIT_DATABASE_LOCKED);
2644	case BACKEND_CREATE_SUCCESS:
2645		break;		/* success */
2646	case BACKEND_CREATE_READONLY:
2647		writable_persist = 0;
2648		break;
2649	case BACKEND_CREATE_NEED_INIT:
2650		if (backend_init_schema(be, db_file, BACKEND_TYPE_NORMAL)) {
2651			backend_destroy(be);
2652			return (CONFIGD_EXIT_DATABASE_INIT_FAILED);
2653		}
2654		break;
2655	default:
2656		abort();
2657		/*NOTREACHED*/
2658	}
2659	backend_create_finish(BACKEND_TYPE_NORMAL, be);
2660	flight_recorder_event(BE_FLIGHT_EV_REPO_CREATE,
2661	    writable_persist == 1 ? BE_FLIGHT_ST_RW : BE_FLIGHT_ST_RO);
2662	/*
2663	 * If there was a transient repository that could not be copied
2664	 * back because the root file system was read-only, switch over to
2665	 * using the transient repository.
2666	 */
2667	if (switch_result == BACKEND_SWITCH_RO) {
2668		char *db_name_copy = NULL;
2669
2670		fast_db = sqlite_open(FAST_REPOSITORY_DB, 0600, &errp);
2671		if (fast_db == NULL) {
2672			/* Can't open fast repository.  Stick with permanent. */
2673			configd_critical("Cannot open \"%s\".  %s\n",
2674			    FAST_REPOSITORY_DB, errp == NULL ? "" : errp);
2675			free(errp);
2676		} else {
2677			db_name_copy = strdup(FAST_REPOSITORY_DB);
2678			if (db_name_copy == NULL) {
2679				configd_critical("backend_init: out of "
2680				    "memory.\n");
2681				sqlite_close(fast_db);
2682				return (CONFIGD_EXIT_INIT_FAILED);
2683			} else {
2684				flight_recorder_event(
2685				    BE_FLIGHT_EV_LINGERING_FAST,
2686				    BE_FLIGHT_ST_RO);
2687				sqlite_close(be->be_db);
2688				be->be_db = fast_db;
2689				be->be_ppath = be->be_path;
2690				be->be_path = db_name_copy;
2691			}
2692		}
2693	}
2694
2695	if (have_np) {
2696		if (npdb_file == NULL)
2697			npdb_file = NONPERSIST_DB;
2698
2699		r = backend_create(BACKEND_TYPE_NONPERSIST, npdb_file, &be);
2700		switch (r) {
2701		case BACKEND_CREATE_SUCCESS:
2702			break;		/* success */
2703		case BACKEND_CREATE_FAIL:
2704			return (CONFIGD_EXIT_DATABASE_INIT_FAILED);
2705		case BACKEND_CREATE_LOCKED:
2706			return (CONFIGD_EXIT_DATABASE_LOCKED);
2707		case BACKEND_CREATE_READONLY:
2708			configd_critical("%s: unable to write\n", npdb_file);
2709			return (CONFIGD_EXIT_DATABASE_INIT_FAILED);
2710		case BACKEND_CREATE_NEED_INIT:
2711			if (backend_init_schema(be, db_file,
2712			    BACKEND_TYPE_NONPERSIST)) {
2713				backend_destroy(be);
2714				return (CONFIGD_EXIT_DATABASE_INIT_FAILED);
2715			}
2716			break;
2717		default:
2718			abort();
2719			/*NOTREACHED*/
2720		}
2721		backend_create_finish(BACKEND_TYPE_NONPERSIST, be);
2722
2723		if (r != BACKEND_CREATE_NEED_INIT) {
2724			flight_recorder_event(BE_FLIGHT_EV_RESTART,
2725			    BE_FLIGHT_ST_INFO);
2726		}
2727
2728		/*
2729		 * If we started up with a writable filesystem, but the
2730		 * non-persistent database needed initialization, we are
2731		 * booting a non-global zone or a system with a writable
2732		 * root (ZFS), so do a backup.  Checking to see if the
2733		 * non-persistent database needed initialization also keeps
2734		 * us from making additional backups if configd gets
2735		 * restarted.
2736		 */
2737		if (r == BACKEND_CREATE_NEED_INIT && writable_persist &&
2738		    backend_lock(BACKEND_TYPE_NORMAL, 0, &be) ==
2739		    REP_PROTOCOL_SUCCESS) {
2740			if (backend_create_backup_locked(be,
2741			    REPOSITORY_BOOT_BACKUP) != REP_PROTOCOL_SUCCESS) {
2742				configd_critical(
2743				    "unable to create \"%s\" backup of "
2744				    "\"%s\"\n", REPOSITORY_BOOT_BACKUP,
2745				    be->be_path);
2746			}
2747			backend_unlock(be);
2748		}
2749
2750		/*
2751		 * On the other hand if we started with a read-only file
2752		 * system and the non-persistent database needed
2753		 * initialization, then we need to take a checkpoint of the
2754		 * repository.  We grab the checkpoint now before Early
2755		 * Manifest Import starts modifying the repository.  Then
2756		 * when the file system becomes writable, the checkpoint
2757		 * can be used to create the boot time backup of the
2758		 * repository.  Checking that the non-persistent database
2759		 * needed initialization, keeps us from making additional
2760		 * checkpoints if configd gets restarted.
2761		 */
2762		if (r == BACKEND_CREATE_NEED_INIT && writable_persist == 0 &&
2763		    backend_lock(BACKEND_TYPE_NORMAL, 0, &be) ==
2764		    REP_PROTOCOL_SUCCESS) {
2765			r = backend_checkpoint_repository(be);
2766			if (r != REP_PROTOCOL_SUCCESS) {
2767				configd_critical("unable to create checkpoint "
2768				    "of \"%s\"\n", be->be_path);
2769			}
2770			backend_unlock(be);
2771		}
2772
2773		/*
2774		 * If the non-persistent database did not need
2775		 * initialization, svc.configd has been restarted.  See if
2776		 * the boot time checkpoint exists.  If it does, use it to
2777		 * make a backup if root is writable.
2778		 */
2779		if (r != BACKEND_CREATE_NEED_INIT &&
2780		    backend_lock(BACKEND_TYPE_NORMAL, 0, &be) ==
2781		    REP_PROTOCOL_SUCCESS) {
2782			struct stat sb;
2783
2784			if ((stat(REPOSITORY_CHECKPOINT, &sb) == 0) &&
2785			    (sb.st_size > 0) && (sb.st_mode & S_IFREG)) {
2786				be->be_checkpoint = REPOSITORY_CHECKPOINT;
2787				flight_recorder_event(
2788				    BE_FLIGHT_EV_CHECKPOINT_EXISTS,
2789				    BE_FLIGHT_ST_INFO);
2790			}
2791
2792			/*
2793			 * If we have a checkpoint and root is writable,
2794			 * make the backup now.
2795			 */
2796			if (be->be_checkpoint && writable_persist) {
2797				if (backend_create_backup_locked(be,
2798				    REPOSITORY_BOOT_BACKUP) !=
2799				    REP_PROTOCOL_SUCCESS) {
2800					configd_critical(
2801					    "unable to create \"%s\" backup of "
2802					    "\"%s\"\n", REPOSITORY_BOOT_BACKUP,
2803					    be->be_path);
2804				}
2805			}
2806			backend_unlock(be);
2807		}
2808	}
2809
2810	/*
2811	 * If the persistent backend is writable at this point, upgrade it.
2812	 * This can occur in a few cases, most notably on UFS roots if
2813	 * we are operating on the backend from another root, as is the case
2814	 * during alternate-root BFU.
2815	 *
2816	 * Otherwise, upgrade will occur via backend_check_readonly() when
2817	 * the repository is re-opened read-write.
2818	 */
2819	if (writable_persist) {
2820		r = backend_lock(BACKEND_TYPE_NORMAL, 1, &be);
2821		assert(r == REP_PROTOCOL_SUCCESS);
2822		backend_check_upgrade(be, B_TRUE);
2823		backend_unlock(be);
2824	}
2825
2826	return (CONFIGD_EXIT_OKAY);
2827}
2828
2829/*
2830 * quiesce all database activity prior to exiting
2831 */
2832void
2833backend_fini(void)
2834{
2835	sqlite_backend_t *be_normal, *be_np;
2836
2837	(void) backend_lock(BACKEND_TYPE_NORMAL, 1, &be_normal);
2838	(void) backend_lock(BACKEND_TYPE_NONPERSIST, 1, &be_np);
2839}
2840
2841#define	QUERY_BASE	128
2842backend_query_t *
2843backend_query_alloc(void)
2844{
2845	backend_query_t *q;
2846	q = calloc(1, sizeof (backend_query_t));
2847	if (q != NULL) {
2848		q->bq_size = QUERY_BASE;
2849		q->bq_buf = calloc(1, q->bq_size);
2850		if (q->bq_buf == NULL) {
2851			q->bq_size = 0;
2852		}
2853
2854	}
2855	return (q);
2856}
2857
2858void
2859backend_query_append(backend_query_t *q, const char *value)
2860{
2861	char *alloc;
2862	int count;
2863	size_t size, old_len;
2864
2865	if (q == NULL) {
2866		/* We'll discover the error when we try to run the query. */
2867		return;
2868	}
2869
2870	while (q->bq_buf != NULL) {
2871		old_len = strlen(q->bq_buf);
2872		size = q->bq_size;
2873		count = strlcat(q->bq_buf, value, size);
2874
2875		if (count < size)
2876			break;				/* success */
2877
2878		q->bq_buf[old_len] = 0;
2879		size = round_up_to_p2(count + 1);
2880
2881		assert(size > q->bq_size);
2882		alloc = realloc(q->bq_buf, size);
2883		if (alloc == NULL) {
2884			free(q->bq_buf);
2885			q->bq_buf = NULL;
2886			break;				/* can't grow */
2887		}
2888
2889		q->bq_buf = alloc;
2890		q->bq_size = size;
2891	}
2892}
2893
2894void
2895backend_query_add(backend_query_t *q, const char *format, ...)
2896{
2897	va_list args;
2898	char *new;
2899
2900	if (q == NULL || q->bq_buf == NULL)
2901		return;
2902
2903	va_start(args, format);
2904	new = sqlite_vmprintf(format, args);
2905	va_end(args);
2906
2907	if (new == NULL) {
2908		free(q->bq_buf);
2909		q->bq_buf = NULL;
2910		return;
2911	}
2912
2913	backend_query_append(q, new);
2914
2915	free(new);
2916}
2917
2918void
2919backend_query_free(backend_query_t *q)
2920{
2921	if (q != NULL) {
2922		if (q->bq_buf != NULL) {
2923			free(q->bq_buf);
2924		}
2925		free(q);
2926	}
2927}
2928