/******************************************************************************
 * xenstore.c
 *
 * Low-level kernel interface to the XenStore.
 *
 * Copyright (C) 2005 Rusty Russell, IBM Corporation
 * Copyright (C) 2009,2010 Spectra Logic Corporation
 *
 * This file may be distributed separately from the Linux kernel, or
 * incorporated into other software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/sx.h>
#include <sys/syslog.h>
#include <sys/malloc.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/kthread.h>
#include <sys/sbuf.h>
#include <sys/sysctl.h>
#include <sys/uio.h>
#include <sys/unistd.h>
#include <sys/queue.h>
#include <sys/taskqueue.h>

#include <machine/stdarg.h>

#include <xen/xen-os.h>
#include <xen/hypervisor.h>
#include <xen/xen_intr.h>

#include <contrib/xen/hvm/params.h>
#include <xen/hvm.h>

#include <xen/xenstore/xenstorevar.h>
#include <xen/xenstore/xenstore_internal.h>

#include <vm/vm.h>
#include <vm/pmap.h>
/**
 * \file xenstore.c
 * \brief XenStore interface
 *
 * The XenStore interface is a simple storage system that is a means of
 * communicating state and configuration data between the Xen Domain 0
 * and the various guest domains.  All configuration data, other than
 * the small amount of essential information required during the early
 * boot process of launching a Xen aware guest, is managed using the
 * XenStore.
 *
 * The XenStore is ASCII string based, and has a structure and semantics
 * similar to a filesystem.  There are files and directories, with the
 * directories able to contain files or other directories.  The depth of
 * the hierarchy is only limited by the XenStore's maximum path length.
 *
 * The communication channel between the XenStore service and other
 * domains is via two, guest specific, ring buffers in a shared memory
 * area.  One ring buffer is used for communicating in each direction.
 * The grant table references for this shared memory are given to the
 * guest either via the xen_start_info structure for a fully para-
 * virtualized guest, or via HVM hypercalls for a hardware virtualized
 * guest.
 *
 * The XenStore communication relies on an event channel and thus
 * interrupts.  For this reason, the attachment of the XenStore
 * relies on an interrupt driven configuration hook to hold off
 * boot processing until communication with the XenStore service
 * can be established.
 *
 * Several Xen services depend on the XenStore, most notably the
 * XenBus used to discover and manage Xen devices.  These services
 * are implemented as NewBus child attachments to a bus exported
 * by this XenStore driver.
 */

static struct xs_watch *find_watch(const char *token);

MALLOC_DEFINE(M_XENSTORE, "xenstore", "XenStore data and results");

/**
 * Pointer to shared memory communication structures allowing us
 * to communicate with the XenStore service.
 *
 * When operating in full PV mode, this pointer is set early in kernel
 * startup from within xen_machdep.c.  In HVM mode, we use hypercalls
 * to get the guest frame number for the shared page and then map it
 * into kva.  See xs_init() for details.
 */
static struct xenstore_domain_interface *xen_store;

/*-------------------------- Private Data Structures ------------------------*/

/**
 * Structure capturing messages received from the XenStore service.
 */
struct xs_stored_msg {
	TAILQ_ENTRY(xs_stored_msg) list;

	struct xsd_sockmsg hdr;

	union {
		/* Queued replies. */
		struct {
			char *body;
		} reply;

		/* Queued watch events. */
		struct {
			struct xs_watch *handle;
			const char **vec;
			u_int vec_size;
		} watch;
	} u;
};
TAILQ_HEAD(xs_stored_msg_list, xs_stored_msg);

/**
 * Container for all XenStore related state.
 */
struct xs_softc {
	/** Newbus device for the XenStore. */
	device_t xs_dev;

	/**
	 * Lock serializing access to ring producer/consumer
	 * indexes.  Use of this lock guarantees that wakeups
	 * of blocking readers/writers are not missed due to
	 * races with the XenStore service.
	 */
	struct mtx ring_lock;
	/*
	 * Mutex used to ensure exclusive access to the outgoing
	 * communication ring.  We use a lock type that can be
	 * held while sleeping so that xs_write() can block waiting
	 * for space in the ring to free up, without allowing another
	 * writer to come in and corrupt a partial message write.
	 */
	struct sx request_mutex;

	/**
	 * A list of replies to our requests.
	 *
	 * The reply list is filled by xs_rcv_thread().  It
	 * is consumed by the context that issued the request
	 * to which a reply is made.  The requester blocks in
	 * xs_read_reply().
	 *
	 * \note Only one requesting context can be active at a time.
	 *       This is guaranteed by the request_mutex and ensures
	 *       that the requester sees replies matching the order
	 *       of its requests.
	 */
	struct xs_stored_msg_list reply_list;

	/** Lock protecting the reply list. */
	struct mtx reply_lock;

	/**
	 * List of registered watches.
	 */
	struct xs_watch_list  registered_watches;

	/** Lock protecting the registered watches list. */
	struct mtx registered_watches_lock;

	/**
	 * List of pending watch callback events.
	 */
	struct xs_stored_msg_list watch_events;
	/** Lock protecting the watch callback list. */
	struct mtx watch_events_lock;

	/**
	 * The process id of the xenwatch thread.
	 */
	pid_t xenwatch_pid;

	/**
	 * Sleepable mutex used to gate the execution of XenStore
	 * watch event callbacks.
	 *
	 * xenwatch_thread holds an exclusive lock on this mutex
	 * while delivering event callbacks, and xenstore_unregister_watch()
	 * uses an exclusive lock of this mutex to guarantee that no
	 * callbacks of the just unregistered watch are pending
	 * before returning to its caller.
	 */
	struct sx xenwatch_mutex;

	/**
	 * The HVM guest pseudo-physical frame number.  This is Xen's mapping
	 * of the true machine frame number into our "physical address space".
	 */
	unsigned long gpfn;

	/**
	 * The event channel for communicating with the
	 * XenStore service.
	 */
	int evtchn;

	/** Handle for XenStore interrupts. */
	xen_intr_handle_t xen_intr_handle;

	/**
	 * Interrupt driven config hook allowing us to defer
	 * attaching children until interrupts (and thus communication
	 * with the XenStore service) are available.
	 */
	struct intr_config_hook xs_attachcb;

	/**
	 * XenStore is a user-space process that usually runs in Dom0,
	 * so if this domain is booting as Dom0, the XenStore won't be
	 * accessible, and we have to defer the initialization of
	 * XenStore related devices until later (when XenStore is
	 * started).
	 */
	bool initialized;

	/**
	 * Task to run when XenStore is initialized (Dom0 only); it
	 * takes care of attaching XenStore related devices.
	 */
	struct task xs_late_init;
};

/*-------------------------------- Global Data ------------------------------*/
static struct xs_softc xs;

/*------------------------- Private Utility Functions -----------------------*/

/**
 * Count and optionally record pointers to a number of NUL terminated
 * strings in a buffer.
 *
 * \param strings  A pointer to a contiguous buffer of NUL terminated strings.
 * \param dest	   An array to store pointers to each string found in strings.
 * \param len	   The length of the buffer pointed to by strings.
 *
 * \return  A count of the number of strings found.
 */
static u_int
extract_strings(const char *strings, const char **dest, u_int len)
{
	u_int num;
	const char *p;

	for (p = strings, num = 0; p < strings + len; p += strlen(p) + 1) {
		if (dest != NULL)
			*dest++ = p;
		num++;
	}

	return (num);
}

/**
 * Convert a contiguous buffer containing a series of NUL terminated
 * strings into an array of pointers to strings.
 *
 * The returned pointer references the array of string pointers which
 * is followed by the storage for the string data.  It is the client's
 * responsibility to free this storage.
 *
 * The storage addressed by strings is free'd prior to split returning.
 *
 * \param strings  A pointer to a contiguous buffer of NUL terminated strings.
 * \param len	   The length of the buffer pointed to by strings.
 * \param num	   The number of strings found and returned in the strings
 *                 array.
 *
 * \return  An array of pointers to the strings found in the input buffer.
 */
static const char **
split(char *strings, u_int len, u_int *num)
{
	const char **ret;

	/* Protect against unterminated buffers. */
	if (len > 0)
		strings[len - 1] = '\0';

	/* Count the strings. */
	*num = extract_strings(strings, /*dest*/NULL, len);

	/* Transfer to one big alloc for easy freeing by the caller. */
	ret = malloc(*num * sizeof(char *) + len, M_XENSTORE, M_WAITOK);
	memcpy(&ret[*num], strings, len);
	free(strings, M_XENSTORE);

	/* Extract pointers to newly allocated array. */
	strings = (char *)&ret[*num];
	(void)extract_strings(strings, /*dest*/ret, len);

	return (ret);
}
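
/*
 * A minimal illustration of split()'s contract, assuming a reply buffer
 * of the form returned by an XS_DIRECTORY request (the names here are
 * hypothetical):
 *
 *	char *strings;		// holds "vif\0vbd\0console\0", len = 16
 *	const char **vec;
 *	u_int num;
 *
 *	vec = split(strings, 16, &num);
 *	// num == 3; vec[0] == "vif", vec[1] == "vbd", vec[2] == "console".
 *	// The input buffer has been freed; vec and the copied string data
 *	// live in a single allocation released by the caller with:
 *	free(vec, M_XENSTORE);
 */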

/*------------------------- Public Utility Functions -------------------------*/
/*------- API comments for these methods can be found in xenstorevar.h -------*/
struct sbuf *
xs_join(const char *dir, const char *name)
{
	struct sbuf *sb;

	sb = sbuf_new_auto();
	sbuf_cat(sb, dir);
	if (name[0] != '\0') {
		sbuf_putc(sb, '/');
		sbuf_cat(sb, name);
	}
	sbuf_finish(sb);

	return (sb);
}
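
/*
 * A short sketch of xs_join()'s path construction (the node names are
 * hypothetical):
 *
 *	struct sbuf *sb = xs_join("device/vbd", "768");
 *	// sbuf_data(sb) == "device/vbd/768"
 *	sbuf_delete(sb);
 *
 * An empty name yields the directory unchanged, so callers may pass ""
 * to operate on the directory node itself.
 */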

/*-------------------- Low Level Communication Management --------------------*/
/**
 * Interrupt handler for the XenStore event channel.
 *
 * XenStore reads and writes block on "xen_store" for buffer
 * space.  Wakeup any blocking operations when the XenStore
 * service has modified the queues.
 */
static void
xs_intr(void *arg __unused)
{

	/* If xenstore has not been initialized, initialize it now */
	if (!xs.initialized) {
		xs.initialized = true;
		/*
		 * Since this task is probing and attaching devices we
		 * have to hold the Giant lock.
		 */
		taskqueue_enqueue(taskqueue_swi_giant, &xs.xs_late_init);
	}

	/*
	 * Hold ring lock across wakeup so that clients
	 * cannot miss a wakeup.
	 */
	mtx_lock(&xs.ring_lock);
	wakeup(xen_store);
	mtx_unlock(&xs.ring_lock);
}

/**
 * Verify that the indexes for a ring are valid.
 *
 * The difference between the producer and consumer cannot
 * exceed the size of the ring.
 *
 * \param cons  The consumer index for the ring to test.
 * \param prod  The producer index for the ring to test.
 *
 * \retval 1  If indexes are in range.
 * \retval 0  If the indexes are out of range.
 */
static int
xs_check_indexes(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod)
{

	return ((prod - cons) <= XENSTORE_RING_SIZE);
}
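
/*
 * The subtraction is performed on unsigned 32-bit ring indexes, so the
 * check stays correct after the indexes wrap.  A worked example, assuming
 * XENSTORE_RING_SIZE is 1024 (its value in the canonical xs_wire.h):
 *
 *	cons = 0xFFFFFF00, prod = 0x00000100
 *	prod - cons = 0x200 = 512 <= 1024	=> indexes valid
 *
 *	cons = 0x00000100, prod = 0x00001000
 *	prod - cons = 0xF00 = 3840 > 1024	=> indexes corrupt
 */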

/**
 * Return a pointer to, and the length of, the contiguous
 * free region available for output in a ring buffer.
 *
 * \param cons  The consumer index for the ring.
 * \param prod  The producer index for the ring.
 * \param buf   The base address of the ring's storage.
 * \param len   The amount of contiguous storage available.
 *
 * \return  A pointer to the start location of the free region.
 */
static void *
xs_get_output_chunk(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod,
    char *buf, uint32_t *len)
{

	*len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(prod);
	if ((XENSTORE_RING_SIZE - (prod - cons)) < *len)
		*len = XENSTORE_RING_SIZE - (prod - cons);
	return (buf + MASK_XENSTORE_IDX(prod));
}

/**
 * Return a pointer to, and the length of, the contiguous
 * data available to read from a ring buffer.
 *
 * \param cons  The consumer index for the ring.
 * \param prod  The producer index for the ring.
 * \param buf   The base address of the ring's storage.
 * \param len   The amount of contiguous data available to read.
 *
 * \return  A pointer to the start location of the available data.
 */
static const void *
xs_get_input_chunk(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod,
    const char *buf, uint32_t *len)
{

	*len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(cons);
	if ((prod - cons) < *len)
		*len = prod - cons;
	return (buf + MASK_XENSTORE_IDX(cons));
}
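
/*
 * A worked example of the chunk computation, again assuming
 * XENSTORE_RING_SIZE is 1024, so MASK_XENSTORE_IDX(idx) == (idx & 1023):
 *
 *	cons = 1020, prod = 1030	(10 bytes outstanding)
 *	MASK_XENSTORE_IDX(cons) == 1020
 *	*len = 1024 - 1020 = 4		(the data wraps at the ring end)
 *
 * The caller consumes the 4 contiguous bytes at buf + 1020, advances the
 * consumer index to 1024, and a second call returns the remaining
 * 6 bytes at buf + 0.  This is why the read and write loops below never
 * assume a single contiguous region.
 */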

/**
 * Transmit data to the XenStore service.
 *
 * \param tdata  A pointer to the contiguous data to send.
 * \param len    The amount of data to send.
 *
 * \return  On success 0, otherwise an errno value indicating the
 *          cause of failure.
 *
 * \invariant  Called from thread context.
 * \invariant  The buffer pointed to by tdata is at least len bytes
 *             in length.
 * \invariant  xs.request_mutex exclusively locked.
 */
static int
xs_write_store(const void *tdata, unsigned len)
{
	XENSTORE_RING_IDX cons, prod;
	const char *data = (const char *)tdata;
	int error;

	sx_assert(&xs.request_mutex, SX_XLOCKED);
	while (len != 0) {
		void *dst;
		u_int avail;

		/* Hold lock so we can't miss wakeups should we block. */
		mtx_lock(&xs.ring_lock);
		cons = xen_store->req_cons;
		prod = xen_store->req_prod;
		if ((prod - cons) == XENSTORE_RING_SIZE) {
			/*
			 * Output ring is full. Wait for a ring event.
			 *
			 * Note that the events from both queues
			 * are combined, so being woken does not
			 * guarantee that data exist in the read
			 * ring.
			 *
			 * To simplify error recovery and the retry,
			 * we specify PDROP so our lock is *not* held
			 * when msleep returns.
			 */
			error = msleep(xen_store, &xs.ring_lock, PCATCH|PDROP,
			     "xbwrite", /*timeout*/0);
			if (error && error != EWOULDBLOCK)
				return (error);

			/* Try again. */
			continue;
		}
		mtx_unlock(&xs.ring_lock);

		/* Verify queue sanity. */
		if (!xs_check_indexes(cons, prod)) {
			xen_store->req_cons = xen_store->req_prod = 0;
			return (EIO);
		}

		dst = xs_get_output_chunk(cons, prod, xen_store->req, &avail);
		if (avail > len)
			avail = len;

		memcpy(dst, data, avail);
		data += avail;
		len -= avail;

		/*
		 * The store to the producer index, which indicates
		 * to the other side that new data has arrived, must
		 * be visible only after our copy of the data into the
		 * ring has completed.
		 */
		wmb();
		xen_store->req_prod += avail;

		/*
		 * xen_intr_signal() implies mb(). The other side will see
		 * the change to req_prod at the time of the interrupt.
		 */
		xen_intr_signal(xs.xen_intr_handle);
	}

	return (0);
}
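
/*
 * On the wire, every request is a struct xsd_sockmsg header followed by
 * msg.len bytes of payload.  A sketch of how xs_talkv() frames an
 * XS_READ request for a hypothetical node using xs_write_store():
 *
 *	struct xsd_sockmsg msg;
 *
 *	msg.type = XS_READ;
 *	msg.req_id = 0;
 *	msg.tx_id = 0;				// XST_NIL: no transaction
 *	msg.len = sizeof("device/vbd/768/state");
 *	xs_write_store(&msg, sizeof(msg));
 *	xs_write_store("device/vbd/768/state", msg.len);
 *
 * The request_mutex must be held across both writes so that the header
 * and body are not interleaved with another requester's data.
 */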

/**
 * Receive data from the XenStore service.
 *
 * \param tdata  A pointer to the contiguous buffer to receive the data.
 * \param len    The amount of data to receive.
 *
 * \return  On success 0, otherwise an errno value indicating the
 *          cause of failure.
 *
 * \invariant  Called from thread context.
 * \invariant  The buffer pointed to by tdata is at least len bytes
 *             in length.
 *
 * \note xs_read_store does not perform any internal locking to guarantee
 *       serial access to the incoming ring buffer.  However, there
 *       is only one context processing reads: xs_rcv_thread().
 */
static int
xs_read_store(void *tdata, unsigned len)
{
	XENSTORE_RING_IDX cons, prod;
	char *data = (char *)tdata;
	int error;

	while (len != 0) {
		u_int avail;
		const char *src;

		/* Hold lock so we can't miss wakeups should we block. */
		mtx_lock(&xs.ring_lock);
		cons = xen_store->rsp_cons;
		prod = xen_store->rsp_prod;
		if (cons == prod) {
			/*
			 * Nothing to read. Wait for a ring event.
			 *
			 * Note that the events from both queues
			 * are combined, so being woken does not
			 * guarantee that data exist in the read
			 * ring.
			 *
			 * To simplify error recovery and the retry,
			 * we specify PDROP so our lock is *not* held
			 * when msleep returns.
			 */
			error = msleep(xen_store, &xs.ring_lock, PCATCH|PDROP,
			    "xbread", /*timeout*/0);
			if (error && error != EWOULDBLOCK)
				return (error);
			continue;
		}
		mtx_unlock(&xs.ring_lock);

		/* Verify queue sanity. */
		if (!xs_check_indexes(cons, prod)) {
			xen_store->rsp_cons = xen_store->rsp_prod = 0;
			return (EIO);
		}

		src = xs_get_input_chunk(cons, prod, xen_store->rsp, &avail);
		if (avail > len)
			avail = len;

		/*
		 * Ensure the data we read is related to the indexes
		 * we read above.
		 */
		rmb();

		memcpy(data, src, avail);
		data += avail;
		len -= avail;

		/*
		 * Ensure that the producer of this ring does not see
		 * the ring space as free until after we have copied it
		 * out.
		 */
		mb();
		xen_store->rsp_cons += avail;

		/*
		 * xen_intr_signal() implies mb(). The producer will see
		 * the updated consumer index when the event is delivered.
		 */
		xen_intr_signal(xs.xen_intr_handle);
	}

	return (0);
}

/*----------------------- Received Message Processing ------------------------*/
/**
 * Block reading the next message from the XenStore service and
 * process the result.
 *
 * \param type  The returned type of the XenStore message received.
 *
 * \return  0 on success.  Otherwise an errno value indicating the
 *          type of failure encountered.
 */
static int
xs_process_msg(enum xsd_sockmsg_type *type)
{
	struct xs_stored_msg *msg;
	char *body;
	int error;

	msg = malloc(sizeof(*msg), M_XENSTORE, M_WAITOK);
	error = xs_read_store(&msg->hdr, sizeof(msg->hdr));
	if (error) {
		free(msg, M_XENSTORE);
		return (error);
	}

	body = malloc(msg->hdr.len + 1, M_XENSTORE, M_WAITOK);
	error = xs_read_store(body, msg->hdr.len);
	if (error) {
		free(body, M_XENSTORE);
		free(msg, M_XENSTORE);
		return (error);
	}
	body[msg->hdr.len] = '\0';

	*type = msg->hdr.type;
	if (msg->hdr.type == XS_WATCH_EVENT) {
		msg->u.watch.vec = split(body, msg->hdr.len,
		    &msg->u.watch.vec_size);

		mtx_lock(&xs.registered_watches_lock);
		msg->u.watch.handle = find_watch(
		    msg->u.watch.vec[XS_WATCH_TOKEN]);
		mtx_lock(&xs.watch_events_lock);
		if (msg->u.watch.handle != NULL &&
		    (!msg->u.watch.handle->max_pending ||
		    msg->u.watch.handle->pending <
		    msg->u.watch.handle->max_pending)) {
			msg->u.watch.handle->pending++;
			TAILQ_INSERT_TAIL(&xs.watch_events, msg, list);
			wakeup(&xs.watch_events);
			mtx_unlock(&xs.watch_events_lock);
		} else {
			mtx_unlock(&xs.watch_events_lock);
			free(msg->u.watch.vec, M_XENSTORE);
			free(msg, M_XENSTORE);
		}
		mtx_unlock(&xs.registered_watches_lock);
	} else {
		msg->u.reply.body = body;
		mtx_lock(&xs.reply_lock);
		TAILQ_INSERT_TAIL(&xs.reply_list, msg, list);
		wakeup(&xs.reply_list);
		mtx_unlock(&xs.reply_lock);
	}

	return (0);
}

/**
 * Thread body of the XenStore receive thread.
 *
 * This thread blocks waiting for data from the XenStore service
 * and processes any received messages.
 */
static void
xs_rcv_thread(void *arg __unused)
{
	int error;
	enum xsd_sockmsg_type type;

	for (;;) {
		error = xs_process_msg(&type);
		if (error)
			printf("XENSTORE error %d while reading message\n",
			    error);
	}
}

/*---------------- XenStore Message Request/Reply Processing -----------------*/
#define xsd_error_count	(sizeof(xsd_errors) / sizeof(xsd_errors[0]))

/**
 * Convert a XenStore error string into an errno number.
 *
 * \param errorstring  The error string to convert.
 *
 * \return  The errno best matching the input string.
 *
 * \note Unknown error strings are converted to EINVAL.
 */
static int
xs_get_error(const char *errorstring)
{
	u_int i;

	for (i = 0; i < xsd_error_count; i++) {
		if (!strcmp(errorstring, xsd_errors[i].errstring))
			return (xsd_errors[i].errnum);
	}
	log(LOG_WARNING, "XENSTORE xen store gave: unknown error %s",
	    errorstring);
	return (EINVAL);
}

/**
 * Block waiting for a reply to a message request.
 *
 * \param type	  The returned type of the reply.
 * \param len	  The returned body length of the reply.
 * \param result  The returned body of the reply.
 *
 * \return  0 on success.  Otherwise an errno indicating the
 *          cause of failure.
 */
static int
xs_read_reply(enum xsd_sockmsg_type *type, u_int *len, void **result)
{
	struct xs_stored_msg *msg;
	char *body;
	int error;

	mtx_lock(&xs.reply_lock);
	while (TAILQ_EMPTY(&xs.reply_list)) {
		error = mtx_sleep(&xs.reply_list, &xs.reply_lock, 0, "xswait",
		    hz/10);
		if (error && error != EWOULDBLOCK) {
			mtx_unlock(&xs.reply_lock);
			return (error);
		}
	}
	msg = TAILQ_FIRST(&xs.reply_list);
	TAILQ_REMOVE(&xs.reply_list, msg, list);
	mtx_unlock(&xs.reply_lock);

	*type = msg->hdr.type;
	if (len)
		*len = msg->hdr.len;
	body = msg->u.reply.body;

	free(msg, M_XENSTORE);
	*result = body;
	return (0);
}

/**
 * Pass-thru interface for XenStore access by userland processes
 * via the XenStore device.
 *
 * Reply type and length data are returned by overwriting these
 * fields in the passed in request message.
 *
 * \param msg	  A properly formatted message to transmit to
 *		  the XenStore service.
 * \param result  The returned body of the reply.
 *
 * \return  0 on success.  Otherwise an errno indicating the cause
 *          of failure.
 *
 * \note The returned result is provided in malloced storage and thus
 *       must be free'd by the caller with 'free(result, M_XENSTORE)'.
 */
int
xs_dev_request_and_reply(struct xsd_sockmsg *msg, void **result)
{
	int error;

	sx_xlock(&xs.request_mutex);
	if ((error = xs_write_store(msg, sizeof(*msg) + msg->len)) == 0)
		error = xs_read_reply(&msg->type, &msg->len, result);
	sx_xunlock(&xs.request_mutex);

	return (error);
}

/**
 * Send a message with an optionally multi-part body to the XenStore service.
 *
 * \param t              The transaction to use for this request.
 * \param request_type   The type of message to send.
 * \param iovec          Pointers to the body sections of the request.
 * \param num_vecs       The number of body sections in the request.
 * \param len            The returned length of the reply.
 * \param result         The returned body of the reply.
 *
 * \return  0 on success.  Otherwise an errno indicating
 *          the cause of failure.
 *
 * \note The returned result is provided in malloced storage and thus
 *       must be free'd by the caller with 'free(*result, M_XENSTORE)'.
 */
static int
xs_talkv(struct xs_transaction t, enum xsd_sockmsg_type request_type,
    const struct iovec *iovec, u_int num_vecs, u_int *len, void **result)
{
	struct xsd_sockmsg msg;
	void *ret = NULL;
	u_int i;
	int error;

	msg.tx_id = t.id;
	msg.req_id = 0;
	msg.type = request_type;
	msg.len = 0;
	for (i = 0; i < num_vecs; i++)
		msg.len += iovec[i].iov_len;

	sx_xlock(&xs.request_mutex);
	error = xs_write_store(&msg, sizeof(msg));
	if (error) {
		printf("xs_talkv failed %d\n", error);
		goto error_lock_held;
	}

	for (i = 0; i < num_vecs; i++) {
		error = xs_write_store(iovec[i].iov_base, iovec[i].iov_len);
		if (error) {
			printf("xs_talkv failed %d\n", error);
			goto error_lock_held;
		}
	}

	error = xs_read_reply(&msg.type, len, &ret);

error_lock_held:
	sx_xunlock(&xs.request_mutex);
	if (error)
		return (error);

	if (msg.type == XS_ERROR) {
		error = xs_get_error(ret);
		free(ret, M_XENSTORE);
		return (error);
	}

	/* Reply is either error or an echo of our request message type. */
	KASSERT(msg.type == request_type, ("bad xenstore message type"));

	if (result)
		*result = ret;
	else
		free(ret, M_XENSTORE);

	return (0);
}

/**
 * Wrapper for xs_talkv allowing easy transmission of a message with
 * a single, contiguous, message body.
 *
 * \param t              The transaction to use for this request.
 * \param request_type   The type of message to send.
 * \param body           The body of the request.
 * \param len            The returned length of the reply.
 * \param result         The returned body of the reply.
 *
 * \return  0 on success.  Otherwise an errno indicating
 *          the cause of failure.
 *
 * \note The returned result is provided in malloced storage and thus
 *       must be free'd by the caller with 'free(*result, M_XENSTORE)'.
 */
static int
xs_single(struct xs_transaction t, enum xsd_sockmsg_type request_type,
    const char *body, u_int *len, void **result)
{
	struct iovec iovec;

	iovec.iov_base = (void *)(uintptr_t)body;
	iovec.iov_len = strlen(body) + 1;

	return (xs_talkv(t, request_type, &iovec, 1, len, result));
}
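
/*
 * Example use of xs_single(), reading one node outside of any
 * transaction (the node name is hypothetical):
 *
 *	void *val;
 *	u_int len;
 *	int error;
 *
 *	error = xs_single(XST_NIL, XS_READ, "domid", &len, &val);
 *	if (error == 0) {
 *		// val is a NUL terminated, malloced copy of the value.
 *		free(val, M_XENSTORE);
 *	}
 */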

/*------------------------- XenStore Watch Support ---------------------------*/
/**
 * Transmit a watch request to the XenStore service.
 *
 * \param path   The path in the XenStore to watch.
 * \param token  A unique identifier for this watch.
 *
 * \return  0 on success.  Otherwise an errno indicating the
 *          cause of failure.
 */
static int
xs_watch(const char *path, const char *token)
{
	struct iovec iov[2];

	iov[0].iov_base = (void *)(uintptr_t) path;
	iov[0].iov_len = strlen(path) + 1;
	iov[1].iov_base = (void *)(uintptr_t) token;
	iov[1].iov_len = strlen(token) + 1;

	return (xs_talkv(XST_NIL, XS_WATCH, iov, 2, NULL, NULL));
}

/**
 * Transmit an unwatch request to the XenStore service.
 *
 * \param path   The path in the XenStore to stop watching.
 * \param token  The unique identifier of the watch to remove.
 *
 * \return  0 on success.  Otherwise an errno indicating the
 *          cause of failure.
 */
static int
xs_unwatch(const char *path, const char *token)
{
	struct iovec iov[2];

	iov[0].iov_base = (void *)(uintptr_t) path;
	iov[0].iov_len = strlen(path) + 1;
	iov[1].iov_base = (void *)(uintptr_t) token;
	iov[1].iov_len = strlen(token) + 1;

	return (xs_talkv(XST_NIL, XS_UNWATCH, iov, 2, NULL, NULL));
}

/**
 * Convert from watch token (unique identifier) to the associated
 * internal tracking structure for this watch.
 *
 * \param token  The unique identifier for the watch to find.
 *
 * \return  A pointer to the found watch structure or NULL.
 */
static struct xs_watch *
find_watch(const char *token)
{
	struct xs_watch *i, *cmp;

	cmp = (void *)strtoul(token, NULL, 16);

	LIST_FOREACH(i, &xs.registered_watches, list)
		if (i == cmp)
			return (i);

	return (NULL);
}
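
/*
 * The token is simply the watch pointer rendered in hex, so the round
 * trip through the XenStore service looks like:
 *
 *	sprintf(token, "%lX", (long)watch);		// registration
 *	cmp = (void *)strtoul(token, NULL, 16);		// find_watch()
 *	// cmp == watch, provided the watch is still registered.
 *
 * Walking registered_watches before trusting the pointer guards against
 * events that arrive for a watch that has since been unregistered.
 */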

/**
 * Thread body of the XenStore watch event dispatch thread.
 */
static void
xenwatch_thread(void *unused)
{
	struct xs_stored_msg *msg;

	for (;;) {
		mtx_lock(&xs.watch_events_lock);
		while (TAILQ_EMPTY(&xs.watch_events))
			mtx_sleep(&xs.watch_events,
			    &xs.watch_events_lock,
			    PWAIT | PCATCH, "waitev", hz/10);

		mtx_unlock(&xs.watch_events_lock);
		sx_xlock(&xs.xenwatch_mutex);

		mtx_lock(&xs.watch_events_lock);
		msg = TAILQ_FIRST(&xs.watch_events);
		if (msg) {
			TAILQ_REMOVE(&xs.watch_events, msg, list);
			msg->u.watch.handle->pending--;
		}
		mtx_unlock(&xs.watch_events_lock);

		if (msg != NULL) {
			/*
			 * XXX There are messages coming in with a NULL
			 * XXX callback.  This deserves further investigation;
			 * XXX the workaround here simply prevents the kernel
			 * XXX from panic'ing on startup.
			 */
			if (msg->u.watch.handle->callback != NULL)
				msg->u.watch.handle->callback(
					msg->u.watch.handle,
					(const char **)msg->u.watch.vec,
					msg->u.watch.vec_size);
			free(msg->u.watch.vec, M_XENSTORE);
			free(msg, M_XENSTORE);
		}

		sx_xunlock(&xs.xenwatch_mutex);
	}
}

/*----------- XenStore Configuration, Initialization, and Control ------------*/
/**
 * Setup communication channels with the XenStore service.
 *
 * \return  On success, 0. Otherwise an errno value indicating the
 *          type of failure.
 */
static int
xs_init_comms(void)
{
	int error;

	if (xen_store->rsp_prod != xen_store->rsp_cons) {
		log(LOG_WARNING, "XENSTORE response ring is not quiescent "
		    "(%08x:%08x): fixing up\n",
		    xen_store->rsp_cons, xen_store->rsp_prod);
		xen_store->rsp_cons = xen_store->rsp_prod;
	}

	xen_intr_unbind(&xs.xen_intr_handle);

	error = xen_intr_bind_local_port(xs.xs_dev, xs.evtchn,
	    /*filter*/NULL, xs_intr, /*arg*/NULL, INTR_TYPE_NET|INTR_MPSAFE,
	    &xs.xen_intr_handle);
	if (error) {
		log(LOG_WARNING, "XENSTORE request irq failed %i\n", error);
		return (error);
	}

	return (0);
}

/*------------------ Private Device Attachment Functions  --------------------*/
static void
xs_identify(driver_t *driver, device_t parent)
{

	BUS_ADD_CHILD(parent, 0, "xenstore", 0);
}

/**
 * Probe for the existence of the XenStore.
 *
 * \param dev
 */
static int
xs_probe(device_t dev)
{
	/*
	 * We are either operating within a PV kernel or being probed
	 * as the child of the successfully attached xenpci device.
	 * Thus we are in a Xen environment and there will be a XenStore.
	 * Unconditionally return success.
	 */
	device_set_desc(dev, "XenStore");
	return (BUS_PROBE_NOWILDCARD);
}

static void
xs_attach_deferred(void *arg)
{

	bus_generic_probe(xs.xs_dev);
	bus_generic_attach(xs.xs_dev);

	config_intrhook_disestablish(&xs.xs_attachcb);
}

static void
xs_attach_late(void *arg, int pending)
{

	KASSERT((pending == 1), ("xs late attach queued several times"));
	bus_generic_probe(xs.xs_dev);
	bus_generic_attach(xs.xs_dev);
}

/**
 * Attach to the XenStore.
 *
 * This routine also prepares for the probe/attach of drivers that rely
 * on the XenStore.
 */
static int
xs_attach(device_t dev)
{
	int error;

	/* Allow us to get device_t from softc and vice-versa. */
	xs.xs_dev = dev;
	device_set_softc(dev, &xs);

	/* Initialize the interface to xenstore. */
	struct proc *p;

	xs.initialized = false;
	xs.evtchn = xen_get_xenstore_evtchn();
	if (xs.evtchn == 0) {
		struct evtchn_alloc_unbound alloc_unbound;

		/* Allocate a local event channel for xenstore */
		alloc_unbound.dom = DOMID_SELF;
		alloc_unbound.remote_dom = DOMID_SELF;
		error = HYPERVISOR_event_channel_op(
		    EVTCHNOP_alloc_unbound, &alloc_unbound);
		if (error != 0)
			panic(
			   "unable to alloc event channel for Dom0: %d",
			    error);

		xs.evtchn = alloc_unbound.port;

		/* Allocate memory for the xs shared ring */
		xen_store = malloc(PAGE_SIZE, M_XENSTORE, M_WAITOK | M_ZERO);
		xs.gpfn = atop(pmap_kextract((vm_offset_t)xen_store));
	} else {
		xs.gpfn = xen_get_xenstore_mfn();
		xen_store = pmap_mapdev_attr(ptoa(xs.gpfn), PAGE_SIZE,
		    VM_MEMATTR_XEN);
		xs.initialized = true;
	}

	TAILQ_INIT(&xs.reply_list);
	TAILQ_INIT(&xs.watch_events);

	mtx_init(&xs.ring_lock, "ring lock", NULL, MTX_DEF);
	mtx_init(&xs.reply_lock, "reply lock", NULL, MTX_DEF);
	sx_init(&xs.xenwatch_mutex, "xenwatch");
	sx_init(&xs.request_mutex, "xenstore request");
	mtx_init(&xs.registered_watches_lock, "watches", NULL, MTX_DEF);
	mtx_init(&xs.watch_events_lock, "watch events", NULL, MTX_DEF);

	/* Initialize the shared memory rings to talk to xenstored */
	error = xs_init_comms();
	if (error)
		return (error);

	error = kproc_create(xenwatch_thread, NULL, &p, RFHIGHPID,
	    0, "xenwatch");
	if (error)
		return (error);
	xs.xenwatch_pid = p->p_pid;

	error = kproc_create(xs_rcv_thread, NULL, NULL,
	    RFHIGHPID, 0, "xenstore_rcv");

	xs.xs_attachcb.ich_func = xs_attach_deferred;
	xs.xs_attachcb.ich_arg = NULL;
	if (xs.initialized) {
		config_intrhook_establish(&xs.xs_attachcb);
	} else {
		TASK_INIT(&xs.xs_late_init, 0, xs_attach_late, NULL);
	}

	return (error);
}

/**
 * Prepare for suspension of this VM by halting XenStore access after
 * all transactions and individual requests have completed.
 */
static int
xs_suspend(device_t dev)
{
	int error;

	/* Suspend child Xen devices. */
	error = bus_generic_suspend(dev);
	if (error != 0)
		return (error);

	sx_xlock(&xs.request_mutex);

	return (0);
}

/**
 * Resume XenStore operations after this VM is resumed.
 */
static int
xs_resume(device_t dev __unused)
{
	struct xs_watch *watch;
	char token[sizeof(watch) * 2 + 1];

	xs_init_comms();

	sx_xunlock(&xs.request_mutex);

	/*
	 * NB: since xenstore children have not been resumed yet, there's
	 * no need to hold any watch mutex. Having clients try to add or
	 * remove watches at this point (before xenstore is resumed) is
	 * clearly a violation of the resume order.
	 */
	LIST_FOREACH(watch, &xs.registered_watches, list) {
		sprintf(token, "%lX", (long)watch);
		xs_watch(watch->node, token);
	}

	/* Resume child Xen devices. */
	bus_generic_resume(dev);

	return (0);
}

/*-------------------- Private Device Attachment Data  -----------------------*/
static device_method_t xenstore_methods[] = {
	/* Device interface */
	DEVMETHOD(device_identify,	xs_identify),
	DEVMETHOD(device_probe,         xs_probe),
	DEVMETHOD(device_attach,        xs_attach),
	DEVMETHOD(device_detach,        bus_generic_detach),
	DEVMETHOD(device_shutdown,      bus_generic_shutdown),
	DEVMETHOD(device_suspend,       xs_suspend),
	DEVMETHOD(device_resume,        xs_resume),

	/* Bus interface */
	DEVMETHOD(bus_add_child,        bus_generic_add_child),
	DEVMETHOD(bus_alloc_resource,   bus_generic_alloc_resource),
	DEVMETHOD(bus_release_resource, bus_generic_release_resource),
	DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
	DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),

	DEVMETHOD_END
};

DEFINE_CLASS_0(xenstore, xenstore_driver, xenstore_methods, 0);

DRIVER_MODULE(xenstore, xenpv, xenstore_driver, 0, 0);

/*------------------------------- Sysctl Data --------------------------------*/
/* XXX Shouldn't the node be somewhere else? */
SYSCTL_NODE(_dev, OID_AUTO, xen, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
    "Xen");
SYSCTL_INT(_dev_xen, OID_AUTO, xsd_port, CTLFLAG_RD, &xs.evtchn, 0, "");
SYSCTL_ULONG(_dev_xen, OID_AUTO, xsd_kva, CTLFLAG_RD, (u_long *) &xen_store, 0, "");

/*-------------------------------- Public API --------------------------------*/
/*------- API comments for these methods can be found in xenstorevar.h -------*/
bool
xs_initialized(void)
{

	return (xs.initialized);
}

evtchn_port_t
xs_evtchn(void)
{

	return (xs.evtchn);
}

vm_paddr_t
xs_address(void)
{

	return (ptoa(xs.gpfn));
}

int
xs_directory(struct xs_transaction t, const char *dir, const char *node,
    u_int *num, const char ***result)
{
	struct sbuf *path;
	char *strings;
	u_int len = 0;
	int error;

	path = xs_join(dir, node);
	error = xs_single(t, XS_DIRECTORY, sbuf_data(path), &len,
	    (void **)&strings);
	sbuf_delete(path);
	if (error)
		return (error);

	*result = split(strings, len, num);

	return (0);
}
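
/*
 * Example use of xs_directory(), listing the children of a hypothetical
 * path:
 *
 *	const char **children;
 *	u_int count, i;
 *	int error;
 *
 *	error = xs_directory(XST_NIL, "device", "vbd", &count, &children);
 *	if (error == 0) {
 *		for (i = 0; i < count; i++)
 *			printf("child: %s\n", children[i]);
 *		free(children, M_XENSTORE);
 *	}
 */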

int
xs_exists(struct xs_transaction t, const char *dir, const char *node)
{
	const char **d;
	int error, dir_n;

	error = xs_directory(t, dir, node, &dir_n, &d);
	if (error)
		return (0);
	free(d, M_XENSTORE);
	return (1);
}

int
xs_read(struct xs_transaction t, const char *dir, const char *node,
    u_int *len, void **result)
{
	struct sbuf *path;
	void *ret;
	int error;

	path = xs_join(dir, node);
	error = xs_single(t, XS_READ, sbuf_data(path), len, &ret);
	sbuf_delete(path);
	if (error)
		return (error);
	*result = ret;
	return (0);
}

int
xs_write(struct xs_transaction t, const char *dir, const char *node,
    const char *string)
{
	struct sbuf *path;
	struct iovec iovec[2];
	int error;

	path = xs_join(dir, node);

	iovec[0].iov_base = (void *)(uintptr_t) sbuf_data(path);
	iovec[0].iov_len = sbuf_len(path) + 1;
	iovec[1].iov_base = (void *)(uintptr_t) string;
	iovec[1].iov_len = strlen(string);

	error = xs_talkv(t, XS_WRITE, iovec, 2, NULL, NULL);
	sbuf_delete(path);

	return (error);
}

int
xs_mkdir(struct xs_transaction t, const char *dir, const char *node)
{
	struct sbuf *path;
	int ret;

	path = xs_join(dir, node);
	ret = xs_single(t, XS_MKDIR, sbuf_data(path), NULL, NULL);
	sbuf_delete(path);

	return (ret);
}

int
xs_rm(struct xs_transaction t, const char *dir, const char *node)
{
	struct sbuf *path;
	int ret;

	path = xs_join(dir, node);
	ret = xs_single(t, XS_RM, sbuf_data(path), NULL, NULL);
	sbuf_delete(path);

	return (ret);
}

int
xs_rm_tree(struct xs_transaction xbt, const char *base, const char *node)
{
	struct xs_transaction local_xbt;
	struct sbuf *root_path_sbuf;
	struct sbuf *cur_path_sbuf;
	char *root_path;
	char *cur_path;
	const char **dir;
	int error;

retry:
	root_path_sbuf = xs_join(base, node);
	cur_path_sbuf  = xs_join(base, node);
	root_path      = sbuf_data(root_path_sbuf);
	cur_path       = sbuf_data(cur_path_sbuf);
	dir            = NULL;
	local_xbt.id   = 0;

	if (xbt.id == 0) {
		error = xs_transaction_start(&local_xbt);
		if (error != 0)
			goto out;
		xbt = local_xbt;
	}

	while (1) {
		u_int count;
		u_int i;

		error = xs_directory(xbt, cur_path, "", &count, &dir);
		if (error)
			goto out;

		for (i = 0; i < count; i++) {
			error = xs_rm(xbt, cur_path, dir[i]);
			if (error == ENOTEMPTY) {
				struct sbuf *push_dir;

				/*
				 * Descend to clear out this sub directory.
				 * We'll return to cur_dir once push_dir
				 * is empty.
				 */
				push_dir = xs_join(cur_path, dir[i]);
				sbuf_delete(cur_path_sbuf);
				cur_path_sbuf = push_dir;
				cur_path = sbuf_data(cur_path_sbuf);
				break;
			} else if (error != 0) {
				goto out;
			}
		}

		free(dir, M_XENSTORE);
		dir = NULL;

		if (i == count) {
			char *last_slash;

			/* Directory is empty.  It is now safe to remove. */
			error = xs_rm(xbt, cur_path, "");
			if (error != 0)
				goto out;

			if (!strcmp(cur_path, root_path))
				break;

			/* Return to processing the parent directory. */
			last_slash = strrchr(cur_path, '/');
			KASSERT(last_slash != NULL,
				("xs_rm_tree: mangled path %s", cur_path));
			*last_slash = '\0';
		}
	}

out:
	sbuf_delete(cur_path_sbuf);
	sbuf_delete(root_path_sbuf);
	if (dir != NULL)
		free(dir, M_XENSTORE);

	if (local_xbt.id != 0) {
		int terror;

		terror = xs_transaction_end(local_xbt, /*abort*/error != 0);
		xbt.id = 0;
		if (terror == EAGAIN && error == 0)
			goto retry;
	}
	return (error);
}

int
xs_transaction_start(struct xs_transaction *t)
{
	char *id_str;
	int error;

	error = xs_single(XST_NIL, XS_TRANSACTION_START, "", NULL,
	    (void **)&id_str);
	if (error == 0) {
		t->id = strtoul(id_str, NULL, 0);
		free(id_str, M_XENSTORE);
	}
	return (error);
}

int
xs_transaction_end(struct xs_transaction t, int abort)
{
	char abortstr[2];

	if (abort)
		strcpy(abortstr, "F");
	else
		strcpy(abortstr, "T");

	return (xs_single(t, XS_TRANSACTION_END, abortstr, NULL, NULL));
}
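
/*
 * The usual pattern for a multi-operation update is to retry the whole
 * transaction when ending it returns EAGAIN, since the XenStore service
 * aborts transactions that raced with another writer.  A minimal sketch
 * (paths and values are hypothetical):
 *
 *	struct xs_transaction t;
 *	int error;
 *
 *	do {
 *		error = xs_transaction_start(&t);
 *		if (error != 0)
 *			break;
 *		error = xs_printf(t, "device/vbd/768", "state", "%d", 3);
 *		if (error != 0) {
 *			(void)xs_transaction_end(t, 1);	// abort
 *			break;
 *		}
 *		error = xs_transaction_end(t, 0);	// commit
 *	} while (error == EAGAIN);
 */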

int
xs_scanf(struct xs_transaction t, const char *dir, const char *node,
     int *scancountp, const char *fmt, ...)
{
	va_list ap;
	int error, ns;
	char *val;

	error = xs_read(t, dir, node, NULL, (void **) &val);
	if (error)
		return (error);

	va_start(ap, fmt);
	ns = vsscanf(val, fmt, ap);
	va_end(ap);
	free(val, M_XENSTORE);
	/* Distinctive errno. */
	if (ns == 0)
		return (ERANGE);
	if (scancountp)
		*scancountp = ns;
	return (0);
}
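
/*
 * Example use of xs_scanf(), parsing an integer node (the path is
 * hypothetical):
 *
 *	int state;
 *	int error;
 *
 *	error = xs_scanf(XST_NIL, "device/vbd/768", "state", NULL,
 *	    "%d", &state);
 *	// ERANGE means the node existed but did not match the format.
 */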

int
xs_vprintf(struct xs_transaction t,
    const char *dir, const char *node, const char *fmt, va_list ap)
{
	struct sbuf *sb;
	int error;

	sb = sbuf_new_auto();
	sbuf_vprintf(sb, fmt, ap);
	sbuf_finish(sb);
	error = xs_write(t, dir, node, sbuf_data(sb));
	sbuf_delete(sb);

	return (error);
}

int
xs_printf(struct xs_transaction t, const char *dir, const char *node,
     const char *fmt, ...)
{
	va_list ap;
	int error;

	va_start(ap, fmt);
	error = xs_vprintf(t, dir, node, fmt, ap);
	va_end(ap);

	return (error);
}

int
xs_gather(struct xs_transaction t, const char *dir, ...)
{
	va_list ap;
	const char *name;
	int error;

	va_start(ap, dir);
	error = 0;
	while (error == 0 && (name = va_arg(ap, char *)) != NULL) {
		const char *fmt = va_arg(ap, char *);
		void *result = va_arg(ap, void *);
		char *p;

		error = xs_read(t, dir, name, NULL, (void **) &p);
		if (error)
			break;

		if (fmt) {
			if (sscanf(p, fmt, result) == 0)
				error = EINVAL;
			free(p, M_XENSTORE);
		} else
			*(char **)result = p;
	}
	va_end(ap);

	return (error);
}
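
/*
 * Example use of xs_gather(), fetching several sibling nodes in one
 * call (paths are hypothetical).  Each triple is (name, scanf format,
 * result pointer); a NULL format stores the malloced string itself, and
 * a NULL name terminates the list:
 *
 *	unsigned int ring_ref;
 *	char *proto;
 *	int error;
 *
 *	error = xs_gather(XST_NIL, "device/vbd/768",
 *	    "ring-ref", "%u", &ring_ref,
 *	    "protocol", NULL, &proto,
 *	    NULL);
 *	if (error == 0)
 *		free(proto, M_XENSTORE);
 */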

int
xs_register_watch(struct xs_watch *watch)
{
	/* Pointer in ascii is the token. */
	char token[sizeof(watch) * 2 + 1];
	int error;

	watch->pending = 0;
	sprintf(token, "%lX", (long)watch);

	mtx_lock(&xs.registered_watches_lock);
	KASSERT(find_watch(token) == NULL, ("watch already registered"));
	LIST_INSERT_HEAD(&xs.registered_watches, watch, list);
	mtx_unlock(&xs.registered_watches_lock);

	error = xs_watch(watch->node, token);

	/* Ignore errors due to multiple registration. */
	if (error == EEXIST)
		error = 0;

	if (error != 0) {
		mtx_lock(&xs.registered_watches_lock);
		LIST_REMOVE(watch, list);
		mtx_unlock(&xs.registered_watches_lock);
	}

	return (error);
}
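
/*
 * A minimal sketch of registering a watch; the callback and node below
 * are hypothetical.  The callback runs in xenwatch_thread() context,
 * serialized by xenwatch_mutex, and the xs_watch structure (including
 * its node string) must remain valid until the watch is unregistered:
 *
 *	static void
 *	example_cb(struct xs_watch *w, const char **vec, u_int len)
 *	{
 *		printf("node %s changed\n", vec[XS_WATCH_PATH]);
 *	}
 *
 *	static struct xs_watch example_watch = {
 *		.node = "device/vbd/768/state",
 *		.callback = example_cb,
 *	};
 *
 *	error = xs_register_watch(&example_watch);
 */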

void
xs_unregister_watch(struct xs_watch *watch)
{
	struct xs_stored_msg *msg, *tmp;
	char token[sizeof(watch) * 2 + 1];
	int error;

	sprintf(token, "%lX", (long)watch);

	mtx_lock(&xs.registered_watches_lock);
	if (find_watch(token) == NULL) {
		mtx_unlock(&xs.registered_watches_lock);
		return;
	}
	LIST_REMOVE(watch, list);
	mtx_unlock(&xs.registered_watches_lock);

	error = xs_unwatch(watch->node, token);
	if (error)
		log(LOG_WARNING, "XENSTORE Failed to release watch %s: %i\n",
		    watch->node, error);

	/* Cancel pending watch events. */
	mtx_lock(&xs.watch_events_lock);
	TAILQ_FOREACH_SAFE(msg, &xs.watch_events, list, tmp) {
		if (msg->u.watch.handle != watch)
			continue;
		TAILQ_REMOVE(&xs.watch_events, msg, list);
		free(msg->u.watch.vec, M_XENSTORE);
		free(msg, M_XENSTORE);
	}
	mtx_unlock(&xs.watch_events_lock);

	/* Flush any currently-executing callback, unless we are it. :-) */
	if (curproc->p_pid != xs.xenwatch_pid) {
		sx_xlock(&xs.xenwatch_mutex);
		sx_xunlock(&xs.xenwatch_mutex);
	}
}

void
xs_lock(void)
{

	sx_xlock(&xs.request_mutex);
	return;
}

void
xs_unlock(void)
{

	sx_xunlock(&xs.request_mutex);
	return;
}