/******************************************************************************
 * xenstore.c
 *
 * Low-level kernel interface to the XenStore.
 *
 * Copyright (C) 2005 Rusty Russell, IBM Corporation
 * Copyright (C) 2009,2010 Spectra Logic Corporation
 *
 * This file may be distributed separately from the Linux kernel, or
 * incorporated into other software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/sx.h>
#include <sys/syslog.h>
#include <sys/malloc.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/kthread.h>
#include <sys/sbuf.h>
#include <sys/sysctl.h>
#include <sys/uio.h>
#include <sys/unistd.h>
#include <sys/queue.h>
#include <sys/taskqueue.h>

#include <machine/stdarg.h>

#include <xen/xen-os.h>
#include <xen/hypervisor.h>
#include <xen/xen_intr.h>

#include <xen/interface/hvm/params.h>
#include <xen/hvm.h>

#include <xen/xenstore/xenstorevar.h>
#include <xen/xenstore/xenstore_internal.h>

#include <vm/vm.h>
#include <vm/pmap.h>

/**
 * \file xenstore.c
 * \brief XenStore interface
 *
 * The XenStore interface is a simple storage system that is a means of
 * communicating state and configuration data between the Xen Domain 0
 * and the various guest domains.  Other than a small amount of essential
 * information required during the early boot process of launching a
 * Xen-aware guest, all configuration data is managed using the XenStore.
 *
 * The XenStore is ASCII string based, and has a structure and semantics
 * similar to a filesystem.  There are files and directories, and the
 * directories are able to contain files or other directories.  The depth
 * of the hierarchy is only limited by the XenStore's maximum path length.
 *
 * The communication channel between the XenStore service and other
 * domains is via two, guest specific, ring buffers in a shared memory
 * area.  One ring buffer is used for communicating in each direction.
 * The grant table references for this shared memory are given to the
 * guest either via the xen_start_info structure for a fully para-
 * virtualized guest, or via HVM hypercalls for a hardware virtualized
 * guest.
 *
 * The XenStore communication relies on an event channel and thus
 * interrupts.  For this reason, the attachment of the XenStore
 * relies on an interrupt driven configuration hook to hold off
 * boot processing until communication with the XenStore service
 * can be established.
 *
 * Several Xen services depend on the XenStore, most notably the
 * XenBus used to discover and manage Xen devices.  These services
 * are implemented as NewBus child attachments to a bus exported
 * by this XenStore driver.
 */
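
/*
 * Illustrative sketch of the hierarchy (hypothetical paths and values, not
 * taken from a live system): a guest's network frontend might see nodes
 * such as the following, where "device/vif/0" is a directory and "mac" is
 * a file-like node holding a string value.
 *
 *	device/vif/0/mac     = "00:16:3e:xx:xx:xx"
 *	device/vif/0/backend = "/local/domain/0/backend/vif/1/0"
 */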

static struct xs_watch *find_watch(const char *token);

MALLOC_DEFINE(M_XENSTORE, "xenstore", "XenStore data and results");

/**
 * Pointer to shared memory communication structures allowing us
 * to communicate with the XenStore service.
 *
 * When operating in full PV mode, this pointer is set early in kernel
 * startup from within xen_machdep.c.  In HVM mode, we use hypercalls
 * to get the guest frame number for the shared page and then map it
 * into kva.  See xs_attach() for details.
 */
static struct xenstore_domain_interface *xen_store;

/*-------------------------- Private Data Structures ------------------------*/

/**
 * Structure capturing messages received from the XenStore service.
 */
struct xs_stored_msg {
	TAILQ_ENTRY(xs_stored_msg) list;

	struct xsd_sockmsg hdr;

	union {
		/* Queued replies. */
		struct {
			char *body;
		} reply;

		/* Queued watch events. */
		struct {
			struct xs_watch *handle;
			const char **vec;
			u_int vec_size;
		} watch;
	} u;
};
TAILQ_HEAD(xs_stored_msg_list, xs_stored_msg);

/**
 * Container for all XenStore related state.
 */
struct xs_softc {
	/** Newbus device for the XenStore. */
	device_t xs_dev;

	/**
	 * Lock serializing access to ring producer/consumer
	 * indexes.  Use of this lock guarantees that wakeups
	 * of blocking readers/writers are not missed due to
	 * races with the XenStore service.
	 */
	struct mtx ring_lock;

	/**
	 * Mutex used to ensure exclusive access to the outgoing
	 * communication ring.  We use a lock type that can be
	 * held while sleeping so that xs_write() can block waiting
	 * for space in the ring to free up, without allowing another
	 * writer to come in and corrupt a partial message write.
	 */
	struct sx request_mutex;

	/**
	 * A list of replies to our requests.
	 *
	 * The reply list is filled by xs_rcv_thread().  It
	 * is consumed by the context that issued the request
	 * to which a reply is made.  The requester blocks in
	 * xs_read_reply().
	 *
	 * \note Only one requesting context can be active at a time.
	 *       This is guaranteed by the request_mutex and ensures
	 *       that the requester sees replies matching the order
	 *       of its requests.
	 */
	struct xs_stored_msg_list reply_list;

	/** Lock protecting the reply list. */
	struct mtx reply_lock;

	/**
	 * List of registered watches.
	 */
	struct xs_watch_list  registered_watches;

	/** Lock protecting the registered watches list. */
	struct mtx registered_watches_lock;

	/**
	 * List of pending watch callback events.
	 */
	struct xs_stored_msg_list watch_events;

	/** Lock protecting the watch callback list. */
	struct mtx watch_events_lock;

	/**
	 * The process ID of the xenwatch thread.
	 */
	pid_t xenwatch_pid;

	/**
	 * Sleepable mutex used to gate the execution of XenStore
	 * watch event callbacks.
	 *
	 * xenwatch_thread holds an exclusive lock on this mutex
	 * while delivering event callbacks, and xs_unregister_watch()
	 * uses an exclusive lock of this mutex to guarantee that no
	 * callbacks of the just unregistered watch are pending
	 * before returning to its caller.
	 */
	struct sx xenwatch_mutex;

	/**
	 * The HVM guest pseudo-physical frame number.  This is Xen's mapping
	 * of the true machine frame number into our "physical address space".
	 */
	unsigned long gpfn;

	/**
	 * The event channel for communicating with the
	 * XenStore service.
	 */
	int evtchn;

	/** Handle for XenStore interrupts. */
	xen_intr_handle_t xen_intr_handle;

	/**
	 * Interrupt driven config hook allowing us to defer
	 * attaching children until interrupts (and thus communication
	 * with the XenStore service) are available.
	 */
	struct intr_config_hook xs_attachcb;

	/**
	 * XenStore is a user-space process that usually runs in Dom0,
	 * so if this domain is booting as Dom0, xenstore won't be
	 * accessible, and we have to defer the initialization of
	 * xenstore related devices until later (when xenstore is
	 * started).
	 */
	bool initialized;

	/**
	 * Task to run when xenstore is initialized (Dom0 only); it will
	 * take care of attaching xenstore related devices.
	 */
	struct task xs_late_init;
};

/*-------------------------------- Global Data ------------------------------*/
static struct xs_softc xs;

/*------------------------- Private Utility Functions -----------------------*/

/**
 * Count and optionally record pointers to a number of NUL terminated
 * strings in a buffer.
 *
 * \param strings  A pointer to a contiguous buffer of NUL terminated strings.
 * \param dest	   An array to store pointers to each string found in strings.
 * \param len	   The length of the buffer pointed to by strings.
 *
 * \return  A count of the number of strings found.
 */
static u_int
extract_strings(const char *strings, const char **dest, u_int len)
{
	u_int num;
	const char *p;

	for (p = strings, num = 0; p < strings + len; p += strlen(p) + 1) {
		if (dest != NULL)
			*dest++ = p;
		num++;
	}

	return (num);
}

/**
 * Convert a contiguous buffer containing a series of NUL terminated
 * strings into an array of pointers to strings.
 *
 * The returned pointer references the array of string pointers which
 * is followed by the storage for the string data.  It is the client's
 * responsibility to free this storage.
 *
 * The storage addressed by strings is free'd prior to split returning.
 *
 * \param strings  A pointer to a contiguous buffer of NUL terminated strings.
 * \param len	   The length of the buffer pointed to by strings.
 * \param num	   The number of strings found and returned in the strings
 *                 array.
 *
 * \return  An array of pointers to the strings found in the input buffer.
 */
static const char **
split(char *strings, u_int len, u_int *num)
{
	const char **ret;

	/* Protect against unterminated buffers. */
	if (len > 0)
		strings[len - 1] = '\0';

	/* Count the strings. */
	*num = extract_strings(strings, /*dest*/NULL, len);

	/* Transfer to one big alloc for easy freeing by the caller. */
	ret = malloc(*num * sizeof(char *) + len, M_XENSTORE, M_WAITOK);
	memcpy(&ret[*num], strings, len);
	free(strings, M_XENSTORE);

	/* Extract pointers to newly allocated array. */
	strings = (char *)&ret[*num];
	(void)extract_strings(strings, /*dest*/ret, len);

	return (ret);
}
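
/*
 * Example of the packed string format consumed by extract_strings() and
 * split() (an illustrative sketch, not from the original source): the
 * 8 byte buffer "foo\0bar\0" yields num == 2, with the first returned
 * pointer addressing "foo" and the second addressing "bar".  After
 * split(), the pointer array and the string data share one allocation,
 * so a single free() releases everything:
 *
 *	u_int num;
 *	const char **vec = split(strings, len, &num);
 *	...
 *	free(vec, M_XENSTORE);
 */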

/*------------------------- Public Utility Functions -------------------------*/
/*------- API comments for these methods can be found in xenstorevar.h -------*/
struct sbuf *
xs_join(const char *dir, const char *name)
{
	struct sbuf *sb;

	sb = sbuf_new_auto();
	sbuf_cat(sb, dir);
	if (name[0] != '\0') {
		sbuf_putc(sb, '/');
		sbuf_cat(sb, name);
	}
	sbuf_finish(sb);

	return (sb);
}
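
/*
 * Usage sketch for xs_join() (hypothetical values): xs_join("device",
 * "vif") yields an sbuf containing "device/vif", while xs_join("device",
 * "") yields just "device".  The caller owns the returned sbuf:
 *
 *	struct sbuf *path = xs_join("device", "vif");
 *	... use sbuf_data(path) ...
 *	sbuf_delete(path);
 */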

/*-------------------- Low Level Communication Management --------------------*/
/**
 * Interrupt handler for the XenStore event channel.
 *
 * XenStore reads and writes block on "xen_store" for buffer
 * space.  Wakeup any blocking operations when the XenStore
 * service has modified the queues.
 */
static void
xs_intr(void *arg __unused)
{

	/* If xenstore has not been initialized, initialize it now */
	if (!xs.initialized) {
		xs.initialized = true;
		/*
		 * Since this task is probing and attaching devices we
		 * have to hold the Giant lock.
		 */
		taskqueue_enqueue(taskqueue_swi_giant, &xs.xs_late_init);
	}

	/*
	 * Hold ring lock across wakeup so that clients
	 * cannot miss a wakeup.
	 */
	mtx_lock(&xs.ring_lock);
	wakeup(xen_store);
	mtx_unlock(&xs.ring_lock);
}

/**
 * Verify that the indexes for a ring are valid.
 *
 * The difference between the producer and consumer cannot
 * exceed the size of the ring.
 *
 * \param cons  The consumer index for the ring to test.
 * \param prod  The producer index for the ring to test.
 *
 * \retval 1  If indexes are in range.
 * \retval 0  If the indexes are out of range.
 */
static int
xs_check_indexes(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod)
{

	return ((prod - cons) <= XENSTORE_RING_SIZE);
}

/**
 * Return a pointer to, and the length of, the contiguous
 * free region available for output in a ring buffer.
 *
 * \param cons  The consumer index for the ring.
 * \param prod  The producer index for the ring.
 * \param buf   The base address of the ring's storage.
 * \param len   The amount of contiguous storage available.
 *
 * \return  A pointer to the start location of the free region.
 */
static void *
xs_get_output_chunk(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod,
    char *buf, uint32_t *len)
{

	*len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(prod);
	if ((XENSTORE_RING_SIZE - (prod - cons)) < *len)
		*len = XENSTORE_RING_SIZE - (prod - cons);
	return (buf + MASK_XENSTORE_IDX(prod));
}

/**
 * Return a pointer to, and the length of, the contiguous
 * data available to read from a ring buffer.
 *
 * \param cons  The consumer index for the ring.
 * \param prod  The producer index for the ring.
 * \param buf   The base address of the ring's storage.
 * \param len   The amount of contiguous data available to read.
 *
 * \return  A pointer to the start location of the available data.
 */
static const void *
xs_get_input_chunk(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod,
    const char *buf, uint32_t *len)
{

	*len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(cons);
	if ((prod - cons) < *len)
		*len = prod - cons;
	return (buf + MASK_XENSTORE_IDX(cons));
}
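
/*
 * Worked example of the chunking math (XENSTORE_RING_SIZE is 1024 in the
 * Xen interface headers; the indexes here are hypothetical): with
 * cons == 1000 and prod == 1050, the 50 bytes of available data wrap
 * around the end of the ring.  xs_get_input_chunk() reports only the
 * first contiguous piece: *len = 1024 - MASK_XENSTORE_IDX(1000) = 24
 * bytes starting at buf + 1000.  The caller's copy loop then advances
 * cons and calls back to pick up the remaining 26 bytes at buf + 0.
 */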

/**
 * Transmit data to the XenStore service.
 *
 * \param tdata  A pointer to the contiguous data to send.
 * \param len    The amount of data to send.
 *
 * \return  On success 0, otherwise an errno value indicating the
 *          cause of failure.
 *
 * \invariant  Called from thread context.
 * \invariant  The buffer pointed to by tdata is at least len bytes
 *             in length.
 * \invariant  xs.request_mutex exclusively locked.
 */
static int
xs_write_store(const void *tdata, unsigned len)
{
	XENSTORE_RING_IDX cons, prod;
	const char *data = (const char *)tdata;
	int error;

	sx_assert(&xs.request_mutex, SX_XLOCKED);
	while (len != 0) {
		void *dst;
		u_int avail;

		/* Hold lock so we can't miss wakeups should we block. */
		mtx_lock(&xs.ring_lock);
		cons = xen_store->req_cons;
		prod = xen_store->req_prod;
		if ((prod - cons) == XENSTORE_RING_SIZE) {
			/*
			 * Output ring is full. Wait for a ring event.
			 *
			 * Note that the events from both queues
			 * are combined, so being woken does not
			 * guarantee that data exist in the read
			 * ring.
			 *
			 * To simplify error recovery and the retry,
			 * we specify PDROP so our lock is *not* held
			 * when msleep returns.
			 */
			error = msleep(xen_store, &xs.ring_lock, PCATCH|PDROP,
			    "xbwrite", /*timeout*/0);
			if (error && error != EWOULDBLOCK)
				return (error);

			/* Try again. */
			continue;
		}
		mtx_unlock(&xs.ring_lock);

		/* Verify queue sanity. */
		if (!xs_check_indexes(cons, prod)) {
			xen_store->req_cons = xen_store->req_prod = 0;
			return (EIO);
		}

		dst = xs_get_output_chunk(cons, prod, xen_store->req, &avail);
		if (avail > len)
			avail = len;

		memcpy(dst, data, avail);
		data += avail;
		len -= avail;

		/*
		 * The store to the producer index, which indicates
		 * to the other side that new data has arrived, must
		 * be visible only after our copy of the data into the
		 * ring has completed.
		 */
		wmb();
		xen_store->req_prod += avail;

		/*
		 * xen_intr_signal() implies mb(). The other side will see
		 * the change to req_prod at the time of the interrupt.
		 */
		xen_intr_signal(xs.xen_intr_handle);
	}

	return (0);
}

/**
 * Receive data from the XenStore service.
 *
 * \param tdata  A pointer to the contiguous buffer to receive the data.
 * \param len    The amount of data to receive.
 *
 * \return  On success 0, otherwise an errno value indicating the
 *          cause of failure.
 *
 * \invariant  Called from thread context.
 * \invariant  The buffer pointed to by tdata is at least len bytes
 *             in length.
 *
 * \note xs_read_store does not perform any internal locking to guarantee
 *       serial access to the incoming ring buffer.  However, there
 *       is only one context processing reads: xs_rcv_thread().
 */
static int
xs_read_store(void *tdata, unsigned len)
{
	XENSTORE_RING_IDX cons, prod;
	char *data = (char *)tdata;
	int error;

	while (len != 0) {
		u_int avail;
		const char *src;

		/* Hold lock so we can't miss wakeups should we block. */
		mtx_lock(&xs.ring_lock);
		cons = xen_store->rsp_cons;
		prod = xen_store->rsp_prod;
		if (cons == prod) {
			/*
			 * Nothing to read. Wait for a ring event.
			 *
			 * Note that the events from both queues
			 * are combined, so being woken does not
			 * guarantee that data exist in the read
			 * ring.
			 *
			 * To simplify error recovery and the retry,
			 * we specify PDROP so our lock is *not* held
			 * when msleep returns.
			 */
			error = msleep(xen_store, &xs.ring_lock, PCATCH|PDROP,
			    "xbread", /*timeout*/0);
			if (error && error != EWOULDBLOCK)
				return (error);
			continue;
		}
		mtx_unlock(&xs.ring_lock);

		/* Verify queue sanity. */
		if (!xs_check_indexes(cons, prod)) {
			xen_store->rsp_cons = xen_store->rsp_prod = 0;
			return (EIO);
		}

		src = xs_get_input_chunk(cons, prod, xen_store->rsp, &avail);
		if (avail > len)
			avail = len;

		/*
		 * Ensure the data we read is related to the indexes
		 * we read above.
		 */
		rmb();

		memcpy(data, src, avail);
		data += avail;
		len -= avail;

		/*
		 * Ensure that the producer of this ring does not see
		 * the ring space as free until after we have copied it
		 * out.
		 */
		mb();
		xen_store->rsp_cons += avail;

		/*
		 * xen_intr_signal() implies mb(). The producer will see
		 * the updated consumer index when the event is delivered.
		 */
		xen_intr_signal(xs.xen_intr_handle);
	}

	return (0);
}

/*----------------------- Received Message Processing ------------------------*/
/**
 * Block reading the next message from the XenStore service and
 * process the result.
 *
 * \param type  The returned type of the XenStore message received.
 *
 * \return  0 on success.  Otherwise an errno value indicating the
 *          type of failure encountered.
 */
static int
xs_process_msg(enum xsd_sockmsg_type *type)
{
	struct xs_stored_msg *msg;
	char *body;
	int error;

	msg = malloc(sizeof(*msg), M_XENSTORE, M_WAITOK);
	error = xs_read_store(&msg->hdr, sizeof(msg->hdr));
	if (error) {
		free(msg, M_XENSTORE);
		return (error);
	}

	body = malloc(msg->hdr.len + 1, M_XENSTORE, M_WAITOK);
	error = xs_read_store(body, msg->hdr.len);
	if (error) {
		free(body, M_XENSTORE);
		free(msg, M_XENSTORE);
		return (error);
	}
	body[msg->hdr.len] = '\0';

	*type = msg->hdr.type;
	if (msg->hdr.type == XS_WATCH_EVENT) {
		msg->u.watch.vec = split(body, msg->hdr.len,
		    &msg->u.watch.vec_size);

		mtx_lock(&xs.registered_watches_lock);
		msg->u.watch.handle = find_watch(
		    msg->u.watch.vec[XS_WATCH_TOKEN]);
		mtx_lock(&xs.watch_events_lock);
		if (msg->u.watch.handle != NULL &&
		    (!msg->u.watch.handle->max_pending ||
		    msg->u.watch.handle->pending <
		    msg->u.watch.handle->max_pending)) {
			msg->u.watch.handle->pending++;
			TAILQ_INSERT_TAIL(&xs.watch_events, msg, list);
			wakeup(&xs.watch_events);
			mtx_unlock(&xs.watch_events_lock);
		} else {
			mtx_unlock(&xs.watch_events_lock);
			free(msg->u.watch.vec, M_XENSTORE);
			free(msg, M_XENSTORE);
		}
		mtx_unlock(&xs.registered_watches_lock);
	} else {
		msg->u.reply.body = body;
		mtx_lock(&xs.reply_lock);
		TAILQ_INSERT_TAIL(&xs.reply_list, msg, list);
		wakeup(&xs.reply_list);
		mtx_unlock(&xs.reply_lock);
	}

	return (0);
}

/**
 * Thread body of the XenStore receive thread.
 *
 * This thread blocks waiting for data from the XenStore service
 * and processes any received messages.
 */
static void
xs_rcv_thread(void *arg __unused)
{
	int error;
	enum xsd_sockmsg_type type;

	for (;;) {
		error = xs_process_msg(&type);
		if (error)
			printf("XENSTORE error %d while reading message\n",
			    error);
	}
}

/*---------------- XenStore Message Request/Reply Processing -----------------*/
#define xsd_error_count	(sizeof(xsd_errors) / sizeof(xsd_errors[0]))

/**
 * Convert a XenStore error string into an errno number.
 *
 * \param errorstring  The error string to convert.
 *
 * \return  The errno best matching the input string.
 *
 * \note Unknown error strings are converted to EINVAL.
 */
static int
xs_get_error(const char *errorstring)
{
	u_int i;

	for (i = 0; i < xsd_error_count; i++) {
		if (!strcmp(errorstring, xsd_errors[i].errstring))
			return (xsd_errors[i].errnum);
	}
	log(LOG_WARNING, "XENSTORE unknown error %s\n",
	    errorstring);
	return (EINVAL);
}

/**
 * Block waiting for a reply to a message request.
 *
 * \param type	  The returned type of the reply.
 * \param len	  The returned body length of the reply.
 * \param result  The returned body of the reply.
 *
 * \return  0 on success.  Otherwise an errno indicating the
 *          cause of failure.
 */
static int
xs_read_reply(enum xsd_sockmsg_type *type, u_int *len, void **result)
{
	struct xs_stored_msg *msg;
	char *body;
	int error;

	mtx_lock(&xs.reply_lock);
	while (TAILQ_EMPTY(&xs.reply_list)) {
		error = mtx_sleep(&xs.reply_list, &xs.reply_lock, 0, "xswait",
		    hz/10);
		if (error && error != EWOULDBLOCK) {
			mtx_unlock(&xs.reply_lock);
			return (error);
		}
	}
	msg = TAILQ_FIRST(&xs.reply_list);
	TAILQ_REMOVE(&xs.reply_list, msg, list);
	mtx_unlock(&xs.reply_lock);

	*type = msg->hdr.type;
	if (len)
		*len = msg->hdr.len;
	body = msg->u.reply.body;

	free(msg, M_XENSTORE);
	*result = body;
	return (0);
}

/**
 * Pass-thru interface for XenStore access by userland processes
 * via the XenStore device.
 *
 * Reply type and length data are returned by overwriting these
 * fields in the passed in request message.
 *
 * \param msg	  A properly formatted message to transmit to
 *		  the XenStore service.
 * \param result  The returned body of the reply.
 *
 * \return  0 on success.  Otherwise an errno indicating the cause
 *          of failure.
 *
 * \note The returned result is provided in malloced storage and thus
 *       must be free'd by the caller with 'free(result, M_XENSTORE)'.
 */
int
xs_dev_request_and_reply(struct xsd_sockmsg *msg, void **result)
{
	uint32_t request_type;
	int error;

	request_type = msg->type;

	sx_xlock(&xs.request_mutex);
	if ((error = xs_write_store(msg, sizeof(*msg) + msg->len)) == 0)
		error = xs_read_reply(&msg->type, &msg->len, result);
	sx_xunlock(&xs.request_mutex);

	return (error);
}
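
/*
 * Usage sketch (hypothetical caller, modeled on the XenStore device's
 * write path): msg is a header followed by msg->len bytes of body in one
 * contiguous buffer; the reply body comes back in a separate allocation.
 *
 *	void *reply;
 *	error = xs_dev_request_and_reply(msg, &reply);
 *	if (error == 0) {
 *		... copy msg (updated type/len) and reply out ...
 *		free(reply, M_XENSTORE);
 *	}
 */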

/**
 * Send a message with an optionally multi-part body to the XenStore service.
 *
 * \param t              The transaction to use for this request.
 * \param request_type   The type of message to send.
 * \param iovec          Pointers to the body sections of the request.
 * \param num_vecs       The number of body sections in the request.
 * \param len            The returned length of the reply.
 * \param result         The returned body of the reply.
 *
 * \return  0 on success.  Otherwise an errno indicating
 *          the cause of failure.
 *
 * \note The returned result is provided in malloced storage and thus
 *       must be free'd by the caller with 'free(*result, M_XENSTORE)'.
 */
static int
xs_talkv(struct xs_transaction t, enum xsd_sockmsg_type request_type,
    const struct iovec *iovec, u_int num_vecs, u_int *len, void **result)
{
	struct xsd_sockmsg msg;
	void *ret = NULL;
	u_int i;
	int error;

	msg.tx_id = t.id;
	msg.req_id = 0;
	msg.type = request_type;
	msg.len = 0;
	for (i = 0; i < num_vecs; i++)
		msg.len += iovec[i].iov_len;

	sx_xlock(&xs.request_mutex);
	error = xs_write_store(&msg, sizeof(msg));
	if (error) {
		printf("xs_talkv failed %d\n", error);
		goto error_lock_held;
	}

	for (i = 0; i < num_vecs; i++) {
		error = xs_write_store(iovec[i].iov_base, iovec[i].iov_len);
		if (error) {
			printf("xs_talkv failed %d\n", error);
			goto error_lock_held;
		}
	}

	error = xs_read_reply(&msg.type, len, &ret);

error_lock_held:
	sx_xunlock(&xs.request_mutex);
	if (error)
		return (error);

	if (msg.type == XS_ERROR) {
		error = xs_get_error(ret);
		free(ret, M_XENSTORE);
		return (error);
	}

	/* Reply is either error or an echo of our request message type. */
	KASSERT(msg.type == request_type, ("bad xenstore message type"));

	if (result)
		*result = ret;
	else
		free(ret, M_XENSTORE);

	return (0);
}
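
/*
 * Wire format sketch for a request as transmitted by xs_talkv() (field
 * values are illustrative): a fixed xsd_sockmsg header is followed by
 * msg.len bytes of body, here the two NUL terminated strings used by
 * xs_watch() below.
 *
 *	{ type = XS_WATCH, req_id = 0, tx_id = 0, len = 13 }
 *	"device/vif\0" "1\0"
 *
 * The reply is either an echo of the request type with a result body, or
 * XS_ERROR with an error string such as "ENOENT" that xs_get_error()
 * maps back to an errno value.
 */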

/**
 * Wrapper for xs_talkv allowing easy transmission of a message with
 * a single, contiguous, message body.
 *
 * \param t              The transaction to use for this request.
 * \param request_type   The type of message to send.
 * \param body           The body of the request.
 * \param len            The returned length of the reply.
 * \param result         The returned body of the reply.
 *
 * \return  0 on success.  Otherwise an errno indicating
 *          the cause of failure.
 *
 * \note The returned result is provided in malloced storage and thus
 *       must be free'd by the caller with 'free(*result, M_XENSTORE)'.
 */
static int
xs_single(struct xs_transaction t, enum xsd_sockmsg_type request_type,
    const char *body, u_int *len, void **result)
{
	struct iovec iovec;

	iovec.iov_base = (void *)(uintptr_t)body;
	iovec.iov_len = strlen(body) + 1;

	return (xs_talkv(t, request_type, &iovec, 1, len, result));
}

/*------------------------- XenStore Watch Support ---------------------------*/
/**
 * Transmit a watch request to the XenStore service.
 *
 * \param path   The path in the XenStore to watch.
 * \param token  A unique identifier for this watch.
 *
 * \return  0 on success.  Otherwise an errno indicating the
 *          cause of failure.
 */
static int
xs_watch(const char *path, const char *token)
{
	struct iovec iov[2];

	iov[0].iov_base = (void *)(uintptr_t) path;
	iov[0].iov_len = strlen(path) + 1;
	iov[1].iov_base = (void *)(uintptr_t) token;
	iov[1].iov_len = strlen(token) + 1;

	return (xs_talkv(XST_NIL, XS_WATCH, iov, 2, NULL, NULL));
}

/**
 * Transmit an unwatch request to the XenStore service.
 *
 * \param path   The path in the XenStore to stop watching.
 * \param token  A unique identifier for this watch.
 *
 * \return  0 on success.  Otherwise an errno indicating the
 *          cause of failure.
 */
static int
xs_unwatch(const char *path, const char *token)
{
	struct iovec iov[2];

	iov[0].iov_base = (void *)(uintptr_t) path;
	iov[0].iov_len = strlen(path) + 1;
	iov[1].iov_base = (void *)(uintptr_t) token;
	iov[1].iov_len = strlen(token) + 1;

	return (xs_talkv(XST_NIL, XS_UNWATCH, iov, 2, NULL, NULL));
}

/**
 * Convert from watch token (unique identifier) to the associated
 * internal tracking structure for this watch.
 *
 * \param token  The unique identifier for the watch to find.
 *
 * \return  A pointer to the found watch structure or NULL.
 */
static struct xs_watch *
find_watch(const char *token)
{
	struct xs_watch *i, *cmp;

	cmp = (void *)strtoul(token, NULL, 16);

	LIST_FOREACH(i, &xs.registered_watches, list)
		if (i == cmp)
			return (i);

	return (NULL);
}

/**
 * Thread body of the XenStore watch event dispatch thread.
 */
static void
xenwatch_thread(void *unused)
{
	struct xs_stored_msg *msg;

	for (;;) {
		mtx_lock(&xs.watch_events_lock);
		while (TAILQ_EMPTY(&xs.watch_events))
			mtx_sleep(&xs.watch_events,
			    &xs.watch_events_lock,
			    PWAIT | PCATCH, "waitev", hz/10);

		mtx_unlock(&xs.watch_events_lock);
		sx_xlock(&xs.xenwatch_mutex);

		mtx_lock(&xs.watch_events_lock);
		msg = TAILQ_FIRST(&xs.watch_events);
		if (msg) {
			TAILQ_REMOVE(&xs.watch_events, msg, list);
			msg->u.watch.handle->pending--;
		}
		mtx_unlock(&xs.watch_events_lock);

		if (msg != NULL) {
			/*
			 * XXX There are messages coming in with a NULL
			 * XXX callback.  This deserves further investigation;
			 * XXX the workaround here simply prevents the kernel
			 * XXX from panic'ing on startup.
			 */
			if (msg->u.watch.handle->callback != NULL)
				msg->u.watch.handle->callback(
					msg->u.watch.handle,
					(const char **)msg->u.watch.vec,
					msg->u.watch.vec_size);
			free(msg->u.watch.vec, M_XENSTORE);
			free(msg, M_XENSTORE);
		}

		sx_xunlock(&xs.xenwatch_mutex);
	}
}

/*----------- XenStore Configuration, Initialization, and Control ------------*/
/**
 * Setup communication channels with the XenStore service.
 *
 * \return  On success, 0. Otherwise an errno value indicating the
 *          type of failure.
 */
static int
xs_init_comms(void)
{
	int error;

	if (xen_store->rsp_prod != xen_store->rsp_cons) {
		log(LOG_WARNING, "XENSTORE response ring is not quiescent "
		    "(%08x:%08x): fixing up\n",
		    xen_store->rsp_cons, xen_store->rsp_prod);
		xen_store->rsp_cons = xen_store->rsp_prod;
	}

	xen_intr_unbind(&xs.xen_intr_handle);

	error = xen_intr_bind_local_port(xs.xs_dev, xs.evtchn,
	    /*filter*/NULL, xs_intr, /*arg*/NULL, INTR_TYPE_NET|INTR_MPSAFE,
	    &xs.xen_intr_handle);
	if (error) {
		log(LOG_WARNING, "XENSTORE request irq failed %i\n", error);
		return (error);
	}

	return (0);
}

/*------------------ Private Device Attachment Functions  --------------------*/
static void
xs_identify(driver_t *driver, device_t parent)
{

	BUS_ADD_CHILD(parent, 0, "xenstore", 0);
}

/**
 * Probe for the existence of the XenStore.
 *
 * \param dev
 */
static int
xs_probe(device_t dev)
{
	/*
	 * We are either operating within a PV kernel or being probed
	 * as the child of the successfully attached xenpci device.
	 * Thus we are in a Xen environment and there will be a XenStore.
	 * Unconditionally return success.
	 */
	device_set_desc(dev, "XenStore");
	return (BUS_PROBE_NOWILDCARD);
}

static void
xs_attach_deferred(void *arg)
{

	bus_generic_probe(xs.xs_dev);
	bus_generic_attach(xs.xs_dev);

	config_intrhook_disestablish(&xs.xs_attachcb);
}

static void
xs_attach_late(void *arg, int pending)
{

	KASSERT((pending == 1), ("xs late attach queued several times"));
	bus_generic_probe(xs.xs_dev);
	bus_generic_attach(xs.xs_dev);
}

/**
 * Attach to the XenStore.
 *
 * This routine also prepares for the probe/attach of drivers that rely
 * on the XenStore.
 */
static int
xs_attach(device_t dev)
{
	struct proc *p;
	int error;

	/* Allow us to get device_t from softc and vice-versa. */
	xs.xs_dev = dev;
	device_set_softc(dev, &xs);

	/* Initialize the interface to xenstore. */
	xs.initialized = false;
	xs.evtchn = xen_get_xenstore_evtchn();
	if (xs.evtchn == 0) {
		struct evtchn_alloc_unbound alloc_unbound;

		/* Allocate a local event channel for xenstore */
		alloc_unbound.dom = DOMID_SELF;
		alloc_unbound.remote_dom = DOMID_SELF;
		error = HYPERVISOR_event_channel_op(
		    EVTCHNOP_alloc_unbound, &alloc_unbound);
		if (error != 0)
			panic(
			   "unable to alloc event channel for Dom0: %d",
			    error);

		xs.evtchn = alloc_unbound.port;

		/* Allocate memory for the xs shared ring */
		xen_store = malloc(PAGE_SIZE, M_XENSTORE, M_WAITOK | M_ZERO);
		xs.gpfn = atop(pmap_kextract((vm_offset_t)xen_store));
	} else {
		xs.gpfn = xen_get_xenstore_mfn();
		xen_store = pmap_mapdev_attr(ptoa(xs.gpfn), PAGE_SIZE,
		    PAT_WRITE_BACK);
		xs.initialized = true;
	}

	TAILQ_INIT(&xs.reply_list);
	TAILQ_INIT(&xs.watch_events);

	mtx_init(&xs.ring_lock, "ring lock", NULL, MTX_DEF);
	mtx_init(&xs.reply_lock, "reply lock", NULL, MTX_DEF);
	sx_init(&xs.xenwatch_mutex, "xenwatch");
	sx_init(&xs.request_mutex, "xenstore request");
	mtx_init(&xs.registered_watches_lock, "watches", NULL, MTX_DEF);
	mtx_init(&xs.watch_events_lock, "watch events", NULL, MTX_DEF);

	/* Initialize the shared memory rings to talk to xenstored */
	error = xs_init_comms();
	if (error)
		return (error);

	error = kproc_create(xenwatch_thread, NULL, &p, RFHIGHPID,
	    0, "xenwatch");
	if (error)
		return (error);
	xs.xenwatch_pid = p->p_pid;

	error = kproc_create(xs_rcv_thread, NULL, NULL,
	    RFHIGHPID, 0, "xenstore_rcv");

	xs.xs_attachcb.ich_func = xs_attach_deferred;
	xs.xs_attachcb.ich_arg = NULL;
	if (xs.initialized) {
		config_intrhook_establish(&xs.xs_attachcb);
	} else {
		TASK_INIT(&xs.xs_late_init, 0, xs_attach_late, NULL);
	}

	return (error);
}

/**
 * Prepare for suspension of this VM by halting XenStore access after
 * all transactions and individual requests have completed.
 */
static int
xs_suspend(device_t dev)
{
	int error;

	/* Suspend child Xen devices. */
	error = bus_generic_suspend(dev);
	if (error != 0)
		return (error);

	sx_xlock(&xs.request_mutex);

	return (0);
}

/**
 * Resume XenStore operations after this VM is resumed.
 */
static int
xs_resume(device_t dev)
{
	struct xs_watch *watch;
	char token[sizeof(watch) * 2 + 1];

	xs_init_comms();

	sx_xunlock(&xs.request_mutex);

	/*
	 * NB: since the XenStore children have not been resumed yet, there's
	 * no need to hold any watch mutex. Having clients try to add or
	 * remove watches at this point (before the XenStore is resumed) is
	 * clearly a violation of the resume order.
	 */
	LIST_FOREACH(watch, &xs.registered_watches, list) {
		sprintf(token, "%lX", (long)watch);
		xs_watch(watch->node, token);
	}

	/* Resume child Xen devices. */
	bus_generic_resume(dev);

	return (0);
}

/*-------------------- Private Device Attachment Data  -----------------------*/
static device_method_t xenstore_methods[] = {
	/* Device interface */
	DEVMETHOD(device_identify,	xs_identify),
	DEVMETHOD(device_probe,         xs_probe),
	DEVMETHOD(device_attach,        xs_attach),
	DEVMETHOD(device_detach,        bus_generic_detach),
	DEVMETHOD(device_shutdown,      bus_generic_shutdown),
	DEVMETHOD(device_suspend,       xs_suspend),
	DEVMETHOD(device_resume,        xs_resume),

	/* Bus interface */
	DEVMETHOD(bus_add_child,        bus_generic_add_child),
	DEVMETHOD(bus_alloc_resource,   bus_generic_alloc_resource),
	DEVMETHOD(bus_release_resource, bus_generic_release_resource),
	DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
	DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),

	DEVMETHOD_END
};

DEFINE_CLASS_0(xenstore, xenstore_driver, xenstore_methods, 0);
static devclass_t xenstore_devclass;

DRIVER_MODULE(xenstore, xenpv, xenstore_driver, xenstore_devclass, 0, 0);

/*------------------------------- Sysctl Data --------------------------------*/
/* XXX Shouldn't the node be somewhere else? */
SYSCTL_NODE(_dev, OID_AUTO, xen, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
    "Xen");
SYSCTL_INT(_dev_xen, OID_AUTO, xsd_port, CTLFLAG_RD, &xs.evtchn, 0, "");
SYSCTL_ULONG(_dev_xen, OID_AUTO, xsd_kva, CTLFLAG_RD, (u_long *) &xen_store, 0, "");

/*-------------------------------- Public API --------------------------------*/
/*------- API comments for these methods can be found in xenstorevar.h -------*/
bool
xs_initialized(void)
{

	return (xs.initialized);
}

evtchn_port_t
xs_evtchn(void)
{

	return (xs.evtchn);
}

vm_paddr_t
xs_address(void)
{

	return (ptoa(xs.gpfn));
}

int
xs_directory(struct xs_transaction t, const char *dir, const char *node,
    u_int *num, const char ***result)
{
	struct sbuf *path;
	char *strings;
	u_int len = 0;
	int error;

	path = xs_join(dir, node);
	error = xs_single(t, XS_DIRECTORY, sbuf_data(path), &len,
	    (void **)&strings);
	sbuf_delete(path);
	if (error)
		return (error);

	*result = split(strings, len, num);

	return (0);
}
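
/*
 * Usage sketch (hypothetical path): enumerate the children of a node,
 * then release the single allocation backing both the pointer array and
 * the string data.
 *
 *	const char **children;
 *	u_int count, i;
 *	error = xs_directory(XST_NIL, "device", "vif", &count, &children);
 *	if (error == 0) {
 *		for (i = 0; i < count; i++)
 *			printf("vif instance: %s\n", children[i]);
 *		free(children, M_XENSTORE);
 *	}
 */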

int
xs_exists(struct xs_transaction t, const char *dir, const char *node)
{
	const char **d;
	u_int dir_n;
	int error;

	error = xs_directory(t, dir, node, &dir_n, &d);
	if (error)
		return (0);
	free(d, M_XENSTORE);
	return (1);
}

int
xs_read(struct xs_transaction t, const char *dir, const char *node,
    u_int *len, void **result)
{
	struct sbuf *path;
	void *ret;
	int error;

	path = xs_join(dir, node);
	error = xs_single(t, XS_READ, sbuf_data(path), len, &ret);
	sbuf_delete(path);
	if (error)
		return (error);
	*result = ret;
	return (0);
}
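
/*
 * Usage sketch (hypothetical node): read a value as a NUL terminated
 * string; the result is malloced and must be freed by the caller.
 *
 *	char *mac;
 *	error = xs_read(XST_NIL, "device/vif/0", "mac", NULL,
 *	    (void **)&mac);
 *	if (error == 0) {
 *		... parse mac ...
 *		free(mac, M_XENSTORE);
 *	}
 */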

int
xs_write(struct xs_transaction t, const char *dir, const char *node,
    const char *string)
{
	struct sbuf *path;
	struct iovec iovec[2];
	int error;

	path = xs_join(dir, node);

	iovec[0].iov_base = (void *)(uintptr_t) sbuf_data(path);
	iovec[0].iov_len = sbuf_len(path) + 1;
	iovec[1].iov_base = (void *)(uintptr_t) string;
	iovec[1].iov_len = strlen(string);

	error = xs_talkv(t, XS_WRITE, iovec, 2, NULL, NULL);
	sbuf_delete(path);

	return (error);
}

int
xs_mkdir(struct xs_transaction t, const char *dir, const char *node)
{
	struct sbuf *path;
	int ret;

	path = xs_join(dir, node);
	ret = xs_single(t, XS_MKDIR, sbuf_data(path), NULL, NULL);
	sbuf_delete(path);

	return (ret);
}

int
xs_rm(struct xs_transaction t, const char *dir, const char *node)
{
	struct sbuf *path;
	int ret;

	path = xs_join(dir, node);
	ret = xs_single(t, XS_RM, sbuf_data(path), NULL, NULL);
	sbuf_delete(path);

	return (ret);
}

int
xs_rm_tree(struct xs_transaction xbt, const char *base, const char *node)
{
	struct xs_transaction local_xbt;
	struct sbuf *root_path_sbuf;
	struct sbuf *cur_path_sbuf;
	char *root_path;
	char *cur_path;
	const char **dir;
	int error;

retry:
	root_path_sbuf = xs_join(base, node);
	cur_path_sbuf  = xs_join(base, node);
	root_path      = sbuf_data(root_path_sbuf);
	cur_path       = sbuf_data(cur_path_sbuf);
	dir            = NULL;
	local_xbt.id   = 0;

	if (xbt.id == 0) {
		error = xs_transaction_start(&local_xbt);
		if (error != 0)
			goto out;
		xbt = local_xbt;
	}

	while (1) {
		u_int count;
		u_int i;

		error = xs_directory(xbt, cur_path, "", &count, &dir);
		if (error)
			goto out;

		for (i = 0; i < count; i++) {
			error = xs_rm(xbt, cur_path, dir[i]);
			if (error == ENOTEMPTY) {
				struct sbuf *push_dir;

				/*
				 * Descend to clear out this sub directory.
				 * We'll return to cur_path once push_dir
				 * is empty.
				 */
				push_dir = xs_join(cur_path, dir[i]);
				sbuf_delete(cur_path_sbuf);
				cur_path_sbuf = push_dir;
				cur_path = sbuf_data(cur_path_sbuf);
				break;
			} else if (error != 0) {
				goto out;
			}
		}

		free(dir, M_XENSTORE);
		dir = NULL;

		if (i == count) {
			char *last_slash;

			/* Directory is empty.  It is now safe to remove. */
			error = xs_rm(xbt, cur_path, "");
			if (error != 0)
				goto out;

			if (!strcmp(cur_path, root_path))
				break;

			/* Return to processing the parent directory. */
			last_slash = strrchr(cur_path, '/');
			KASSERT(last_slash != NULL,
				("xs_rm_tree: mangled path %s", cur_path));
			*last_slash = '\0';
		}
	}

out:
	sbuf_delete(cur_path_sbuf);
	sbuf_delete(root_path_sbuf);
	if (dir != NULL)
		free(dir, M_XENSTORE);

	if (local_xbt.id != 0) {
		int terror;

		terror = xs_transaction_end(local_xbt, /*abort*/error != 0);
		xbt.id = 0;
		if (terror == EAGAIN && error == 0)
			goto retry;
	}
	return (error);
}

int
xs_transaction_start(struct xs_transaction *t)
{
	char *id_str;
	int error;

	error = xs_single(XST_NIL, XS_TRANSACTION_START, "", NULL,
	    (void **)&id_str);
	if (error == 0) {
		t->id = strtoul(id_str, NULL, 0);
		free(id_str, M_XENSTORE);
	}
	return (error);
}

int
xs_transaction_end(struct xs_transaction t, int abort)
{
	char abortstr[2];

	if (abort)
		strcpy(abortstr, "F");
	else
		strcpy(abortstr, "T");

	return (xs_single(t, XS_TRANSACTION_END, abortstr, NULL, NULL));
}
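
/*
 * Transaction usage sketch (hypothetical nodes): group several accesses
 * so they commit atomically, retrying when the store changed underneath
 * us.  A conflicting concurrent update is reported as EAGAIN from the
 * commit.
 *
 *	struct xs_transaction t;
 *	int error, werror;
 *
 *	do {
 *		error = xs_transaction_start(&t);
 *		if (error != 0)
 *			break;
 *		werror = xs_write(t, "example", "state", "3");
 *		error = xs_transaction_end(t, werror != 0);
 *		if (werror != 0 && error == 0)
 *			error = werror;
 *	} while (error == EAGAIN);
 */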

int
xs_scanf(struct xs_transaction t, const char *dir, const char *node,
     int *scancountp, const char *fmt, ...)
{
	va_list ap;
	int error, ns;
	char *val;

	error = xs_read(t, dir, node, NULL, (void **) &val);
	if (error)
		return (error);

	va_start(ap, fmt);
	ns = vsscanf(val, fmt, ap);
	va_end(ap);
	free(val, M_XENSTORE);
	/* Distinctive errno. */
	if (ns == 0)
		return (ERANGE);
	if (scancountp)
		*scancountp = ns;
	return (0);
}
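
/*
 * Usage sketch (hypothetical node): scan a numeric value directly out of
 * the store; a value that fails to scan is reported as ERANGE.
 *
 *	unsigned int ring_ref;
 *	error = xs_scanf(XST_NIL, "device/vbd/768", "ring-ref", NULL,
 *	    "%u", &ring_ref);
 */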

int
xs_vprintf(struct xs_transaction t,
    const char *dir, const char *node, const char *fmt, va_list ap)
{
	struct sbuf *sb;
	int error;

	sb = sbuf_new_auto();
	sbuf_vprintf(sb, fmt, ap);
	sbuf_finish(sb);
	error = xs_write(t, dir, node, sbuf_data(sb));
	sbuf_delete(sb);

	return (error);
}

int
xs_printf(struct xs_transaction t, const char *dir, const char *node,
     const char *fmt, ...)
{
	va_list ap;
	int error;

	va_start(ap, fmt);
	error = xs_vprintf(t, dir, node, fmt, ap);
	va_end(ap);

	return (error);
}

int
xs_gather(struct xs_transaction t, const char *dir, ...)
{
	va_list ap;
	const char *name;
	int error;

	va_start(ap, dir);
	error = 0;
	while (error == 0 && (name = va_arg(ap, char *)) != NULL) {
		const char *fmt = va_arg(ap, char *);
		void *result = va_arg(ap, void *);
		char *p;

		error = xs_read(t, dir, name, NULL, (void **) &p);
		if (error)
			break;

		if (fmt) {
			if (sscanf(p, fmt, result) == 0)
				error = EINVAL;
			free(p, M_XENSTORE);
		} else
			*(char **)result = p;
	}
	va_end(ap);

	return (error);
}
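
/*
 * Usage sketch (hypothetical nodes): gather several values in one call.
 * A NULL format pointer returns the raw malloced string instead of
 * scanning into the result.
 *
 *	unsigned int evtchn;
 *	char *proto;
 *	error = xs_gather(XST_NIL, "device/vbd/768",
 *	    "event-channel", "%u", &evtchn,
 *	    "protocol", NULL, &proto,
 *	    NULL);
 *	if (error == 0)
 *		free(proto, M_XENSTORE);
 */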

int
xs_register_watch(struct xs_watch *watch)
{
	/* Pointer in ascii is the token. */
	char token[sizeof(watch) * 2 + 1];
	int error;

	watch->pending = 0;
	sprintf(token, "%lX", (long)watch);

	mtx_lock(&xs.registered_watches_lock);
	KASSERT(find_watch(token) == NULL, ("watch already registered"));
	LIST_INSERT_HEAD(&xs.registered_watches, watch, list);
	mtx_unlock(&xs.registered_watches_lock);

	error = xs_watch(watch->node, token);

	/* Ignore errors due to multiple registration. */
	if (error == EEXIST)
		error = 0;

	if (error != 0) {
		mtx_lock(&xs.registered_watches_lock);
		LIST_REMOVE(watch, list);
		mtx_unlock(&xs.registered_watches_lock);
	}

	return (error);
}
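
/*
 * Watch usage sketch (hypothetical callback and node; field names as used
 * elsewhere in this file): the xs_watch structure must remain valid for
 * the life of the registration, and callbacks run from xenwatch_thread
 * with xenwatch_mutex held.
 *
 *	static void
 *	example_cb(struct xs_watch *w, const char **vec, u_int vec_size)
 *	{
 *		printf("node %s changed\n", vec[XS_WATCH_PATH]);
 *	}
 *
 *	static struct xs_watch example_watch;
 *	example_watch.node = "example/node";
 *	example_watch.callback = example_cb;
 *	example_watch.max_pending = 0;	(0 leaves the event queue unbounded)
 *	error = xs_register_watch(&example_watch);
 */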

void
xs_unregister_watch(struct xs_watch *watch)
{
	struct xs_stored_msg *msg, *tmp;
	char token[sizeof(watch) * 2 + 1];
	int error;

	sprintf(token, "%lX", (long)watch);

	mtx_lock(&xs.registered_watches_lock);
	if (find_watch(token) == NULL) {
		mtx_unlock(&xs.registered_watches_lock);
		return;
	}
	LIST_REMOVE(watch, list);
	mtx_unlock(&xs.registered_watches_lock);

	error = xs_unwatch(watch->node, token);
	if (error)
		log(LOG_WARNING, "XENSTORE Failed to release watch %s: %i\n",
		    watch->node, error);

	/* Cancel pending watch events. */
	mtx_lock(&xs.watch_events_lock);
	TAILQ_FOREACH_SAFE(msg, &xs.watch_events, list, tmp) {
		if (msg->u.watch.handle != watch)
			continue;
		TAILQ_REMOVE(&xs.watch_events, msg, list);
		free(msg->u.watch.vec, M_XENSTORE);
		free(msg, M_XENSTORE);
	}
	mtx_unlock(&xs.watch_events_lock);

	/* Flush any currently-executing callback, unless we are it. :-) */
	if (curproc->p_pid != xs.xenwatch_pid) {
		sx_xlock(&xs.xenwatch_mutex);
		sx_xunlock(&xs.xenwatch_mutex);
	}
}

void
xs_lock(void)
{

	sx_xlock(&xs.request_mutex);
	return;
}

void
xs_unlock(void)
{

	sx_xunlock(&xs.request_mutex);
	return;
}
