1181624Skmacy/******************************************************************************
2214077Sgibbs * xenstore.c
3181624Skmacy *
4214077Sgibbs * Low-level kernel interface to the XenStore.
5181624Skmacy *
6181624Skmacy * Copyright (C) 2005 Rusty Russell, IBM Corporation
7214077Sgibbs * Copyright (C) 2009,2010 Spectra Logic Corporation
8214077Sgibbs *
9181624Skmacy * This file may be distributed separately from the Linux kernel, or
10181624Skmacy * incorporated into other software packages, subject to the following license:
11214077Sgibbs *
12181624Skmacy * Permission is hereby granted, free of charge, to any person obtaining a copy
13181624Skmacy * of this source file (the "Software"), to deal in the Software without
14181624Skmacy * restriction, including without limitation the rights to use, copy, modify,
15181624Skmacy * merge, publish, distribute, sublicense, and/or sell copies of the Software,
16181624Skmacy * and to permit persons to whom the Software is furnished to do so, subject to
17181624Skmacy * the following conditions:
18214077Sgibbs *
19181624Skmacy * The above copyright notice and this permission notice shall be included in
20181624Skmacy * all copies or substantial portions of the Software.
21214077Sgibbs *
22181624Skmacy * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23181624Skmacy * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24181624Skmacy * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
25181624Skmacy * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26181624Skmacy * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27181624Skmacy * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
28181624Skmacy * IN THE SOFTWARE.
29181624Skmacy */
30181624Skmacy
31181624Skmacy
32181624Skmacy#include <sys/cdefs.h>
33181624Skmacy__FBSDID("$FreeBSD: releng/11.0/sys/dev/xen/xenstore/xenstore.c 316722 2017-04-12 06:24:35Z delphij $");
34181624Skmacy
35181624Skmacy#include <sys/param.h>
36214077Sgibbs#include <sys/bus.h>
37181624Skmacy#include <sys/kernel.h>
38181624Skmacy#include <sys/lock.h>
39214077Sgibbs#include <sys/module.h>
40181624Skmacy#include <sys/mutex.h>
41181624Skmacy#include <sys/sx.h>
42181624Skmacy#include <sys/syslog.h>
43181624Skmacy#include <sys/malloc.h>
44181624Skmacy#include <sys/systm.h>
45181624Skmacy#include <sys/proc.h>
46181624Skmacy#include <sys/kthread.h>
47214077Sgibbs#include <sys/sbuf.h>
48214077Sgibbs#include <sys/sysctl.h>
49214077Sgibbs#include <sys/uio.h>
50186557Skmacy#include <sys/unistd.h>
51272317Sroyger#include <sys/queue.h>
52272317Sroyger#include <sys/taskqueue.h>
53181624Skmacy
54181624Skmacy#include <machine/stdarg.h>
55181624Skmacy
56255040Sgibbs#include <xen/xen-os.h>
57214077Sgibbs#include <xen/hypervisor.h>
58214077Sgibbs#include <xen/xen_intr.h>
59214077Sgibbs
60186557Skmacy#include <xen/interface/hvm/params.h>
61251767Sgibbs#include <xen/hvm.h>
62186557Skmacy
63214077Sgibbs#include <xen/xenstore/xenstorevar.h>
64214077Sgibbs#include <xen/xenstore/xenstore_internal.h>
65214077Sgibbs
66186557Skmacy#include <vm/vm.h>
67186557Skmacy#include <vm/pmap.h>
68186557Skmacy
69214077Sgibbs/**
70214077Sgibbs * \file xenstore.c
71214077Sgibbs * \brief XenStore interface
72214077Sgibbs *
73214077Sgibbs * The XenStore interface is a simple storage system that is a means of
74214077Sgibbs * communicating state and configuration data between the Xen Domain 0
75214077Sgibbs * and the various guest domains.  All configuration data other than
76214077Sgibbs * a small amount of essential information required during the early
77214077Sgibbs * boot process of launching a Xen aware guest, is managed using the
78214077Sgibbs * XenStore.
79214077Sgibbs *
80214077Sgibbs * The XenStore is ASCII string based, and has a structure and semantics
81214077Sgibbs * similar to a filesystem.  There are files and directories, the directories
82298955Spfg * able to contain files or other directories.  The depth of the hierarchy
83214077Sgibbs * is only limited by the XenStore's maximum path length.
84214077Sgibbs *
85214077Sgibbs * The communication channel between the XenStore service and other
86214077Sgibbs * domains is via two, guest specific, ring buffers in a shared memory
87214077Sgibbs * area.  One ring buffer is used for communicating in each direction.
88214077Sgibbs * The grant table references for this shared memory are given to the
89214077Sgibbs * guest either via the xen_start_info structure for a fully para-
90214077Sgibbs * virtualized guest, or via HVM hypercalls for a hardware virtualized
91214077Sgibbs * guest.
92214077Sgibbs *
93214077Sgibbs * The XenStore communication relies on an event channel and thus
94214077Sgibbs * interrupts.  For this reason, the attachment of the XenStore
95214077Sgibbs * relies on an interrupt driven configuration hook to hold off
96214077Sgibbs * boot processing until communication with the XenStore service
97214077Sgibbs * can be established.
98214077Sgibbs *
99214077Sgibbs * Several Xen services depend on the XenStore, most notably the
100214077Sgibbs * XenBus used to discover and manage Xen devices.  These services
101214077Sgibbs * are implemented as NewBus child attachments to a bus exported
102214077Sgibbs * by this XenStore driver.
103214077Sgibbs */
104181624Skmacy
105214077Sgibbsstatic struct xs_watch *find_watch(const char *token);
106181624Skmacy
107214077SgibbsMALLOC_DEFINE(M_XENSTORE, "xenstore", "XenStore data and results");
108214077Sgibbs
109214077Sgibbs/**
110214077Sgibbs * Pointer to shared memory communication structures allowing us
111214077Sgibbs * to communicate with the XenStore service.
112214077Sgibbs *
113214077Sgibbs * When operating in full PV mode, this pointer is set early in kernel
114214077Sgibbs * startup from within xen_machdep.c.  In HVM mode, we use hypercalls
115214077Sgibbs * to get the guest frame number for the shared page and then map it
116214077Sgibbs * into kva.  See xs_init() for details.
117214077Sgibbs */
118214077Sgibbsstruct xenstore_domain_interface *xen_store;
119214077Sgibbs
120214077Sgibbs/*-------------------------- Private Data Structures ------------------------*/
121214077Sgibbs
122214077Sgibbs/**
123214077Sgibbs * Structure capturing messages received from the XenStore service.
124214077Sgibbs */
125181624Skmacystruct xs_stored_msg {
126186557Skmacy	TAILQ_ENTRY(xs_stored_msg) list;
127181624Skmacy
128186557Skmacy	struct xsd_sockmsg hdr;
129181624Skmacy
130186557Skmacy	union {
131186557Skmacy		/* Queued replies. */
132186557Skmacy		struct {
133186557Skmacy			char *body;
134186557Skmacy		} reply;
135181624Skmacy
136186557Skmacy		/* Queued watch events. */
137186557Skmacy		struct {
138214077Sgibbs			struct xs_watch *handle;
139214077Sgibbs			const char **vec;
140214077Sgibbs			u_int vec_size;
141186557Skmacy		} watch;
142186557Skmacy	} u;
143181624Skmacy};
144214077SgibbsTAILQ_HEAD(xs_stored_msg_list, xs_stored_msg);
145181624Skmacy
146214077Sgibbs/**
147214077Sgibbs * Container for all XenStore related state.
148214077Sgibbs */
149214077Sgibbsstruct xs_softc {
150214077Sgibbs	/** Newbus device for the XenStore. */
151214077Sgibbs	device_t xs_dev;
152181624Skmacy
153214077Sgibbs	/**
154214077Sgibbs	 * Lock serializing access to ring producer/consumer
155214077Sgibbs	 * indexes.  Use of this lock guarantees that wakeups
156214077Sgibbs	 * of blocking readers/writers are not missed due to
157214077Sgibbs	 * races with the XenStore service.
158214077Sgibbs	 */
159214077Sgibbs	struct mtx ring_lock;
160214077Sgibbs
161214077Sgibbs	/*
162214077Sgibbs	 * Mutex used to insure exclusive access to the outgoing
163214077Sgibbs	 * communication ring.  We use a lock type that can be
164214077Sgibbs	 * held while sleeping so that xs_write() can block waiting
165214077Sgibbs	 * for space in the ring to free up, without allowing another
166214077Sgibbs	 * writer to come in and corrupt a partial message write.
167214077Sgibbs	 */
168186557Skmacy	struct sx request_mutex;
169181624Skmacy
170214077Sgibbs	/**
171214077Sgibbs	 * A list of replies to our requests.
172214077Sgibbs	 *
173214077Sgibbs	 * The reply list is filled by xs_rcv_thread().  It
174214077Sgibbs	 * is consumed by the context that issued the request
175214077Sgibbs	 * to which a reply is made.  The requester blocks in
176214077Sgibbs	 * xs_read_reply().
177214077Sgibbs	 *
178214077Sgibbs	 * /note Only one requesting context can be active at a time.
179214077Sgibbs	 *       This is guaranteed by the request_mutex and insures
180214077Sgibbs	 *	 that the requester sees replies matching the order
181214077Sgibbs	 *	 of its requests.
182214077Sgibbs	 */
183214077Sgibbs	struct xs_stored_msg_list reply_list;
184214077Sgibbs
185214077Sgibbs	/** Lock protecting the reply list. */
186214077Sgibbs	struct mtx reply_lock;
187214077Sgibbs
188214077Sgibbs	/**
189214077Sgibbs	 * List of registered watches.
190214077Sgibbs	 */
191214077Sgibbs	struct xs_watch_list  registered_watches;
192214077Sgibbs
193214077Sgibbs	/** Lock protecting the registered watches list. */
194214077Sgibbs	struct mtx registered_watches_lock;
195214077Sgibbs
196214077Sgibbs	/**
197214077Sgibbs	 * List of pending watch callback events.
198214077Sgibbs	 */
199214077Sgibbs	struct xs_stored_msg_list watch_events;
200214077Sgibbs
201214077Sgibbs	/** Lock protecting the watch calback list. */
202214077Sgibbs	struct mtx watch_events_lock;
203214077Sgibbs
204214077Sgibbs	/**
205214077Sgibbs	 * Sleepable lock used to prevent VM suspension while a
206214077Sgibbs	 * xenstore transaction is outstanding.
207214077Sgibbs	 *
208214077Sgibbs	 * Each active transaction holds a shared lock on the
209214077Sgibbs	 * suspend mutex.  Our suspend method blocks waiting
210214077Sgibbs	 * to acquire an exclusive lock.  This guarantees that
211214077Sgibbs	 * suspend processing will only proceed once all active
212214077Sgibbs	 * transactions have been retired.
213214077Sgibbs	 */
214186557Skmacy	struct sx suspend_mutex;
215214077Sgibbs
216214077Sgibbs	/**
217214077Sgibbs	 * The processid of the xenwatch thread.
218214077Sgibbs	 */
219214077Sgibbs	pid_t xenwatch_pid;
220214077Sgibbs
221214077Sgibbs	/**
222214077Sgibbs	 * Sleepable mutex used to gate the execution of XenStore
223214077Sgibbs	 * watch event callbacks.
224214077Sgibbs	 *
225214077Sgibbs	 * xenwatch_thread holds an exclusive lock on this mutex
226214077Sgibbs	 * while delivering event callbacks, and xenstore_unregister_watch()
227214077Sgibbs	 * uses an exclusive lock of this mutex to guarantee that no
228214077Sgibbs	 * callbacks of the just unregistered watch are pending
229214077Sgibbs	 * before returning to its caller.
230214077Sgibbs	 */
231214077Sgibbs	struct sx xenwatch_mutex;
232214077Sgibbs
233214077Sgibbs	/**
234214077Sgibbs	 * The HVM guest pseudo-physical frame number.  This is Xen's mapping
235214077Sgibbs	 * of the true machine frame number into our "physical address space".
236214077Sgibbs	 */
237214077Sgibbs	unsigned long gpfn;
238214077Sgibbs
239214077Sgibbs	/**
240214077Sgibbs	 * The event channel for communicating with the
241214077Sgibbs	 * XenStore service.
242214077Sgibbs	 */
243214077Sgibbs	int evtchn;
244214077Sgibbs
245255040Sgibbs	/** Handle for XenStore interrupts. */
246255040Sgibbs	xen_intr_handle_t xen_intr_handle;
247214077Sgibbs
248214077Sgibbs	/**
249214077Sgibbs	 * Interrupt driven config hook allowing us to defer
250214077Sgibbs	 * attaching children until interrupts (and thus communication
251214077Sgibbs	 * with the XenStore service) are available.
252214077Sgibbs	 */
253214077Sgibbs	struct intr_config_hook xs_attachcb;
254272317Sroyger
255272317Sroyger	/**
256272317Sroyger	 * Xenstore is a user-space process that usually runs in Dom0,
257272317Sroyger	 * so if this domain is booting as Dom0, xenstore wont we accessible,
258272317Sroyger	 * and we have to defer the initialization of xenstore related
259272317Sroyger	 * devices to later (when xenstore is started).
260272317Sroyger	 */
261272317Sroyger	bool initialized;
262272317Sroyger
263272317Sroyger	/**
264272317Sroyger	 * Task to run when xenstore is initialized (Dom0 only), will
265272317Sroyger	 * take care of attaching xenstore related devices.
266272317Sroyger	 */
267272317Sroyger	struct task xs_late_init;
268181624Skmacy};
269181624Skmacy
270214077Sgibbs/*-------------------------------- Global Data ------------------------------*/
271214077Sgibbsstatic struct xs_softc xs;
272181624Skmacy
273214077Sgibbs/*------------------------- Private Utility Functions -----------------------*/
274186557Skmacy
/**
 * Walk a buffer of back-to-back NUL terminated strings, counting them
 * and, when requested, recording where each one starts.
 *
 * \param strings  Start of the buffer holding the NUL terminated strings.
 * \param dest	   Array receiving a pointer to each string, or NULL to
 *		   only count.  It must have room for every string found.
 * \param len	   Size in bytes of the buffer pointed to by strings.
 *
 * \return  The number of strings found in the buffer.
 *
 * \note The final string must be NUL terminated within len bytes;
 *	 the scan relies on strlen() to find each string's end.
 */
static u_int
extract_strings(const char *strings, const char **dest, u_int len)
{
	const char *const end = strings + len;
	const char *cur = strings;
	u_int count = 0;

	while (cur < end) {
		if (dest != NULL)
			dest[count] = cur;
		count++;

		/* Step over this string and its terminator. */
		cur += strlen(cur) + 1;
	}

	return (count);
}
299214077Sgibbs
300214077Sgibbs/**
301214077Sgibbs * Convert a contiguous buffer containing a series of NUL terminated
302214077Sgibbs * strings into an array of pointers to strings.
303214077Sgibbs *
304214077Sgibbs * The returned pointer references the array of string pointers which
305214077Sgibbs * is followed by the storage for the string data.  It is the client's
306214077Sgibbs * responsibility to free this storage.
307214077Sgibbs *
308214077Sgibbs * The storage addressed by strings is free'd prior to split returning.
309214077Sgibbs *
310214077Sgibbs * \param strings  A pointer to a contiguous buffer of NUL terminated strings.
311214077Sgibbs * \param len	   The length of the buffer pointed to by strings.
312214077Sgibbs * \param num	   The number of strings found and returned in the strings
313214077Sgibbs *                 array.
314214077Sgibbs *
315214077Sgibbs * \return  An array of pointers to the strings found in the input buffer.
316214077Sgibbs */
317214077Sgibbsstatic const char **
318214077Sgibbssplit(char *strings, u_int len, u_int *num)
319214077Sgibbs{
320214077Sgibbs	const char **ret;
321214077Sgibbs
322214077Sgibbs	/* Protect against unterminated buffers. */
323250081Sgibbs	if (len > 0)
324250081Sgibbs		strings[len - 1] = '\0';
325214077Sgibbs
326214077Sgibbs	/* Count the strings. */
327214077Sgibbs	*num = extract_strings(strings, /*dest*/NULL, len);
328214077Sgibbs
329214077Sgibbs	/* Transfer to one big alloc for easy freeing by the caller. */
330214077Sgibbs	ret = malloc(*num * sizeof(char *) + len, M_XENSTORE, M_WAITOK);
331214077Sgibbs	memcpy(&ret[*num], strings, len);
332214077Sgibbs	free(strings, M_XENSTORE);
333214077Sgibbs
334214077Sgibbs	/* Extract pointers to newly allocated array. */
335214077Sgibbs	strings = (char *)&ret[*num];
336214077Sgibbs	(void)extract_strings(strings, /*dest*/ret, len);
337214077Sgibbs
338214077Sgibbs	return (ret);
339214077Sgibbs}
340214077Sgibbs
341214077Sgibbs/*------------------------- Public Utility Functions -------------------------*/
342214077Sgibbs/*------- API comments for these methods can be found in xenstorevar.h -------*/
/*
 * Build the path "dir/name" in a newly allocated, finished sbuf.  When
 * name is the empty string the result is just "dir", with no trailing
 * separator.  The returned sbuf is handed to the caller.
 */
struct sbuf *
xs_join(const char *dir, const char *name)
{
	struct sbuf *path;

	path = sbuf_new_auto();
	sbuf_cat(path, dir);
	if (*name != '\0') {
		sbuf_putc(path, '/');
		sbuf_cat(path, name);
	}
	sbuf_finish(path);

	return (path);
}
358214077Sgibbs
359214077Sgibbs/*-------------------- Low Level Communication Management --------------------*/
360214077Sgibbs/**
361214077Sgibbs * Interrupt handler for the XenStore event channel.
362214077Sgibbs *
363214077Sgibbs * XenStore reads and writes block on "xen_store" for buffer
364214077Sgibbs * space.  Wakeup any blocking operations when the XenStore
365214077Sgibbs * service has modified the queues.
366214077Sgibbs */
367214077Sgibbsstatic void
368214077Sgibbsxs_intr(void * arg __unused /*__attribute__((unused))*/)
369214077Sgibbs{
370214077Sgibbs
371272317Sroyger	/* If xenstore has not been initialized, initialize it now */
372272317Sroyger	if (!xs.initialized) {
373272317Sroyger		xs.initialized = true;
374272317Sroyger		/*
375272317Sroyger		 * Since this task is probing and attaching devices we
376272317Sroyger		 * have to hold the Giant lock.
377272317Sroyger		 */
378272317Sroyger		taskqueue_enqueue(taskqueue_swi_giant, &xs.xs_late_init);
379272317Sroyger	}
380272317Sroyger
381214077Sgibbs	/*
382214077Sgibbs	 * Hold ring lock across wakeup so that clients
383214077Sgibbs	 * cannot miss a wakeup.
384214077Sgibbs	 */
385214077Sgibbs	mtx_lock(&xs.ring_lock);
386214077Sgibbs	wakeup(xen_store);
387214077Sgibbs	mtx_unlock(&xs.ring_lock);
388214077Sgibbs}
389214077Sgibbs
390214077Sgibbs/**
391214077Sgibbs * Verify that the indexes for a ring are valid.
392214077Sgibbs *
393214077Sgibbs * The difference between the producer and consumer cannot
394214077Sgibbs * exceed the size of the ring.
395214077Sgibbs *
396214077Sgibbs * \param cons  The consumer index for the ring to test.
397214077Sgibbs * \param prod  The producer index for the ring to test.
398214077Sgibbs *
399214077Sgibbs * \retval 1  If indexes are in range.
400214077Sgibbs * \retval 0  If the indexes are out of range.
401214077Sgibbs */
402214077Sgibbsstatic int
403214077Sgibbsxs_check_indexes(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod)
404214077Sgibbs{
405214077Sgibbs
406214077Sgibbs	return ((prod - cons) <= XENSTORE_RING_SIZE);
407214077Sgibbs}
408214077Sgibbs
409214077Sgibbs/**
410214077Sgibbs * Return a pointer to, and the length of, the contiguous
411214077Sgibbs * free region available for output in a ring buffer.
412214077Sgibbs *
413214077Sgibbs * \param cons  The consumer index for the ring.
414214077Sgibbs * \param prod  The producer index for the ring.
415214077Sgibbs * \param buf   The base address of the ring's storage.
416214077Sgibbs * \param len   The amount of contiguous storage available.
417214077Sgibbs *
418214077Sgibbs * \return  A pointer to the start location of the free region.
419214077Sgibbs */
420214077Sgibbsstatic void *
421214077Sgibbsxs_get_output_chunk(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod,
422214077Sgibbs    char *buf, uint32_t *len)
423214077Sgibbs{
424214077Sgibbs
425214077Sgibbs	*len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(prod);
426214077Sgibbs	if ((XENSTORE_RING_SIZE - (prod - cons)) < *len)
427214077Sgibbs		*len = XENSTORE_RING_SIZE - (prod - cons);
428214077Sgibbs	return (buf + MASK_XENSTORE_IDX(prod));
429214077Sgibbs}
430214077Sgibbs
431214077Sgibbs/**
432214077Sgibbs * Return a pointer to, and the length of, the contiguous
433214077Sgibbs * data available to read from a ring buffer.
434214077Sgibbs *
435214077Sgibbs * \param cons  The consumer index for the ring.
436214077Sgibbs * \param prod  The producer index for the ring.
437214077Sgibbs * \param buf   The base address of the ring's storage.
438214077Sgibbs * \param len   The amount of contiguous data available to read.
439214077Sgibbs *
440214077Sgibbs * \return  A pointer to the start location of the available data.
441214077Sgibbs */
442214077Sgibbsstatic const void *
443214077Sgibbsxs_get_input_chunk(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod,
444214077Sgibbs    const char *buf, uint32_t *len)
445214077Sgibbs{
446214077Sgibbs
447214077Sgibbs	*len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(cons);
448214077Sgibbs	if ((prod - cons) < *len)
449214077Sgibbs		*len = prod - cons;
450214077Sgibbs	return (buf + MASK_XENSTORE_IDX(cons));
451214077Sgibbs}
452214077Sgibbs
453214077Sgibbs/**
454214077Sgibbs * Transmit data to the XenStore service.
455214077Sgibbs *
456214077Sgibbs * \param tdata  A pointer to the contiguous data to send.
457214077Sgibbs * \param len    The amount of data to send.
458214077Sgibbs *
459214077Sgibbs * \return  On success 0, otherwise an errno value indicating the
460214077Sgibbs *          cause of failure.
461214077Sgibbs *
462214077Sgibbs * \invariant  Called from thread context.
463214077Sgibbs * \invariant  The buffer pointed to by tdata is at least len bytes
464214077Sgibbs *             in length.
465214077Sgibbs * \invariant  xs.request_mutex exclusively locked.
466214077Sgibbs */
467214077Sgibbsstatic int
468214077Sgibbsxs_write_store(const void *tdata, unsigned len)
469214077Sgibbs{
470214077Sgibbs	XENSTORE_RING_IDX cons, prod;
471214077Sgibbs	const char *data = (const char *)tdata;
472214077Sgibbs	int error;
473214077Sgibbs
474214077Sgibbs	sx_assert(&xs.request_mutex, SX_XLOCKED);
475214077Sgibbs	while (len != 0) {
476214077Sgibbs		void *dst;
477214077Sgibbs		u_int avail;
478214077Sgibbs
479214077Sgibbs		/* Hold lock so we can't miss wakeups should we block. */
480214077Sgibbs		mtx_lock(&xs.ring_lock);
481214077Sgibbs		cons = xen_store->req_cons;
482214077Sgibbs		prod = xen_store->req_prod;
483214077Sgibbs		if ((prod - cons) == XENSTORE_RING_SIZE) {
484214077Sgibbs			/*
485214077Sgibbs			 * Output ring is full. Wait for a ring event.
486214077Sgibbs			 *
487214077Sgibbs			 * Note that the events from both queues
488214077Sgibbs			 * are combined, so being woken does not
489214077Sgibbs			 * guarantee that data exist in the read
490214077Sgibbs			 * ring.
491214077Sgibbs			 *
492214077Sgibbs			 * To simplify error recovery and the retry,
493214077Sgibbs			 * we specify PDROP so our lock is *not* held
494214077Sgibbs			 * when msleep returns.
495214077Sgibbs			 */
496214077Sgibbs			error = msleep(xen_store, &xs.ring_lock, PCATCH|PDROP,
497214077Sgibbs			     "xbwrite", /*timeout*/0);
498214077Sgibbs			if (error && error != EWOULDBLOCK)
499214077Sgibbs				return (error);
500214077Sgibbs
501214077Sgibbs			/* Try again. */
502214077Sgibbs			continue;
503214077Sgibbs		}
504214077Sgibbs		mtx_unlock(&xs.ring_lock);
505214077Sgibbs
506214077Sgibbs		/* Verify queue sanity. */
507214077Sgibbs		if (!xs_check_indexes(cons, prod)) {
508214077Sgibbs			xen_store->req_cons = xen_store->req_prod = 0;
509214077Sgibbs			return (EIO);
510214077Sgibbs		}
511214077Sgibbs
512214077Sgibbs		dst = xs_get_output_chunk(cons, prod, xen_store->req, &avail);
513214077Sgibbs		if (avail > len)
514214077Sgibbs			avail = len;
515214077Sgibbs
516214077Sgibbs		memcpy(dst, data, avail);
517214077Sgibbs		data += avail;
518214077Sgibbs		len -= avail;
519214077Sgibbs
520214077Sgibbs		/*
521214077Sgibbs		 * The store to the producer index, which indicates
522214077Sgibbs		 * to the other side that new data has arrived, must
523214077Sgibbs		 * be visible only after our copy of the data into the
524214077Sgibbs		 * ring has completed.
525214077Sgibbs		 */
526214077Sgibbs		wmb();
527214077Sgibbs		xen_store->req_prod += avail;
528214077Sgibbs
529214077Sgibbs		/*
530255040Sgibbs		 * xen_intr_signal() implies mb(). The other side will see
531255040Sgibbs		 * the change to req_prod at the time of the interrupt.
532214077Sgibbs		 */
533255040Sgibbs		xen_intr_signal(xs.xen_intr_handle);
534214077Sgibbs	}
535214077Sgibbs
536214077Sgibbs	return (0);
537214077Sgibbs}
538214077Sgibbs
539214077Sgibbs/**
540214077Sgibbs * Receive data from the XenStore service.
541214077Sgibbs *
542214077Sgibbs * \param tdata  A pointer to the contiguous buffer to receive the data.
543214077Sgibbs * \param len    The amount of data to receive.
544214077Sgibbs *
545214077Sgibbs * \return  On success 0, otherwise an errno value indicating the
546214077Sgibbs *          cause of failure.
547214077Sgibbs *
548214077Sgibbs * \invariant  Called from thread context.
549214077Sgibbs * \invariant  The buffer pointed to by tdata is at least len bytes
550214077Sgibbs *             in length.
551214077Sgibbs *
552214077Sgibbs * \note xs_read does not perform any internal locking to guarantee
553214077Sgibbs *       serial access to the incoming ring buffer.  However, there
554214077Sgibbs *	 is only one context processing reads: xs_rcv_thread().
555214077Sgibbs */
556214077Sgibbsstatic int
557214077Sgibbsxs_read_store(void *tdata, unsigned len)
558214077Sgibbs{
559214077Sgibbs	XENSTORE_RING_IDX cons, prod;
560214077Sgibbs	char *data = (char *)tdata;
561214077Sgibbs	int error;
562214077Sgibbs
563214077Sgibbs	while (len != 0) {
564214077Sgibbs		u_int avail;
565214077Sgibbs		const char *src;
566214077Sgibbs
567214077Sgibbs		/* Hold lock so we can't miss wakeups should we block. */
568214077Sgibbs		mtx_lock(&xs.ring_lock);
569214077Sgibbs		cons = xen_store->rsp_cons;
570214077Sgibbs		prod = xen_store->rsp_prod;
571214077Sgibbs		if (cons == prod) {
572214077Sgibbs			/*
573214077Sgibbs			 * Nothing to read. Wait for a ring event.
574214077Sgibbs			 *
575214077Sgibbs			 * Note that the events from both queues
576214077Sgibbs			 * are combined, so being woken does not
577214077Sgibbs			 * guarantee that data exist in the read
578214077Sgibbs			 * ring.
579214077Sgibbs			 *
580214077Sgibbs			 * To simplify error recovery and the retry,
581214077Sgibbs			 * we specify PDROP so our lock is *not* held
582214077Sgibbs			 * when msleep returns.
583214077Sgibbs			 */
584214077Sgibbs			error = msleep(xen_store, &xs.ring_lock, PCATCH|PDROP,
585228526Skevlo			    "xbread", /*timeout*/0);
586214077Sgibbs			if (error && error != EWOULDBLOCK)
587214077Sgibbs				return (error);
588214077Sgibbs			continue;
589214077Sgibbs		}
590214077Sgibbs		mtx_unlock(&xs.ring_lock);
591214077Sgibbs
592214077Sgibbs		/* Verify queue sanity. */
593214077Sgibbs		if (!xs_check_indexes(cons, prod)) {
594214077Sgibbs			xen_store->rsp_cons = xen_store->rsp_prod = 0;
595214077Sgibbs			return (EIO);
596214077Sgibbs		}
597214077Sgibbs
598214077Sgibbs		src = xs_get_input_chunk(cons, prod, xen_store->rsp, &avail);
599214077Sgibbs		if (avail > len)
600214077Sgibbs			avail = len;
601214077Sgibbs
602214077Sgibbs		/*
603214077Sgibbs		 * Insure the data we read is related to the indexes
604214077Sgibbs		 * we read above.
605214077Sgibbs		 */
606214077Sgibbs		rmb();
607214077Sgibbs
608214077Sgibbs		memcpy(data, src, avail);
609214077Sgibbs		data += avail;
610214077Sgibbs		len -= avail;
611214077Sgibbs
612214077Sgibbs		/*
613214077Sgibbs		 * Insure that the producer of this ring does not see
614214077Sgibbs		 * the ring space as free until after we have copied it
615214077Sgibbs		 * out.
616214077Sgibbs		 */
617214077Sgibbs		mb();
618214077Sgibbs		xen_store->rsp_cons += avail;
619214077Sgibbs
620214077Sgibbs		/*
621255040Sgibbs		 * xen_intr_signal() implies mb(). The producer will see
622255040Sgibbs		 * the updated consumer index when the event is delivered.
623214077Sgibbs		 */
624255040Sgibbs		xen_intr_signal(xs.xen_intr_handle);
625214077Sgibbs	}
626214077Sgibbs
627214077Sgibbs	return (0);
628214077Sgibbs}
629214077Sgibbs
630214077Sgibbs/*----------------------- Received Message Processing ------------------------*/
631214077Sgibbs/**
632214077Sgibbs * Block reading the next message from the XenStore service and
633214077Sgibbs * process the result.
634214077Sgibbs *
635214077Sgibbs * \param type  The returned type of the XenStore message received.
636214077Sgibbs *
637214077Sgibbs * \return  0 on success.  Otherwise an errno value indicating the
638214077Sgibbs *          type of failure encountered.
639214077Sgibbs */
640214077Sgibbsstatic int
641214077Sgibbsxs_process_msg(enum xsd_sockmsg_type *type)
642214077Sgibbs{
643214077Sgibbs	struct xs_stored_msg *msg;
644214077Sgibbs	char *body;
645214077Sgibbs	int error;
646214077Sgibbs
647214077Sgibbs	msg = malloc(sizeof(*msg), M_XENSTORE, M_WAITOK);
648214077Sgibbs	error = xs_read_store(&msg->hdr, sizeof(msg->hdr));
649214077Sgibbs	if (error) {
650214077Sgibbs		free(msg, M_XENSTORE);
651214077Sgibbs		return (error);
652214077Sgibbs	}
653214077Sgibbs
654214077Sgibbs	body = malloc(msg->hdr.len + 1, M_XENSTORE, M_WAITOK);
655214077Sgibbs	error = xs_read_store(body, msg->hdr.len);
656214077Sgibbs	if (error) {
657214077Sgibbs		free(body, M_XENSTORE);
658214077Sgibbs		free(msg, M_XENSTORE);
659214077Sgibbs		return (error);
660214077Sgibbs	}
661214077Sgibbs	body[msg->hdr.len] = '\0';
662214077Sgibbs
663214077Sgibbs	*type = msg->hdr.type;
664214077Sgibbs	if (msg->hdr.type == XS_WATCH_EVENT) {
665214077Sgibbs		msg->u.watch.vec = split(body, msg->hdr.len,
666214077Sgibbs		    &msg->u.watch.vec_size);
667214077Sgibbs
668214077Sgibbs		mtx_lock(&xs.registered_watches_lock);
669214077Sgibbs		msg->u.watch.handle = find_watch(
670214077Sgibbs		    msg->u.watch.vec[XS_WATCH_TOKEN]);
671214077Sgibbs		if (msg->u.watch.handle != NULL) {
672214077Sgibbs			mtx_lock(&xs.watch_events_lock);
673214077Sgibbs			TAILQ_INSERT_TAIL(&xs.watch_events, msg, list);
674214077Sgibbs			wakeup(&xs.watch_events);
675214077Sgibbs			mtx_unlock(&xs.watch_events_lock);
676214077Sgibbs		} else {
677214077Sgibbs			free(msg->u.watch.vec, M_XENSTORE);
678214077Sgibbs			free(msg, M_XENSTORE);
679214077Sgibbs		}
680214077Sgibbs		mtx_unlock(&xs.registered_watches_lock);
681214077Sgibbs	} else {
682214077Sgibbs		msg->u.reply.body = body;
683214077Sgibbs		mtx_lock(&xs.reply_lock);
684214077Sgibbs		TAILQ_INSERT_TAIL(&xs.reply_list, msg, list);
685214077Sgibbs		wakeup(&xs.reply_list);
686214077Sgibbs		mtx_unlock(&xs.reply_lock);
687214077Sgibbs	}
688214077Sgibbs
689214077Sgibbs	return (0);
690214077Sgibbs}
691214077Sgibbs
692214077Sgibbs/**
693214077Sgibbs * Thread body of the XenStore receive thread.
694214077Sgibbs *
695214077Sgibbs * This thread blocks waiting for data from the XenStore service
696214077Sgibbs * and processes and received messages.
697214077Sgibbs */
698214077Sgibbsstatic void
699214077Sgibbsxs_rcv_thread(void *arg __unused)
700214077Sgibbs{
701214077Sgibbs	int error;
702214077Sgibbs	enum xsd_sockmsg_type type;
703214077Sgibbs
704214077Sgibbs	for (;;) {
705214077Sgibbs		error = xs_process_msg(&type);
706214077Sgibbs		if (error)
707214077Sgibbs			printf("XENSTORE error %d while reading message\n",
708214077Sgibbs			    error);
709214077Sgibbs	}
710214077Sgibbs}
711214077Sgibbs
712214077Sgibbs/*---------------- XenStore Message Request/Reply Processing -----------------*/
713214077Sgibbs/**
714214077Sgibbs * Filter invoked before transmitting any message to the XenStore service.
715214077Sgibbs *
716214077Sgibbs * The role of the filter may expand, but currently serves to manage
717214077Sgibbs * the interactions of messages with transaction state.
718214077Sgibbs *
719214077Sgibbs * \param request_msg_type  The message type for the request.
720214077Sgibbs */
721214077Sgibbsstatic inline void
722214077Sgibbsxs_request_filter(uint32_t request_msg_type)
723214077Sgibbs{
724214077Sgibbs	if (request_msg_type == XS_TRANSACTION_START)
725214077Sgibbs		sx_slock(&xs.suspend_mutex);
726214077Sgibbs}
727214077Sgibbs
728214077Sgibbs/**
729214077Sgibbs * Filter invoked after transmitting any message to the XenStore service.
730214077Sgibbs *
731214077Sgibbs * The role of the filter may expand, but currently serves to manage
732214077Sgibbs * the interactions of messages with transaction state.
733214077Sgibbs *
734214077Sgibbs * \param request_msg_type     The message type for the original request.
735214077Sgibbs * \param reply_msg_type       The message type for any received reply.
736214077Sgibbs * \param request_reply_error  The error status from the attempt to send
737214077Sgibbs *                             the request or retrieve the reply.
738214077Sgibbs */
739214077Sgibbsstatic inline void
740214077Sgibbsxs_reply_filter(uint32_t request_msg_type,
741214077Sgibbs    uint32_t reply_msg_type, int request_reply_error)
742214077Sgibbs{
743214077Sgibbs	/*
744214077Sgibbs	 * The count of transactions drops if we attempted
745214077Sgibbs	 * to end a transaction (even if that attempt fails
746225704Sgibbs	 * in error), we receive a transaction end acknowledgement,
747225704Sgibbs	 * or if our attempt to begin a transaction fails.
748214077Sgibbs	 */
749214077Sgibbs	if (request_msg_type == XS_TRANSACTION_END
750214077Sgibbs	 || (request_reply_error == 0 && reply_msg_type == XS_TRANSACTION_END)
751214077Sgibbs	 || (request_msg_type == XS_TRANSACTION_START
752214077Sgibbs	  && (request_reply_error != 0 || reply_msg_type == XS_ERROR)))
753214077Sgibbs		sx_sunlock(&xs.suspend_mutex);
754214077Sgibbs
755214077Sgibbs}
756214077Sgibbs
757186557Skmacy#define xsd_error_count	(sizeof(xsd_errors) / sizeof(xsd_errors[0]))
758186557Skmacy
759214077Sgibbs/**
760214077Sgibbs * Convert a XenStore error string into an errno number.
761214077Sgibbs *
762214077Sgibbs * \param errorstring  The error string to convert.
763214077Sgibbs *
764214077Sgibbs * \return  The errno best matching the input string.
765214077Sgibbs *
766214077Sgibbs * \note Unknown error strings are converted to EINVAL.
767214077Sgibbs */
768186557Skmacystatic int
769186557Skmacyxs_get_error(const char *errorstring)
770181624Skmacy{
771214077Sgibbs	u_int i;
772181624Skmacy
773186557Skmacy	for (i = 0; i < xsd_error_count; i++) {
774186557Skmacy		if (!strcmp(errorstring, xsd_errors[i].errstring))
775186557Skmacy			return (xsd_errors[i].errnum);
776186557Skmacy	}
777214077Sgibbs	log(LOG_WARNING, "XENSTORE xen store gave: unknown error %s",
778186557Skmacy	    errorstring);
779186557Skmacy	return (EINVAL);
780181624Skmacy}
781181624Skmacy
782214077Sgibbs/**
783214077Sgibbs * Block waiting for a reply to a message request.
784214077Sgibbs *
785214077Sgibbs * \param type	  The returned type of the reply.
786214077Sgibbs * \param len	  The returned body length of the reply.
787214077Sgibbs * \param result  The returned body of the reply.
788214077Sgibbs *
789214077Sgibbs * \return  0 on success.  Otherwise an errno indicating the
790214077Sgibbs *          cause of failure.
791214077Sgibbs */
792186557Skmacystatic int
793214077Sgibbsxs_read_reply(enum xsd_sockmsg_type *type, u_int *len, void **result)
794181624Skmacy{
795186557Skmacy	struct xs_stored_msg *msg;
796186557Skmacy	char *body;
797186557Skmacy	int error;
798181893Skmacy
799214077Sgibbs	mtx_lock(&xs.reply_lock);
800214077Sgibbs	while (TAILQ_EMPTY(&xs.reply_list)) {
801214077Sgibbs		error = mtx_sleep(&xs.reply_list, &xs.reply_lock,
802214077Sgibbs		    PCATCH, "xswait", hz/10);
803214077Sgibbs		if (error && error != EWOULDBLOCK) {
804214077Sgibbs			mtx_unlock(&xs.reply_lock);
805214077Sgibbs			return (error);
806181624Skmacy		}
807189699Sdfr	}
808214077Sgibbs	msg = TAILQ_FIRST(&xs.reply_list);
809214077Sgibbs	TAILQ_REMOVE(&xs.reply_list, msg, list);
810214077Sgibbs	mtx_unlock(&xs.reply_lock);
811181624Skmacy
812186557Skmacy	*type = msg->hdr.type;
813186557Skmacy	if (len)
814186557Skmacy		*len = msg->hdr.len;
815186557Skmacy	body = msg->u.reply.body;
816181624Skmacy
817214077Sgibbs	free(msg, M_XENSTORE);
818186557Skmacy	*result = body;
819186557Skmacy	return (0);
820181624Skmacy}
821181624Skmacy
822214077Sgibbs/**
823214077Sgibbs * Pass-thru interface for XenStore access by userland processes
824214077Sgibbs * via the XenStore device.
825214077Sgibbs *
826214077Sgibbs * Reply type and length data are returned by overwriting these
827214077Sgibbs * fields in the passed in request message.
828214077Sgibbs *
829214077Sgibbs * \param msg	  A properly formatted message to transmit to
830214077Sgibbs *		  the XenStore service.
831214077Sgibbs * \param result  The returned body of the reply.
832214077Sgibbs *
833214077Sgibbs * \return  0 on success.  Otherwise an errno indicating the cause
834214077Sgibbs *          of failure.
835214077Sgibbs *
836214077Sgibbs * \note The returned result is provided in malloced storage and thus
837214077Sgibbs *       must be free'd by the caller with 'free(result, M_XENSTORE);
838214077Sgibbs */
839186557Skmacyint
840214077Sgibbsxs_dev_request_and_reply(struct xsd_sockmsg *msg, void **result)
841181624Skmacy{
842214077Sgibbs	uint32_t request_type;
843186557Skmacy	int error;
844181624Skmacy
845214077Sgibbs	request_type = msg->type;
846214077Sgibbs	xs_request_filter(request_type);
847181624Skmacy
848214077Sgibbs	sx_xlock(&xs.request_mutex);
849214077Sgibbs	if ((error = xs_write_store(msg, sizeof(*msg) + msg->len)) == 0)
850186557Skmacy		error = xs_read_reply(&msg->type, &msg->len, result);
851214077Sgibbs	sx_xunlock(&xs.request_mutex);
852181624Skmacy
853214077Sgibbs	xs_reply_filter(request_type, msg->type, error);
854181624Skmacy
855186557Skmacy	return (error);
856181624Skmacy}
857181624Skmacy
858214077Sgibbs/**
859214077Sgibbs * Send a message with an optionally muti-part body to the XenStore service.
860214077Sgibbs *
861214077Sgibbs * \param t              The transaction to use for this request.
862214077Sgibbs * \param request_type   The type of message to send.
863214077Sgibbs * \param iovec          Pointers to the body sections of the request.
864214077Sgibbs * \param num_vecs       The number of body sections in the request.
865214077Sgibbs * \param len            The returned length of the reply.
866214077Sgibbs * \param result         The returned body of the reply.
867214077Sgibbs *
868214077Sgibbs * \return  0 on success.  Otherwise an errno indicating
869214077Sgibbs *          the cause of failure.
870214077Sgibbs *
871214077Sgibbs * \note The returned result is provided in malloced storage and thus
872214077Sgibbs *       must be free'd by the caller with 'free(*result, M_XENSTORE);
873186557Skmacy */
874186557Skmacystatic int
875214077Sgibbsxs_talkv(struct xs_transaction t, enum xsd_sockmsg_type request_type,
876214077Sgibbs    const struct iovec *iovec, u_int num_vecs, u_int *len, void **result)
877181624Skmacy{
878186557Skmacy	struct xsd_sockmsg msg;
879186557Skmacy	void *ret = NULL;
880214077Sgibbs	u_int i;
881186557Skmacy	int error;
882181624Skmacy
883186557Skmacy	msg.tx_id = t.id;
884186557Skmacy	msg.req_id = 0;
885214077Sgibbs	msg.type = request_type;
886186557Skmacy	msg.len = 0;
887186557Skmacy	for (i = 0; i < num_vecs; i++)
888186557Skmacy		msg.len += iovec[i].iov_len;
889181624Skmacy
890214077Sgibbs	xs_request_filter(request_type);
891181624Skmacy
892214077Sgibbs	sx_xlock(&xs.request_mutex);
893214077Sgibbs	error = xs_write_store(&msg, sizeof(msg));
894186557Skmacy	if (error) {
895186557Skmacy		printf("xs_talkv failed %d\n", error);
896214077Sgibbs		goto error_lock_held;
897186557Skmacy	}
898181624Skmacy
899186557Skmacy	for (i = 0; i < num_vecs; i++) {
900214077Sgibbs		error = xs_write_store(iovec[i].iov_base, iovec[i].iov_len);
901214077Sgibbs		if (error) {
902186557Skmacy			printf("xs_talkv failed %d\n", error);
903214077Sgibbs			goto error_lock_held;
904181624Skmacy		}
905186557Skmacy	}
906181624Skmacy
907186557Skmacy	error = xs_read_reply(&msg.type, len, &ret);
908181624Skmacy
909214077Sgibbserror_lock_held:
910214077Sgibbs	sx_xunlock(&xs.request_mutex);
911214077Sgibbs	xs_reply_filter(request_type, msg.type, error);
912186557Skmacy	if (error)
913186557Skmacy		return (error);
914181624Skmacy
915186557Skmacy	if (msg.type == XS_ERROR) {
916186557Skmacy		error = xs_get_error(ret);
917214077Sgibbs		free(ret, M_XENSTORE);
918186557Skmacy		return (error);
919186557Skmacy	}
920181889Skmacy
921214077Sgibbs	/* Reply is either error or an echo of our request message type. */
922214077Sgibbs	KASSERT(msg.type == request_type, ("bad xenstore message type"));
923181889Skmacy
924186557Skmacy	if (result)
925186557Skmacy		*result = ret;
926186557Skmacy	else
927214077Sgibbs		free(ret, M_XENSTORE);
928186557Skmacy
929186557Skmacy	return (0);
930181624Skmacy}
931181624Skmacy
932214077Sgibbs/**
933214077Sgibbs * Wrapper for xs_talkv allowing easy transmission of a message with
934214077Sgibbs * a single, contiguous, message body.
935214077Sgibbs *
936214077Sgibbs * \param t              The transaction to use for this request.
937214077Sgibbs * \param request_type   The type of message to send.
938214077Sgibbs * \param body           The body of the request.
939214077Sgibbs * \param len            The returned length of the reply.
940214077Sgibbs * \param result         The returned body of the reply.
941214077Sgibbs *
942214077Sgibbs * \return  0 on success.  Otherwise an errno indicating
943214077Sgibbs *          the cause of failure.
944214077Sgibbs *
945214077Sgibbs * \note The returned result is provided in malloced storage and thus
946214077Sgibbs *       must be free'd by the caller with 'free(*result, M_XENSTORE);
947214077Sgibbs */
948186557Skmacystatic int
949214077Sgibbsxs_single(struct xs_transaction t, enum xsd_sockmsg_type request_type,
950214077Sgibbs    const char *body, u_int *len, void **result)
951181624Skmacy{
952186557Skmacy	struct iovec iovec;
953181624Skmacy
954214077Sgibbs	iovec.iov_base = (void *)(uintptr_t)body;
955214077Sgibbs	iovec.iov_len = strlen(body) + 1;
956181624Skmacy
957214077Sgibbs	return (xs_talkv(t, request_type, &iovec, 1, len, result));
958181624Skmacy}
959181624Skmacy
960214077Sgibbs/*------------------------- XenStore Watch Support ---------------------------*/
961214077Sgibbs/**
962214077Sgibbs * Transmit a watch request to the XenStore service.
963214077Sgibbs *
964214077Sgibbs * \param path    The path in the XenStore to watch.
965214077Sgibbs * \param tocken  A unique identifier for this watch.
966214077Sgibbs *
967214077Sgibbs * \return  0 on success.  Otherwise an errno indicating the
968214077Sgibbs *          cause of failure.
969214077Sgibbs */
970214077Sgibbsstatic int
971214077Sgibbsxs_watch(const char *path, const char *token)
972181624Skmacy{
973214077Sgibbs	struct iovec iov[2];
974181624Skmacy
975214077Sgibbs	iov[0].iov_base = (void *)(uintptr_t) path;
976214077Sgibbs	iov[0].iov_len = strlen(path) + 1;
977214077Sgibbs	iov[1].iov_base = (void *)(uintptr_t) token;
978214077Sgibbs	iov[1].iov_len = strlen(token) + 1;
979181624Skmacy
980214077Sgibbs	return (xs_talkv(XST_NIL, XS_WATCH, iov, 2, NULL, NULL));
981181624Skmacy}
982181624Skmacy
983214077Sgibbs/**
984214077Sgibbs * Transmit an uwatch request to the XenStore service.
985214077Sgibbs *
986214077Sgibbs * \param path    The path in the XenStore to watch.
987214077Sgibbs * \param tocken  A unique identifier for this watch.
988214077Sgibbs *
989214077Sgibbs * \return  0 on success.  Otherwise an errno indicating the
990214077Sgibbs *          cause of failure.
991214077Sgibbs */
992214077Sgibbsstatic int
993214077Sgibbsxs_unwatch(const char *path, const char *token)
994181624Skmacy{
995214077Sgibbs	struct iovec iov[2];
996181624Skmacy
997214077Sgibbs	iov[0].iov_base = (void *)(uintptr_t) path;
998214077Sgibbs	iov[0].iov_len = strlen(path) + 1;
999214077Sgibbs	iov[1].iov_base = (void *)(uintptr_t) token;
1000214077Sgibbs	iov[1].iov_len = strlen(token) + 1;
1001181624Skmacy
1002214077Sgibbs	return (xs_talkv(XST_NIL, XS_UNWATCH, iov, 2, NULL, NULL));
1003214077Sgibbs}
1004214077Sgibbs
1005214077Sgibbs/**
1006214077Sgibbs * Convert from watch token (unique identifier) to the associated
1007214077Sgibbs * internal tracking structure for this watch.
1008214077Sgibbs *
1009214077Sgibbs * \param tocken  The unique identifier for the watch to find.
1010214077Sgibbs *
1011214077Sgibbs * \return  A pointer to the found watch structure or NULL.
1012214077Sgibbs */
1013214077Sgibbsstatic struct xs_watch *
1014214077Sgibbsfind_watch(const char *token)
1015214077Sgibbs{
1016214077Sgibbs	struct xs_watch *i, *cmp;
1017214077Sgibbs
1018214077Sgibbs	cmp = (void *)strtoul(token, NULL, 16);
1019214077Sgibbs
1020214077Sgibbs	LIST_FOREACH(i, &xs.registered_watches, list)
1021214077Sgibbs		if (i == cmp)
1022214077Sgibbs			return (i);
1023214077Sgibbs
1024214077Sgibbs	return (NULL);
1025214077Sgibbs}
1026214077Sgibbs
1027214077Sgibbs/**
1028214077Sgibbs * Thread body of the XenStore watch event dispatch thread.
1029214077Sgibbs */
1030214077Sgibbsstatic void
1031214077Sgibbsxenwatch_thread(void *unused)
1032214077Sgibbs{
1033214077Sgibbs	struct xs_stored_msg *msg;
1034214077Sgibbs
1035214077Sgibbs	for (;;) {
1036214077Sgibbs
1037214077Sgibbs		mtx_lock(&xs.watch_events_lock);
1038214077Sgibbs		while (TAILQ_EMPTY(&xs.watch_events))
1039214077Sgibbs			mtx_sleep(&xs.watch_events,
1040214077Sgibbs			    &xs.watch_events_lock,
1041214077Sgibbs			    PWAIT | PCATCH, "waitev", hz/10);
1042214077Sgibbs
1043214077Sgibbs		mtx_unlock(&xs.watch_events_lock);
1044214077Sgibbs		sx_xlock(&xs.xenwatch_mutex);
1045214077Sgibbs
1046214077Sgibbs		mtx_lock(&xs.watch_events_lock);
1047214077Sgibbs		msg = TAILQ_FIRST(&xs.watch_events);
1048214077Sgibbs		if (msg)
1049214077Sgibbs			TAILQ_REMOVE(&xs.watch_events, msg, list);
1050214077Sgibbs		mtx_unlock(&xs.watch_events_lock);
1051214077Sgibbs
1052214077Sgibbs		if (msg != NULL) {
1053214077Sgibbs			/*
1054214077Sgibbs			 * XXX There are messages coming in with a NULL
1055214077Sgibbs			 * XXX callback.  This deserves further investigation;
1056214077Sgibbs			 * XXX the workaround here simply prevents the kernel
1057214077Sgibbs			 * XXX from panic'ing on startup.
1058214077Sgibbs			 */
1059214077Sgibbs			if (msg->u.watch.handle->callback != NULL)
1060214077Sgibbs				msg->u.watch.handle->callback(
1061214077Sgibbs					msg->u.watch.handle,
1062214077Sgibbs					(const char **)msg->u.watch.vec,
1063214077Sgibbs					msg->u.watch.vec_size);
1064214077Sgibbs			free(msg->u.watch.vec, M_XENSTORE);
1065214077Sgibbs			free(msg, M_XENSTORE);
1066214077Sgibbs		}
1067214077Sgibbs
1068214077Sgibbs		sx_xunlock(&xs.xenwatch_mutex);
1069186557Skmacy	}
1070214077Sgibbs}
1071181624Skmacy
1072214077Sgibbs/*----------- XenStore Configuration, Initialization, and Control ------------*/
1073214077Sgibbs/**
1074214077Sgibbs * Setup communication channels with the XenStore service.
1075214077Sgibbs *
1076214077Sgibbs * \return  On success, 0. Otherwise an errno value indicating the
1077214077Sgibbs *          type of failure.
1078214077Sgibbs */
1079214077Sgibbsstatic int
1080214077Sgibbsxs_init_comms(void)
1081214077Sgibbs{
1082214077Sgibbs	int error;
1083214077Sgibbs
1084214077Sgibbs	if (xen_store->rsp_prod != xen_store->rsp_cons) {
1085214077Sgibbs		log(LOG_WARNING, "XENSTORE response ring is not quiescent "
1086214077Sgibbs		    "(%08x:%08x): fixing up\n",
1087214077Sgibbs		    xen_store->rsp_cons, xen_store->rsp_prod);
1088214077Sgibbs		xen_store->rsp_cons = xen_store->rsp_prod;
1089214077Sgibbs	}
1090214077Sgibbs
1091255040Sgibbs	xen_intr_unbind(&xs.xen_intr_handle);
1092214077Sgibbs
1093255040Sgibbs	error = xen_intr_bind_local_port(xs.xs_dev, xs.evtchn,
1094255040Sgibbs	    /*filter*/NULL, xs_intr, /*arg*/NULL, INTR_TYPE_NET|INTR_MPSAFE,
1095255040Sgibbs	    &xs.xen_intr_handle);
1096214077Sgibbs	if (error) {
1097214077Sgibbs		log(LOG_WARNING, "XENSTORE request irq failed %i\n", error);
1098214077Sgibbs		return (error);
1099214077Sgibbs	}
1100214077Sgibbs
1101214077Sgibbs	return (0);
1102181624Skmacy}
1103181624Skmacy
1104214077Sgibbs/*------------------ Private Device Attachment Functions  --------------------*/
1105214077Sgibbsstatic void
1106214077Sgibbsxs_identify(driver_t *driver, device_t parent)
1107181624Skmacy{
1108181624Skmacy
1109214077Sgibbs	BUS_ADD_CHILD(parent, 0, "xenstore", 0);
1110214077Sgibbs}
1111181624Skmacy
1112214077Sgibbs/**
1113298955Spfg * Probe for the existence of the XenStore.
1114214077Sgibbs *
1115214077Sgibbs * \param dev
1116214077Sgibbs */
1117214077Sgibbsstatic int
1118214077Sgibbsxs_probe(device_t dev)
1119214077Sgibbs{
1120214077Sgibbs	/*
1121214077Sgibbs	 * We are either operating within a PV kernel or being probed
1122214077Sgibbs	 * as the child of the successfully attached xenpci device.
1123214077Sgibbs	 * Thus we are in a Xen environment and there will be a XenStore.
1124216448Sgibbs	 * Unconditionally return success.
1125214077Sgibbs	 */
1126214077Sgibbs	device_set_desc(dev, "XenStore");
1127289686Sroyger	return (BUS_PROBE_NOWILDCARD);
1128214077Sgibbs}
1129181624Skmacy
1130214077Sgibbsstatic void
1131214077Sgibbsxs_attach_deferred(void *arg)
1132214077Sgibbs{
1133181624Skmacy
1134214077Sgibbs	bus_generic_probe(xs.xs_dev);
1135214077Sgibbs	bus_generic_attach(xs.xs_dev);
1136214077Sgibbs
1137214077Sgibbs	config_intrhook_disestablish(&xs.xs_attachcb);
1138181624Skmacy}
1139181624Skmacy
1140272317Sroygerstatic void
1141272317Sroygerxs_attach_late(void *arg, int pending)
1142272317Sroyger{
1143272317Sroyger
1144272317Sroyger	KASSERT((pending == 1), ("xs late attach queued several times"));
1145272317Sroyger	bus_generic_probe(xs.xs_dev);
1146272317Sroyger	bus_generic_attach(xs.xs_dev);
1147272317Sroyger}
1148272317Sroyger
1149214077Sgibbs/**
1150214077Sgibbs * Attach to the XenStore.
1151214077Sgibbs *
1152214077Sgibbs * This routine also prepares for the probe/attach of drivers that rely
1153214077Sgibbs * on the XenStore.
1154186557Skmacy */
1155214077Sgibbsstatic int
1156214077Sgibbsxs_attach(device_t dev)
1157214077Sgibbs{
1158214077Sgibbs	int error;
1159214077Sgibbs
1160214077Sgibbs	/* Allow us to get device_t from softc and vice-versa. */
1161214077Sgibbs	xs.xs_dev = dev;
1162214077Sgibbs	device_set_softc(dev, &xs);
1163214077Sgibbs
1164214077Sgibbs	/* Initialize the interface to xenstore. */
1165214077Sgibbs	struct proc *p;
1166214077Sgibbs
1167272317Sroyger	xs.initialized = false;
1168267532Sroyger	if (xen_hvm_domain()) {
1169267532Sroyger		xs.evtchn = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN);
1170267532Sroyger		xs.gpfn = hvm_get_parameter(HVM_PARAM_STORE_PFN);
1171267532Sroyger		xen_store = pmap_mapdev(xs.gpfn * PAGE_SIZE, PAGE_SIZE);
1172272317Sroyger		xs.initialized = true;
1173267532Sroyger	} else if (xen_pv_domain()) {
1174272317Sroyger		if (HYPERVISOR_start_info->store_evtchn == 0) {
1175272317Sroyger			struct evtchn_alloc_unbound alloc_unbound;
1176272317Sroyger
1177272317Sroyger			/* Allocate a local event channel for xenstore */
1178272317Sroyger			alloc_unbound.dom = DOMID_SELF;
1179272317Sroyger			alloc_unbound.remote_dom = DOMID_SELF;
1180272317Sroyger			error = HYPERVISOR_event_channel_op(
1181272317Sroyger			    EVTCHNOP_alloc_unbound, &alloc_unbound);
1182272317Sroyger			if (error != 0)
1183272317Sroyger				panic(
1184272317Sroyger				   "unable to alloc event channel for Dom0: %d",
1185272317Sroyger				    error);
1186272317Sroyger
1187272317Sroyger			HYPERVISOR_start_info->store_evtchn =
1188272317Sroyger			    alloc_unbound.port;
1189272317Sroyger			xs.evtchn = alloc_unbound.port;
1190272317Sroyger
1191272317Sroyger			/* Allocate memory for the xs shared ring */
1192272317Sroyger			xen_store = malloc(PAGE_SIZE, M_XENSTORE,
1193272317Sroyger			    M_WAITOK | M_ZERO);
1194272317Sroyger		} else {
1195272317Sroyger			xs.evtchn = HYPERVISOR_start_info->store_evtchn;
1196272317Sroyger			xs.initialized = true;
1197272317Sroyger		}
1198267532Sroyger	} else {
1199267532Sroyger		panic("Unknown domain type, cannot initialize xenstore.");
1200267532Sroyger	}
1201214077Sgibbs
1202214077Sgibbs	TAILQ_INIT(&xs.reply_list);
1203214077Sgibbs	TAILQ_INIT(&xs.watch_events);
1204214077Sgibbs
1205214077Sgibbs	mtx_init(&xs.ring_lock, "ring lock", NULL, MTX_DEF);
1206214077Sgibbs	mtx_init(&xs.reply_lock, "reply lock", NULL, MTX_DEF);
1207214077Sgibbs	sx_init(&xs.xenwatch_mutex, "xenwatch");
1208214077Sgibbs	sx_init(&xs.request_mutex, "xenstore request");
1209214077Sgibbs	sx_init(&xs.suspend_mutex, "xenstore suspend");
1210214077Sgibbs	mtx_init(&xs.registered_watches_lock, "watches", NULL, MTX_DEF);
1211214077Sgibbs	mtx_init(&xs.watch_events_lock, "watch events", NULL, MTX_DEF);
1212214077Sgibbs
1213214077Sgibbs	/* Initialize the shared memory rings to talk to xenstored */
1214214077Sgibbs	error = xs_init_comms();
1215214077Sgibbs	if (error)
1216214077Sgibbs		return (error);
1217214077Sgibbs
1218214077Sgibbs	error = kproc_create(xenwatch_thread, NULL, &p, RFHIGHPID,
1219214077Sgibbs	    0, "xenwatch");
1220214077Sgibbs	if (error)
1221214077Sgibbs		return (error);
1222214077Sgibbs	xs.xenwatch_pid = p->p_pid;
1223214077Sgibbs
1224214077Sgibbs	error = kproc_create(xs_rcv_thread, NULL, NULL,
1225214077Sgibbs	    RFHIGHPID, 0, "xenstore_rcv");
1226214077Sgibbs
1227214077Sgibbs	xs.xs_attachcb.ich_func = xs_attach_deferred;
1228214077Sgibbs	xs.xs_attachcb.ich_arg = NULL;
1229272317Sroyger	if (xs.initialized) {
1230272317Sroyger		config_intrhook_establish(&xs.xs_attachcb);
1231272317Sroyger	} else {
1232272317Sroyger		TASK_INIT(&xs.xs_late_init, 0, xs_attach_late, NULL);
1233272317Sroyger	}
1234214077Sgibbs
1235214077Sgibbs	return (error);
1236214077Sgibbs}
1237214077Sgibbs
1238214077Sgibbs/**
1239214077Sgibbs * Prepare for suspension of this VM by halting XenStore access after
1240214077Sgibbs * all transactions and individual requests have completed.
1241214077Sgibbs */
1242214077Sgibbsstatic int
1243225704Sgibbsxs_suspend(device_t dev)
1244214077Sgibbs{
1245225704Sgibbs	int error;
1246214077Sgibbs
1247225704Sgibbs	/* Suspend child Xen devices. */
1248225704Sgibbs	error = bus_generic_suspend(dev);
1249225704Sgibbs	if (error != 0)
1250225704Sgibbs		return (error);
1251225704Sgibbs
1252214077Sgibbs	sx_xlock(&xs.suspend_mutex);
1253214077Sgibbs	sx_xlock(&xs.request_mutex);
1254214077Sgibbs
1255214077Sgibbs	return (0);
1256214077Sgibbs}
1257214077Sgibbs
1258214077Sgibbs/**
1259214077Sgibbs * Resume XenStore operations after this VM is resumed.
1260214077Sgibbs */
1261214077Sgibbsstatic int
1262214077Sgibbsxs_resume(device_t dev __unused)
1263214077Sgibbs{
1264214077Sgibbs	struct xs_watch *watch;
1265214077Sgibbs	char token[sizeof(watch) * 2 + 1];
1266214077Sgibbs
1267214077Sgibbs	xs_init_comms();
1268214077Sgibbs
1269214077Sgibbs	sx_xunlock(&xs.request_mutex);
1270214077Sgibbs
1271214077Sgibbs	/*
1272214077Sgibbs	 * No need for registered_watches_lock: the suspend_mutex
1273214077Sgibbs	 * is sufficient.
1274214077Sgibbs	 */
1275214077Sgibbs	LIST_FOREACH(watch, &xs.registered_watches, list) {
1276214077Sgibbs		sprintf(token, "%lX", (long)watch);
1277214077Sgibbs		xs_watch(watch->node, token);
1278214077Sgibbs	}
1279214077Sgibbs
1280214077Sgibbs	sx_xunlock(&xs.suspend_mutex);
1281214077Sgibbs
1282225704Sgibbs	/* Resume child Xen devices. */
1283225704Sgibbs	bus_generic_resume(dev);
1284225704Sgibbs
1285214077Sgibbs	return (0);
1286214077Sgibbs}
1287214077Sgibbs
1288214077Sgibbs/*-------------------- Private Device Attachment Data  -----------------------*/
/*
 * Method table for the xenstore driver.  Identify, probe, attach,
 * suspend and resume are implemented in this file; detach, shutdown
 * and all bus methods are delegated to the bus_generic_* routines.
 */
static device_method_t xenstore_methods[] = {
	/* Device interface */
	DEVMETHOD(device_identify,	xs_identify),
	DEVMETHOD(device_probe,         xs_probe),
	DEVMETHOD(device_attach,        xs_attach),
	DEVMETHOD(device_detach,        bus_generic_detach),
	DEVMETHOD(device_shutdown,      bus_generic_shutdown),
	DEVMETHOD(device_suspend,       xs_suspend),
	DEVMETHOD(device_resume,        xs_resume),

	/* Bus interface */
	DEVMETHOD(bus_add_child,        bus_generic_add_child),
	DEVMETHOD(bus_alloc_resource,   bus_generic_alloc_resource),
	DEVMETHOD(bus_release_resource, bus_generic_release_resource),
	DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
	DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),

	/* Table terminator. */
	DEVMETHOD_END
};
1308214077Sgibbs
/*
 * Define the "xenstore" driver class from the method table above.
 * xs_attach() installs the softc itself via device_set_softc().
 */
DEFINE_CLASS_0(xenstore, xenstore_driver, xenstore_methods, 0);
static devclass_t xenstore_devclass;

/* Register the xenstore driver with "xenpv" as its parent. */
DRIVER_MODULE(xenstore, xenpv, xenstore_driver, xenstore_devclass, 0, 0);
1313214077Sgibbs
1314214077Sgibbs/*------------------------------- Sysctl Data --------------------------------*/
/* XXX Shouldn't the node be somewhere else? */
/* dev.xen: read-only parent node for the entries below. */
SYSCTL_NODE(_dev, OID_AUTO, xen, CTLFLAG_RD, NULL, "Xen");
/* dev.xen.xsd_port: read-only view of xs.evtchn. */
SYSCTL_INT(_dev_xen, OID_AUTO, xsd_port, CTLFLAG_RD, &xs.evtchn, 0, "");
/* dev.xen.xsd_kva: read-only view of the xen_store pointer value. */
SYSCTL_ULONG(_dev_xen, OID_AUTO, xsd_kva, CTLFLAG_RD, (u_long *) &xen_store, 0, "");
1319214077Sgibbs
1320214077Sgibbs/*-------------------------------- Public API --------------------------------*/
1321214077Sgibbs/*------- API comments for these methods can be found in xenstorevar.h -------*/
1322186557Skmacyint
1323214077Sgibbsxs_directory(struct xs_transaction t, const char *dir, const char *node,
1324214077Sgibbs    u_int *num, const char ***result)
1325181624Skmacy{
1326214077Sgibbs	struct sbuf *path;
1327214077Sgibbs	char *strings;
1328214077Sgibbs	u_int len = 0;
1329186557Skmacy	int error;
1330181624Skmacy
1331214077Sgibbs	path = xs_join(dir, node);
1332214077Sgibbs	error = xs_single(t, XS_DIRECTORY, sbuf_data(path), &len,
1333214077Sgibbs	    (void **)&strings);
1334214077Sgibbs	sbuf_delete(path);
1335186557Skmacy	if (error)
1336186557Skmacy		return (error);
1337181624Skmacy
1338186557Skmacy	*result = split(strings, len, num);
1339214077Sgibbs
1340186557Skmacy	return (0);
1341181624Skmacy}
1342181624Skmacy
1343186557Skmacyint
1344214077Sgibbsxs_exists(struct xs_transaction t, const char *dir, const char *node)
1345181624Skmacy{
1346214077Sgibbs	const char **d;
1347186557Skmacy	int error, dir_n;
1348181624Skmacy
1349214077Sgibbs	error = xs_directory(t, dir, node, &dir_n, &d);
1350186557Skmacy	if (error)
1351186557Skmacy		return (0);
1352214077Sgibbs	free(d, M_XENSTORE);
1353186557Skmacy	return (1);
1354181624Skmacy}
1355181624Skmacy
1356186557Skmacyint
1357214077Sgibbsxs_read(struct xs_transaction t, const char *dir, const char *node,
1358214077Sgibbs    u_int *len, void **result)
1359181624Skmacy{
1360214077Sgibbs	struct sbuf *path;
1361186557Skmacy	void *ret;
1362186557Skmacy	int error;
1363181624Skmacy
1364214077Sgibbs	path = xs_join(dir, node);
1365214077Sgibbs	error = xs_single(t, XS_READ, sbuf_data(path), len, &ret);
1366214077Sgibbs	sbuf_delete(path);
1367186557Skmacy	if (error)
1368186557Skmacy		return (error);
1369186557Skmacy	*result = ret;
1370186557Skmacy	return (0);
1371181624Skmacy}
1372181624Skmacy
1373186557Skmacyint
1374214077Sgibbsxs_write(struct xs_transaction t, const char *dir, const char *node,
1375186557Skmacy    const char *string)
1376181624Skmacy{
1377214077Sgibbs	struct sbuf *path;
1378186557Skmacy	struct iovec iovec[2];
1379186557Skmacy	int error;
1380181624Skmacy
1381214077Sgibbs	path = xs_join(dir, node);
1382181624Skmacy
1383214077Sgibbs	iovec[0].iov_base = (void *)(uintptr_t) sbuf_data(path);
1384214077Sgibbs	iovec[0].iov_len = sbuf_len(path) + 1;
1385186557Skmacy	iovec[1].iov_base = (void *)(uintptr_t) string;
1386186557Skmacy	iovec[1].iov_len = strlen(string);
1387181624Skmacy
1388186557Skmacy	error = xs_talkv(t, XS_WRITE, iovec, 2, NULL, NULL);
1389214077Sgibbs	sbuf_delete(path);
1390186557Skmacy
1391186557Skmacy	return (error);
1392181624Skmacy}
1393181624Skmacy
1394186557Skmacyint
1395214077Sgibbsxs_mkdir(struct xs_transaction t, const char *dir, const char *node)
1396181624Skmacy{
1397214077Sgibbs	struct sbuf *path;
1398186557Skmacy	int ret;
1399181624Skmacy
1400214077Sgibbs	path = xs_join(dir, node);
1401214077Sgibbs	ret = xs_single(t, XS_MKDIR, sbuf_data(path), NULL, NULL);
1402214077Sgibbs	sbuf_delete(path);
1403181624Skmacy
1404186557Skmacy	return (ret);
1405181624Skmacy}
1406181624Skmacy
1407186557Skmacyint
1408214077Sgibbsxs_rm(struct xs_transaction t, const char *dir, const char *node)
1409181624Skmacy{
1410214077Sgibbs	struct sbuf *path;
1411186557Skmacy	int ret;
1412181624Skmacy
1413214077Sgibbs	path = xs_join(dir, node);
1414214077Sgibbs	ret = xs_single(t, XS_RM, sbuf_data(path), NULL, NULL);
1415214077Sgibbs	sbuf_delete(path);
1416181624Skmacy
1417186557Skmacy	return (ret);
1418181624Skmacy}
1419181624Skmacy
1420186557Skmacyint
1421214077Sgibbsxs_rm_tree(struct xs_transaction xbt, const char *base, const char *node)
1422181624Skmacy{
1423214077Sgibbs	struct xs_transaction local_xbt;
1424214077Sgibbs	struct sbuf *root_path_sbuf;
1425214077Sgibbs	struct sbuf *cur_path_sbuf;
1426214077Sgibbs	char *root_path;
1427214077Sgibbs	char *cur_path;
1428214077Sgibbs	const char **dir;
1429186557Skmacy	int error;
1430181624Skmacy
1431214077Sgibbsretry:
1432214077Sgibbs	root_path_sbuf = xs_join(base, node);
1433214077Sgibbs	cur_path_sbuf  = xs_join(base, node);
1434214077Sgibbs	root_path      = sbuf_data(root_path_sbuf);
1435214077Sgibbs	cur_path       = sbuf_data(cur_path_sbuf);
1436214077Sgibbs	dir            = NULL;
1437214077Sgibbs	local_xbt.id   = 0;
1438214077Sgibbs
1439214077Sgibbs	if (xbt.id == 0) {
1440214077Sgibbs		error = xs_transaction_start(&local_xbt);
1441214077Sgibbs		if (error != 0)
1442214077Sgibbs			goto out;
1443214077Sgibbs		xbt = local_xbt;
1444186557Skmacy	}
1445181624Skmacy
1446214077Sgibbs	while (1) {
1447214077Sgibbs		u_int count;
1448214077Sgibbs		u_int i;
1449181624Skmacy
1450214077Sgibbs		error = xs_directory(xbt, cur_path, "", &count, &dir);
1451214077Sgibbs		if (error)
1452214077Sgibbs			goto out;
1453214077Sgibbs
1454214077Sgibbs		for (i = 0; i < count; i++) {
1455214077Sgibbs			error = xs_rm(xbt, cur_path, dir[i]);
1456214077Sgibbs			if (error == ENOTEMPTY) {
1457214077Sgibbs				struct sbuf *push_dir;
1458214077Sgibbs
1459214077Sgibbs				/*
1460214077Sgibbs				 * Descend to clear out this sub directory.
1461214077Sgibbs				 * We'll return to cur_dir once push_dir
1462214077Sgibbs				 * is empty.
1463214077Sgibbs				 */
1464214077Sgibbs				push_dir = xs_join(cur_path, dir[i]);
1465214077Sgibbs				sbuf_delete(cur_path_sbuf);
1466214077Sgibbs				cur_path_sbuf = push_dir;
1467214077Sgibbs				cur_path = sbuf_data(cur_path_sbuf);
1468214077Sgibbs				break;
1469214077Sgibbs			} else if (error != 0) {
1470214077Sgibbs				goto out;
1471214077Sgibbs			}
1472214077Sgibbs		}
1473214077Sgibbs
1474214077Sgibbs		free(dir, M_XENSTORE);
1475214077Sgibbs		dir = NULL;
1476214077Sgibbs
1477214077Sgibbs		if (i == count) {
1478214077Sgibbs			char *last_slash;
1479214077Sgibbs
1480214077Sgibbs			/* Directory is empty.  It is now safe to remove. */
1481214077Sgibbs			error = xs_rm(xbt, cur_path, "");
1482214077Sgibbs			if (error != 0)
1483214077Sgibbs				goto out;
1484214077Sgibbs
1485214077Sgibbs			if (!strcmp(cur_path, root_path))
1486214077Sgibbs				break;
1487214077Sgibbs
1488214077Sgibbs			/* Return to processing the parent directory. */
1489214077Sgibbs			last_slash = strrchr(cur_path, '/');
1490214077Sgibbs			KASSERT(last_slash != NULL,
1491214077Sgibbs				("xs_rm_tree: mangled path %s", cur_path));
1492214077Sgibbs			*last_slash = '\0';
1493214077Sgibbs		}
1494214077Sgibbs	}
1495214077Sgibbs
1496214077Sgibbsout:
1497214077Sgibbs	sbuf_delete(cur_path_sbuf);
1498214077Sgibbs	sbuf_delete(root_path_sbuf);
1499214077Sgibbs	if (dir != NULL)
1500214077Sgibbs		free(dir, M_XENSTORE);
1501214077Sgibbs
1502214077Sgibbs	if (local_xbt.id != 0) {
1503214077Sgibbs		int terror;
1504214077Sgibbs
1505214077Sgibbs		terror = xs_transaction_end(local_xbt, /*abort*/error != 0);
1506214077Sgibbs		xbt.id = 0;
1507214077Sgibbs		if (terror == EAGAIN && error == 0)
1508214077Sgibbs			goto retry;
1509214077Sgibbs	}
1510214077Sgibbs	return (error);
1511181624Skmacy}
1512181624Skmacy
1513214077Sgibbsint
1514214077Sgibbsxs_transaction_start(struct xs_transaction *t)
1515181624Skmacy{
1516214077Sgibbs	char *id_str;
1517186557Skmacy	int error;
1518181624Skmacy
1519214077Sgibbs	error = xs_single(XST_NIL, XS_TRANSACTION_START, "", NULL,
1520214077Sgibbs	    (void **)&id_str);
1521214077Sgibbs	if (error == 0) {
1522214077Sgibbs		t->id = strtoul(id_str, NULL, 0);
1523214077Sgibbs		free(id_str, M_XENSTORE);
1524214077Sgibbs	}
1525214077Sgibbs	return (error);
1526214077Sgibbs}
1527214077Sgibbs
1528214077Sgibbsint
1529214077Sgibbsxs_transaction_end(struct xs_transaction t, int abort)
1530214077Sgibbs{
1531214077Sgibbs	char abortstr[2];
1532214077Sgibbs
1533186557Skmacy	if (abort)
1534186557Skmacy		strcpy(abortstr, "F");
1535186557Skmacy	else
1536186557Skmacy		strcpy(abortstr, "T");
1537181624Skmacy
1538214077Sgibbs	return (xs_single(t, XS_TRANSACTION_END, abortstr, NULL, NULL));
1539181624Skmacy}
1540181624Skmacy
1541186557Skmacyint
1542214077Sgibbsxs_scanf(struct xs_transaction t, const char *dir, const char *node,
1543214077Sgibbs     int *scancountp, const char *fmt, ...)
1544181624Skmacy{
1545186557Skmacy	va_list ap;
1546186557Skmacy	int error, ns;
1547186557Skmacy	char *val;
1548181624Skmacy
1549214077Sgibbs	error = xs_read(t, dir, node, NULL, (void **) &val);
1550186557Skmacy	if (error)
1551186557Skmacy		return (error);
1552181624Skmacy
1553186557Skmacy	va_start(ap, fmt);
1554186557Skmacy	ns = vsscanf(val, fmt, ap);
1555186557Skmacy	va_end(ap);
1556214077Sgibbs	free(val, M_XENSTORE);
1557186557Skmacy	/* Distinctive errno. */
1558186557Skmacy	if (ns == 0)
1559186557Skmacy		return (ERANGE);
1560186557Skmacy	if (scancountp)
1561186557Skmacy		*scancountp = ns;
1562186557Skmacy	return (0);
1563181624Skmacy}
1564181624Skmacy
1565186557Skmacyint
1566214077Sgibbsxs_vprintf(struct xs_transaction t,
1567214077Sgibbs    const char *dir, const char *node, const char *fmt, va_list ap)
1568181624Skmacy{
1569214077Sgibbs	struct sbuf *sb;
1570214077Sgibbs	int error;
1571214077Sgibbs
1572214077Sgibbs	sb = sbuf_new_auto();
1573214077Sgibbs	sbuf_vprintf(sb, fmt, ap);
1574214077Sgibbs	sbuf_finish(sb);
1575214077Sgibbs	error = xs_write(t, dir, node, sbuf_data(sb));
1576214077Sgibbs	sbuf_delete(sb);
1577214077Sgibbs
1578214077Sgibbs	return (error);
1579214077Sgibbs}
1580214077Sgibbs
1581214077Sgibbsint
1582214077Sgibbsxs_printf(struct xs_transaction t, const char *dir, const char *node,
1583214077Sgibbs     const char *fmt, ...)
1584214077Sgibbs{
1585186557Skmacy	va_list ap;
1586214077Sgibbs	int error;
1587181624Skmacy
1588186557Skmacy	va_start(ap, fmt);
1589214077Sgibbs	error = xs_vprintf(t, dir, node, fmt, ap);
1590186557Skmacy	va_end(ap);
1591181624Skmacy
1592186557Skmacy	return (error);
1593181624Skmacy}
1594181624Skmacy
1595186557Skmacyint
1596214077Sgibbsxs_gather(struct xs_transaction t, const char *dir, ...)
1597181624Skmacy{
1598186557Skmacy	va_list ap;
1599186557Skmacy	const char *name;
1600214077Sgibbs	int error;
1601181624Skmacy
1602186557Skmacy	va_start(ap, dir);
1603186557Skmacy	error = 0;
1604186557Skmacy	while (error == 0 && (name = va_arg(ap, char *)) != NULL) {
1605186557Skmacy		const char *fmt = va_arg(ap, char *);
1606186557Skmacy		void *result = va_arg(ap, void *);
1607186557Skmacy		char *p;
1608181624Skmacy
1609214077Sgibbs		error = xs_read(t, dir, name, NULL, (void **) &p);
1610186557Skmacy		if (error)
1611186557Skmacy			break;
1612186557Skmacy
1613186557Skmacy		if (fmt) {
1614186557Skmacy			if (sscanf(p, fmt, result) == 0)
1615186557Skmacy				error = EINVAL;
1616214077Sgibbs			free(p, M_XENSTORE);
1617186557Skmacy		} else
1618186557Skmacy			*(char **)result = p;
1619186557Skmacy	}
1620186557Skmacy	va_end(ap);
1621186557Skmacy
1622186557Skmacy	return (error);
1623181624Skmacy}
1624181624Skmacy
1625186557Skmacyint
1626214077Sgibbsxs_register_watch(struct xs_watch *watch)
1627181624Skmacy{
1628186557Skmacy	/* Pointer in ascii is the token. */
1629186557Skmacy	char token[sizeof(watch) * 2 + 1];
1630186557Skmacy	int error;
1631181624Skmacy
1632186557Skmacy	sprintf(token, "%lX", (long)watch);
1633181624Skmacy
1634214077Sgibbs	sx_slock(&xs.suspend_mutex);
1635181624Skmacy
1636214077Sgibbs	mtx_lock(&xs.registered_watches_lock);
1637186557Skmacy	KASSERT(find_watch(token) == NULL, ("watch already registered"));
1638214077Sgibbs	LIST_INSERT_HEAD(&xs.registered_watches, watch, list);
1639214077Sgibbs	mtx_unlock(&xs.registered_watches_lock);
1640186557Skmacy
1641186557Skmacy	error = xs_watch(watch->node, token);
1642214077Sgibbs
1643186557Skmacy	/* Ignore errors due to multiple registration. */
1644214077Sgibbs	if (error == EEXIST)
1645214077Sgibbs		error = 0;
1646214077Sgibbs
1647214077Sgibbs	if (error != 0) {
1648214077Sgibbs		mtx_lock(&xs.registered_watches_lock);
1649186557Skmacy		LIST_REMOVE(watch, list);
1650214077Sgibbs		mtx_unlock(&xs.registered_watches_lock);
1651186557Skmacy	}
1652181624Skmacy
1653214077Sgibbs	sx_sunlock(&xs.suspend_mutex);
1654181624Skmacy
1655186557Skmacy	return (error);
1656181624Skmacy}
1657181624Skmacy
1658186557Skmacyvoid
1659214077Sgibbsxs_unregister_watch(struct xs_watch *watch)
1660181624Skmacy{
1661186557Skmacy	struct xs_stored_msg *msg, *tmp;
1662186557Skmacy	char token[sizeof(watch) * 2 + 1];
1663186557Skmacy	int error;
1664181624Skmacy
1665186557Skmacy	sprintf(token, "%lX", (long)watch);
1666181624Skmacy
1667214077Sgibbs	sx_slock(&xs.suspend_mutex);
1668214077Sgibbs
1669214077Sgibbs	mtx_lock(&xs.registered_watches_lock);
1670214077Sgibbs	if (find_watch(token) == NULL) {
1671214077Sgibbs		mtx_unlock(&xs.registered_watches_lock);
1672214077Sgibbs		sx_sunlock(&xs.suspend_mutex);
1673214077Sgibbs		return;
1674214077Sgibbs	}
1675186557Skmacy	LIST_REMOVE(watch, list);
1676214077Sgibbs	mtx_unlock(&xs.registered_watches_lock);
1677181624Skmacy
1678186557Skmacy	error = xs_unwatch(watch->node, token);
1679186557Skmacy	if (error)
1680214077Sgibbs		log(LOG_WARNING, "XENSTORE Failed to release watch %s: %i\n",
1681186557Skmacy		    watch->node, error);
1682181624Skmacy
1683214077Sgibbs	sx_sunlock(&xs.suspend_mutex);
1684181624Skmacy
1685186557Skmacy	/* Cancel pending watch events. */
1686214077Sgibbs	mtx_lock(&xs.watch_events_lock);
1687214077Sgibbs	TAILQ_FOREACH_SAFE(msg, &xs.watch_events, list, tmp) {
1688186557Skmacy		if (msg->u.watch.handle != watch)
1689186557Skmacy			continue;
1690214077Sgibbs		TAILQ_REMOVE(&xs.watch_events, msg, list);
1691214077Sgibbs		free(msg->u.watch.vec, M_XENSTORE);
1692214077Sgibbs		free(msg, M_XENSTORE);
1693186557Skmacy	}
1694214077Sgibbs	mtx_unlock(&xs.watch_events_lock);
1695181624Skmacy
1696186557Skmacy	/* Flush any currently-executing callback, unless we are it. :-) */
1697214077Sgibbs	if (curproc->p_pid != xs.xenwatch_pid) {
1698214077Sgibbs		sx_xlock(&xs.xenwatch_mutex);
1699214077Sgibbs		sx_xunlock(&xs.xenwatch_mutex);
1700186557Skmacy	}
1701181624Skmacy}
1702316722Sdelphij
1703316722Sdelphijvoid
1704316722Sdelphijxs_lock(void)
1705316722Sdelphij{
1706316722Sdelphij
1707316722Sdelphij	sx_xlock(&xs.request_mutex);
1708316722Sdelphij	return;
1709316722Sdelphij}
1710316722Sdelphij
1711316722Sdelphijvoid
1712316722Sdelphijxs_unlock(void)
1713316722Sdelphij{
1714316722Sdelphij
1715316722Sdelphij	sx_xunlock(&xs.request_mutex);
1716316722Sdelphij	return;
1717316722Sdelphij}
1718316722Sdelphij
1719