1/******************************************************************************
2 * xenbus_xs.c
3 *
4 * This is the kernel equivalent of the "xs" library.  We don't need everything
5 * and we use xenbus_comms for communication.
6 *
7 * Copyright (C) 2005 Rusty Russell, IBM Corporation
8 *
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License version 2
11 * as published by the Free Software Foundation; or, when distributed
12 * separately from the Linux kernel or incorporated into other
13 * software packages, subject to the following license:
14 *
15 * Permission is hereby granted, free of charge, to any person obtaining a copy
16 * of this source file (the "Software"), to deal in the Software without
17 * restriction, including without limitation the rights to use, copy, modify,
18 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
19 * and to permit persons to whom the Software is furnished to do so, subject to
20 * the following conditions:
21 *
22 * The above copyright notice and this permission notice shall be included in
23 * all copies or substantial portions of the Software.
24 *
25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
26 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
27 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
28 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
29 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
30 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
31 * IN THE SOFTWARE.
32 */
33
34#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
35
36#include <linux/unistd.h>
37#include <linux/errno.h>
38#include <linux/types.h>
39#include <linux/uio.h>
40#include <linux/kernel.h>
41#include <linux/string.h>
42#include <linux/err.h>
43#include <linux/slab.h>
44#include <linux/fcntl.h>
45#include <linux/kthread.h>
46#include <linux/reboot.h>
47#include <linux/rwsem.h>
48#include <linux/mutex.h>
49#include <asm/xen/hypervisor.h>
50#include <xen/xenbus.h>
51#include <xen/xen.h>
52#include "xenbus.h"
53
54/*
55 * Framework to protect suspend/resume handling against normal Xenstore
56 * message handling:
57 * During suspend/resume there must be no open transaction and no pending
58 * Xenstore request.
59 * New watch events happening in this time can be ignored by firing all watches
60 * after resume.
61 */
62
/*
 * Lock protecting enter/exit of the critical region, i.e. the counters
 * below that track in-flight requests and a pending suspend.
 */
static DEFINE_SPINLOCK(xs_state_lock);
/* Number of users in critical region (protected by xs_state_lock). */
static unsigned int xs_state_users;
/* Suspend handler waiting or already active (protected by xs_state_lock)? */
static int xs_suspend_active;
/* Unique Xenstore request id (protected by xs_state_lock). */
static uint32_t xs_request_id;

/* Wait queue for all callers waiting for critical region to become usable. */
static DECLARE_WAIT_QUEUE_HEAD(xs_state_enter_wq);
/* Wait queue for suspend handling waiting for critical region being empty. */
static DECLARE_WAIT_QUEUE_HEAD(xs_state_exit_wq);

/* List of registered watches, and a lock to protect it. */
static LIST_HEAD(watches);
static DEFINE_SPINLOCK(watches_lock);

/* List of pending watch callback events, and a lock to protect it. */
static LIST_HEAD(watch_events);
static DEFINE_SPINLOCK(watch_events_lock);

/*
 * Protect watch (de)register against save/restore: (un)registration takes
 * the semaphore for reading, suspend takes it for writing.
 */
static DECLARE_RWSEM(xs_watch_rwsem);

/*
 * Details of the xenwatch callback kernel thread. The thread waits on the
 * watch_events_waitq for work to do (queued on watch_events list). When it
 * wakes up it acquires the xenwatch_mutex before reading the list and
 * carrying out work.
 *
 * xenwatch_pid records the thread's pid so that code running inside a watch
 * callback can tell it already holds xenwatch_mutex.
 */
static pid_t xenwatch_pid;
static DEFINE_MUTEX(xenwatch_mutex);
static DECLARE_WAIT_QUEUE_HEAD(watch_events_waitq);
97
98static void xs_suspend_enter(void)
99{
100	spin_lock(&xs_state_lock);
101	xs_suspend_active++;
102	spin_unlock(&xs_state_lock);
103	wait_event(xs_state_exit_wq, xs_state_users == 0);
104}
105
106static void xs_suspend_exit(void)
107{
108	xb_dev_generation_id++;
109	spin_lock(&xs_state_lock);
110	xs_suspend_active--;
111	spin_unlock(&xs_state_lock);
112	wake_up_all(&xs_state_enter_wq);
113}
114
115static uint32_t xs_request_enter(struct xb_req_data *req)
116{
117	uint32_t rq_id;
118
119	req->type = req->msg.type;
120
121	spin_lock(&xs_state_lock);
122
123	while (!xs_state_users && xs_suspend_active) {
124		spin_unlock(&xs_state_lock);
125		wait_event(xs_state_enter_wq, xs_suspend_active == 0);
126		spin_lock(&xs_state_lock);
127	}
128
129	if (req->type == XS_TRANSACTION_START && !req->user_req)
130		xs_state_users++;
131	xs_state_users++;
132	rq_id = xs_request_id++;
133
134	spin_unlock(&xs_state_lock);
135
136	return rq_id;
137}
138
139void xs_request_exit(struct xb_req_data *req)
140{
141	spin_lock(&xs_state_lock);
142	xs_state_users--;
143	if ((req->type == XS_TRANSACTION_START && req->msg.type == XS_ERROR) ||
144	    (req->type == XS_TRANSACTION_END && !req->user_req &&
145	     !WARN_ON_ONCE(req->msg.type == XS_ERROR &&
146			   !strcmp(req->body, "ENOENT"))))
147		xs_state_users--;
148	spin_unlock(&xs_state_lock);
149
150	if (xs_suspend_active && !xs_state_users)
151		wake_up(&xs_state_exit_wq);
152}
153
154static int get_error(const char *errorstring)
155{
156	unsigned int i;
157
158	for (i = 0; strcmp(errorstring, xsd_errors[i].errstring) != 0; i++) {
159		if (i == ARRAY_SIZE(xsd_errors) - 1) {
160			pr_warn("xen store gave: unknown error %s\n",
161				errorstring);
162			return EINVAL;
163		}
164	}
165	return xsd_errors[i].errnum;
166}
167
168static bool xenbus_ok(void)
169{
170	switch (xen_store_domain_type) {
171	case XS_LOCAL:
172		switch (system_state) {
173		case SYSTEM_POWER_OFF:
174		case SYSTEM_RESTART:
175		case SYSTEM_HALT:
176			return false;
177		default:
178			break;
179		}
180		return true;
181	case XS_PV:
182	case XS_HVM:
183		/* FIXME: Could check that the remote domain is alive,
184		 * but it is normally initial domain. */
185		return true;
186	default:
187		break;
188	}
189	return false;
190}
191
192static bool test_reply(struct xb_req_data *req)
193{
194	if (req->state == xb_req_state_got_reply || !xenbus_ok()) {
195		/* read req->state before all other fields */
196		virt_rmb();
197		return true;
198	}
199
200	/* Make sure to reread req->state each time. */
201	barrier();
202
203	return false;
204}
205
206static void *read_reply(struct xb_req_data *req)
207{
208	do {
209		wait_event(req->wq, test_reply(req));
210
211		if (!xenbus_ok())
212			/*
213			 * If we are in the process of being shut-down there is
214			 * no point of trying to contact XenBus - it is either
215			 * killed (xenstored application) or the other domain
216			 * has been killed or is unreachable.
217			 */
218			return ERR_PTR(-EIO);
219		if (req->err)
220			return ERR_PTR(req->err);
221
222	} while (req->state != xb_req_state_got_reply);
223
224	return req->body;
225}
226
227static void xs_send(struct xb_req_data *req, struct xsd_sockmsg *msg)
228{
229	bool notify;
230
231	req->msg = *msg;
232	req->err = 0;
233	req->state = xb_req_state_queued;
234	init_waitqueue_head(&req->wq);
235
236	/* Save the caller req_id and restore it later in the reply */
237	req->caller_req_id = req->msg.req_id;
238	req->msg.req_id = xs_request_enter(req);
239
240	mutex_lock(&xb_write_mutex);
241	list_add_tail(&req->list, &xb_write_list);
242	notify = list_is_singular(&xb_write_list);
243	mutex_unlock(&xb_write_mutex);
244
245	if (notify)
246		wake_up(&xb_waitq);
247}
248
249static void *xs_wait_for_reply(struct xb_req_data *req, struct xsd_sockmsg *msg)
250{
251	void *ret;
252
253	ret = read_reply(req);
254
255	xs_request_exit(req);
256
257	msg->type = req->msg.type;
258	msg->len = req->msg.len;
259
260	mutex_lock(&xb_write_mutex);
261	if (req->state == xb_req_state_queued ||
262	    req->state == xb_req_state_wait_reply)
263		req->state = xb_req_state_aborted;
264	else
265		kfree(req);
266	mutex_unlock(&xb_write_mutex);
267
268	return ret;
269}
270
271static void xs_wake_up(struct xb_req_data *req)
272{
273	wake_up(&req->wq);
274}
275
276int xenbus_dev_request_and_reply(struct xsd_sockmsg *msg, void *par)
277{
278	struct xb_req_data *req;
279	struct kvec *vec;
280
281	req = kmalloc(sizeof(*req) + sizeof(*vec), GFP_KERNEL);
282	if (!req)
283		return -ENOMEM;
284
285	vec = (struct kvec *)(req + 1);
286	vec->iov_len = msg->len;
287	vec->iov_base = msg + 1;
288
289	req->vec = vec;
290	req->num_vecs = 1;
291	req->cb = xenbus_dev_queue_reply;
292	req->par = par;
293	req->user_req = true;
294
295	xs_send(req, msg);
296
297	return 0;
298}
299EXPORT_SYMBOL(xenbus_dev_request_and_reply);
300
301/* Send message to xs, get kmalloc'ed reply.  ERR_PTR() on error. */
302static void *xs_talkv(struct xenbus_transaction t,
303		      enum xsd_sockmsg_type type,
304		      const struct kvec *iovec,
305		      unsigned int num_vecs,
306		      unsigned int *len)
307{
308	struct xb_req_data *req;
309	struct xsd_sockmsg msg;
310	void *ret = NULL;
311	unsigned int i;
312	int err;
313
314	req = kmalloc(sizeof(*req), GFP_NOIO | __GFP_HIGH);
315	if (!req)
316		return ERR_PTR(-ENOMEM);
317
318	req->vec = iovec;
319	req->num_vecs = num_vecs;
320	req->cb = xs_wake_up;
321	req->user_req = false;
322
323	msg.req_id = 0;
324	msg.tx_id = t.id;
325	msg.type = type;
326	msg.len = 0;
327	for (i = 0; i < num_vecs; i++)
328		msg.len += iovec[i].iov_len;
329
330	xs_send(req, &msg);
331
332	ret = xs_wait_for_reply(req, &msg);
333	if (len)
334		*len = msg.len;
335
336	if (IS_ERR(ret))
337		return ret;
338
339	if (msg.type == XS_ERROR) {
340		err = get_error(ret);
341		kfree(ret);
342		return ERR_PTR(-err);
343	}
344
345	if (msg.type != type) {
346		pr_warn_ratelimited("unexpected type [%d], expected [%d]\n",
347				    msg.type, type);
348		kfree(ret);
349		return ERR_PTR(-EINVAL);
350	}
351	return ret;
352}
353
354/* Simplified version of xs_talkv: single message. */
355static void *xs_single(struct xenbus_transaction t,
356		       enum xsd_sockmsg_type type,
357		       const char *string,
358		       unsigned int *len)
359{
360	struct kvec iovec;
361
362	iovec.iov_base = (void *)string;
363	iovec.iov_len = strlen(string) + 1;
364	return xs_talkv(t, type, &iovec, 1, len);
365}
366
/*
 * Many commands only need an ack, don't care what it says: turn a reply
 * into an error code. An ERR_PTR() yields its errno; a real reply is freed
 * and yields 0.
 */
static int xs_error(char *reply)
{
	int rc = 0;

	if (IS_ERR(reply))
		rc = PTR_ERR(reply);
	else
		kfree(reply);

	return rc;
}
375
/*
 * Count the NUL-separated strings that start within the first @len bytes
 * of @strings. Every string that begins before the end of the buffer is
 * counted, so a zero-length buffer holds no strings.
 */
static unsigned int count_strings(const char *strings, unsigned int len)
{
	const char *const end = strings + len;
	const char *cur = strings;
	unsigned int count = 0;

	while (cur < end) {
		count++;
		/* Step over the string and its terminator. */
		cur += strlen(cur) + 1;
	}

	return count;
}
386
387/* Return the path to dir with /name appended. Buffer must be kfree()'ed. */
388static char *join(const char *dir, const char *name)
389{
390	char *buffer;
391
392	if (strlen(name) == 0)
393		buffer = kasprintf(GFP_NOIO | __GFP_HIGH, "%s", dir);
394	else
395		buffer = kasprintf(GFP_NOIO | __GFP_HIGH, "%s/%s", dir, name);
396	return (!buffer) ? ERR_PTR(-ENOMEM) : buffer;
397}
398
399static char **split(char *strings, unsigned int len, unsigned int *num)
400{
401	char *p, **ret;
402
403	/* Count the strings. */
404	*num = count_strings(strings, len);
405
406	/* Transfer to one big alloc for easy freeing. */
407	ret = kmalloc(*num * sizeof(char *) + len, GFP_NOIO | __GFP_HIGH);
408	if (!ret) {
409		kfree(strings);
410		return ERR_PTR(-ENOMEM);
411	}
412	memcpy(&ret[*num], strings, len);
413	kfree(strings);
414
415	strings = (char *)&ret[*num];
416	for (p = strings, *num = 0; p < strings + len; p += strlen(p) + 1)
417		ret[(*num)++] = p;
418
419	return ret;
420}
421
422char **xenbus_directory(struct xenbus_transaction t,
423			const char *dir, const char *node, unsigned int *num)
424{
425	char *strings, *path;
426	unsigned int len;
427
428	path = join(dir, node);
429	if (IS_ERR(path))
430		return (char **)path;
431
432	strings = xs_single(t, XS_DIRECTORY, path, &len);
433	kfree(path);
434	if (IS_ERR(strings))
435		return (char **)strings;
436
437	return split(strings, len, num);
438}
439EXPORT_SYMBOL_GPL(xenbus_directory);
440
441/* Check if a path exists. Return 1 if it does. */
442int xenbus_exists(struct xenbus_transaction t,
443		  const char *dir, const char *node)
444{
445	char **d;
446	int dir_n;
447
448	d = xenbus_directory(t, dir, node, &dir_n);
449	if (IS_ERR(d))
450		return 0;
451	kfree(d);
452	return 1;
453}
454EXPORT_SYMBOL_GPL(xenbus_exists);
455
456/* Get the value of a single file.
457 * Returns a kmalloced value: call free() on it after use.
458 * len indicates length in bytes.
459 */
460void *xenbus_read(struct xenbus_transaction t,
461		  const char *dir, const char *node, unsigned int *len)
462{
463	char *path;
464	void *ret;
465
466	path = join(dir, node);
467	if (IS_ERR(path))
468		return (void *)path;
469
470	ret = xs_single(t, XS_READ, path, len);
471	kfree(path);
472	return ret;
473}
474EXPORT_SYMBOL_GPL(xenbus_read);
475
476/* Write the value of a single file.
477 * Returns -err on failure.
478 */
479int xenbus_write(struct xenbus_transaction t,
480		 const char *dir, const char *node, const char *string)
481{
482	const char *path;
483	struct kvec iovec[2];
484	int ret;
485
486	path = join(dir, node);
487	if (IS_ERR(path))
488		return PTR_ERR(path);
489
490	iovec[0].iov_base = (void *)path;
491	iovec[0].iov_len = strlen(path) + 1;
492	iovec[1].iov_base = (void *)string;
493	iovec[1].iov_len = strlen(string);
494
495	ret = xs_error(xs_talkv(t, XS_WRITE, iovec, ARRAY_SIZE(iovec), NULL));
496	kfree(path);
497	return ret;
498}
499EXPORT_SYMBOL_GPL(xenbus_write);
500
501/* Create a new directory. */
502int xenbus_mkdir(struct xenbus_transaction t,
503		 const char *dir, const char *node)
504{
505	char *path;
506	int ret;
507
508	path = join(dir, node);
509	if (IS_ERR(path))
510		return PTR_ERR(path);
511
512	ret = xs_error(xs_single(t, XS_MKDIR, path, NULL));
513	kfree(path);
514	return ret;
515}
516EXPORT_SYMBOL_GPL(xenbus_mkdir);
517
518/* Destroy a file or directory (directories must be empty). */
519int xenbus_rm(struct xenbus_transaction t, const char *dir, const char *node)
520{
521	char *path;
522	int ret;
523
524	path = join(dir, node);
525	if (IS_ERR(path))
526		return PTR_ERR(path);
527
528	ret = xs_error(xs_single(t, XS_RM, path, NULL));
529	kfree(path);
530	return ret;
531}
532EXPORT_SYMBOL_GPL(xenbus_rm);
533
534/* Start a transaction: changes by others will not be seen during this
535 * transaction, and changes will not be visible to others until end.
536 */
537int xenbus_transaction_start(struct xenbus_transaction *t)
538{
539	char *id_str;
540
541	id_str = xs_single(XBT_NIL, XS_TRANSACTION_START, "", NULL);
542	if (IS_ERR(id_str))
543		return PTR_ERR(id_str);
544
545	t->id = simple_strtoul(id_str, NULL, 0);
546	kfree(id_str);
547	return 0;
548}
549EXPORT_SYMBOL_GPL(xenbus_transaction_start);
550
551/* End a transaction.
552 * If abandon is true, transaction is discarded instead of committed.
553 */
554int xenbus_transaction_end(struct xenbus_transaction t, int abort)
555{
556	char abortstr[2];
557
558	if (abort)
559		strcpy(abortstr, "F");
560	else
561		strcpy(abortstr, "T");
562
563	return xs_error(xs_single(t, XS_TRANSACTION_END, abortstr, NULL));
564}
565EXPORT_SYMBOL_GPL(xenbus_transaction_end);
566
567/* Single read and scanf: returns -errno or num scanned. */
568int xenbus_scanf(struct xenbus_transaction t,
569		 const char *dir, const char *node, const char *fmt, ...)
570{
571	va_list ap;
572	int ret;
573	char *val;
574
575	val = xenbus_read(t, dir, node, NULL);
576	if (IS_ERR(val))
577		return PTR_ERR(val);
578
579	va_start(ap, fmt);
580	ret = vsscanf(val, fmt, ap);
581	va_end(ap);
582	kfree(val);
583	/* Distinctive errno. */
584	if (ret == 0)
585		return -ERANGE;
586	return ret;
587}
588EXPORT_SYMBOL_GPL(xenbus_scanf);
589
590/* Read an (optional) unsigned value. */
591unsigned int xenbus_read_unsigned(const char *dir, const char *node,
592				  unsigned int default_val)
593{
594	unsigned int val;
595	int ret;
596
597	ret = xenbus_scanf(XBT_NIL, dir, node, "%u", &val);
598	if (ret <= 0)
599		val = default_val;
600
601	return val;
602}
603EXPORT_SYMBOL_GPL(xenbus_read_unsigned);
604
605/* Single printf and write: returns -errno or 0. */
606int xenbus_printf(struct xenbus_transaction t,
607		  const char *dir, const char *node, const char *fmt, ...)
608{
609	va_list ap;
610	int ret;
611	char *buf;
612
613	va_start(ap, fmt);
614	buf = kvasprintf(GFP_NOIO | __GFP_HIGH, fmt, ap);
615	va_end(ap);
616
617	if (!buf)
618		return -ENOMEM;
619
620	ret = xenbus_write(t, dir, node, buf);
621
622	kfree(buf);
623
624	return ret;
625}
626EXPORT_SYMBOL_GPL(xenbus_printf);
627
628/* Takes tuples of names, scanf-style args, and void **, NULL terminated. */
629int xenbus_gather(struct xenbus_transaction t, const char *dir, ...)
630{
631	va_list ap;
632	const char *name;
633	int ret = 0;
634
635	va_start(ap, dir);
636	while (ret == 0 && (name = va_arg(ap, char *)) != NULL) {
637		const char *fmt = va_arg(ap, char *);
638		void *result = va_arg(ap, void *);
639		char *p;
640
641		p = xenbus_read(t, dir, name, NULL);
642		if (IS_ERR(p)) {
643			ret = PTR_ERR(p);
644			break;
645		}
646		if (fmt) {
647			if (sscanf(p, fmt, result) == 0)
648				ret = -EINVAL;
649			kfree(p);
650		} else
651			*(char **)result = p;
652	}
653	va_end(ap);
654	return ret;
655}
656EXPORT_SYMBOL_GPL(xenbus_gather);
657
658static int xs_watch(const char *path, const char *token)
659{
660	struct kvec iov[2];
661
662	iov[0].iov_base = (void *)path;
663	iov[0].iov_len = strlen(path) + 1;
664	iov[1].iov_base = (void *)token;
665	iov[1].iov_len = strlen(token) + 1;
666
667	return xs_error(xs_talkv(XBT_NIL, XS_WATCH, iov,
668				 ARRAY_SIZE(iov), NULL));
669}
670
671static int xs_unwatch(const char *path, const char *token)
672{
673	struct kvec iov[2];
674
675	iov[0].iov_base = (char *)path;
676	iov[0].iov_len = strlen(path) + 1;
677	iov[1].iov_base = (char *)token;
678	iov[1].iov_len = strlen(token) + 1;
679
680	return xs_error(xs_talkv(XBT_NIL, XS_UNWATCH, iov,
681				 ARRAY_SIZE(iov), NULL));
682}
683
684static struct xenbus_watch *find_watch(const char *token)
685{
686	struct xenbus_watch *i, *cmp;
687
688	cmp = (void *)simple_strtoul(token, NULL, 16);
689
690	list_for_each_entry(i, &watches, list)
691		if (i == cmp)
692			return i;
693
694	return NULL;
695}
696
697int xs_watch_msg(struct xs_watch_event *event)
698{
699	if (count_strings(event->body, event->len) != 2) {
700		kfree(event);
701		return -EINVAL;
702	}
703	event->path = (const char *)event->body;
704	event->token = (const char *)strchr(event->body, '\0') + 1;
705
706	spin_lock(&watches_lock);
707	event->handle = find_watch(event->token);
708	if (event->handle != NULL &&
709			(!event->handle->will_handle ||
710			 event->handle->will_handle(event->handle,
711				 event->path, event->token))) {
712		spin_lock(&watch_events_lock);
713		list_add_tail(&event->list, &watch_events);
714		event->handle->nr_pending++;
715		wake_up(&watch_events_waitq);
716		spin_unlock(&watch_events_lock);
717	} else
718		kfree(event);
719	spin_unlock(&watches_lock);
720
721	return 0;
722}
723
724/*
725 * Certain older XenBus toolstack cannot handle reading values that are
726 * not populated. Some Xen 3.4 installation are incapable of doing this
727 * so if we are running on anything older than 4 do not attempt to read
728 * control/platform-feature-xs_reset_watches.
729 */
730static bool xen_strict_xenbus_quirk(void)
731{
732#ifdef CONFIG_X86
733	uint32_t eax, ebx, ecx, edx, base;
734
735	base = xen_cpuid_base();
736	cpuid(base + 1, &eax, &ebx, &ecx, &edx);
737
738	if ((eax >> 16) < 4)
739		return true;
740#endif
741	return false;
742
743}
744static void xs_reset_watches(void)
745{
746	int err;
747
748	if (!xen_hvm_domain() || xen_initial_domain())
749		return;
750
751	if (xen_strict_xenbus_quirk())
752		return;
753
754	if (!xenbus_read_unsigned("control",
755				  "platform-feature-xs_reset_watches", 0))
756		return;
757
758	err = xs_error(xs_single(XBT_NIL, XS_RESET_WATCHES, "", NULL));
759	if (err && err != -EEXIST)
760		pr_warn("xs_reset_watches failed: %d\n", err);
761}
762
763/* Register callback to watch this node. */
764int register_xenbus_watch(struct xenbus_watch *watch)
765{
766	/* Pointer in ascii is the token. */
767	char token[sizeof(watch) * 2 + 1];
768	int err;
769
770	sprintf(token, "%lX", (long)watch);
771
772	watch->nr_pending = 0;
773
774	down_read(&xs_watch_rwsem);
775
776	spin_lock(&watches_lock);
777	BUG_ON(find_watch(token));
778	list_add(&watch->list, &watches);
779	spin_unlock(&watches_lock);
780
781	err = xs_watch(watch->node, token);
782
783	if (err) {
784		spin_lock(&watches_lock);
785		list_del(&watch->list);
786		spin_unlock(&watches_lock);
787	}
788
789	up_read(&xs_watch_rwsem);
790
791	return err;
792}
793EXPORT_SYMBOL_GPL(register_xenbus_watch);
794
795void unregister_xenbus_watch(struct xenbus_watch *watch)
796{
797	struct xs_watch_event *event, *tmp;
798	char token[sizeof(watch) * 2 + 1];
799	int err;
800
801	sprintf(token, "%lX", (long)watch);
802
803	down_read(&xs_watch_rwsem);
804
805	spin_lock(&watches_lock);
806	BUG_ON(!find_watch(token));
807	list_del(&watch->list);
808	spin_unlock(&watches_lock);
809
810	err = xs_unwatch(watch->node, token);
811	if (err)
812		pr_warn("Failed to release watch %s: %i\n", watch->node, err);
813
814	up_read(&xs_watch_rwsem);
815
816	/* Make sure there are no callbacks running currently (unless
817	   its us) */
818	if (current->pid != xenwatch_pid)
819		mutex_lock(&xenwatch_mutex);
820
821	/* Cancel pending watch events. */
822	spin_lock(&watch_events_lock);
823	if (watch->nr_pending) {
824		list_for_each_entry_safe(event, tmp, &watch_events, list) {
825			if (event->handle != watch)
826				continue;
827			list_del(&event->list);
828			kfree(event);
829		}
830		watch->nr_pending = 0;
831	}
832	spin_unlock(&watch_events_lock);
833
834	if (current->pid != xenwatch_pid)
835		mutex_unlock(&xenwatch_mutex);
836}
837EXPORT_SYMBOL_GPL(unregister_xenbus_watch);
838
839void xs_suspend(void)
840{
841	xs_suspend_enter();
842
843	mutex_lock(&xs_response_mutex);
844	down_write(&xs_watch_rwsem);
845}
846
847void xs_resume(void)
848{
849	struct xenbus_watch *watch;
850	char token[sizeof(watch) * 2 + 1];
851
852	xb_init_comms();
853
854	mutex_unlock(&xs_response_mutex);
855
856	xs_suspend_exit();
857
858	/* No need for watches_lock: the xs_watch_rwsem is sufficient. */
859	list_for_each_entry(watch, &watches, list) {
860		sprintf(token, "%lX", (long)watch);
861		xs_watch(watch->node, token);
862	}
863
864	up_write(&xs_watch_rwsem);
865}
866
867void xs_suspend_cancel(void)
868{
869	up_write(&xs_watch_rwsem);
870	mutex_unlock(&xs_response_mutex);
871
872	xs_suspend_exit();
873}
874
875static int xenwatch_thread(void *unused)
876{
877	struct xs_watch_event *event;
878
879	xenwatch_pid = current->pid;
880
881	for (;;) {
882		wait_event_interruptible(watch_events_waitq,
883					 !list_empty(&watch_events));
884
885		if (kthread_should_stop())
886			break;
887
888		mutex_lock(&xenwatch_mutex);
889
890		spin_lock(&watch_events_lock);
891		event = list_first_entry_or_null(&watch_events,
892				struct xs_watch_event, list);
893		if (event) {
894			list_del(&event->list);
895			event->handle->nr_pending--;
896		}
897		spin_unlock(&watch_events_lock);
898
899		if (event) {
900			event->handle->callback(event->handle, event->path,
901						event->token);
902			kfree(event);
903		}
904
905		mutex_unlock(&xenwatch_mutex);
906	}
907
908	return 0;
909}
910
911/*
912 * Wake up all threads waiting for a xenstore reply. In case of shutdown all
913 * pending replies will be marked as "aborted" in order to let the waiters
914 * return in spite of xenstore possibly no longer being able to reply. This
915 * will avoid blocking shutdown by a thread waiting for xenstore but being
916 * necessary for shutdown processing to proceed.
917 */
918static int xs_reboot_notify(struct notifier_block *nb,
919			    unsigned long code, void *unused)
920{
921	struct xb_req_data *req;
922
923	mutex_lock(&xb_write_mutex);
924	list_for_each_entry(req, &xs_reply_list, list)
925		wake_up(&req->wq);
926	list_for_each_entry(req, &xb_write_list, list)
927		wake_up(&req->wq);
928	mutex_unlock(&xb_write_mutex);
929	return NOTIFY_DONE;
930}
931
932static struct notifier_block xs_reboot_nb = {
933	.notifier_call = xs_reboot_notify,
934};
935
936int xs_init(void)
937{
938	int err;
939	struct task_struct *task;
940
941	register_reboot_notifier(&xs_reboot_nb);
942
943	/* Initialize the shared memory rings to talk to xenstored */
944	err = xb_init_comms();
945	if (err)
946		return err;
947
948	task = kthread_run(xenwatch_thread, NULL, "xenwatch");
949	if (IS_ERR(task))
950		return PTR_ERR(task);
951
952	/* shutdown watches for kexec boot */
953	xs_reset_watches();
954
955	return 0;
956}
957