/******************************************************************************
 * xenbus_comms.c
 *
 * Low level code to talk to Xen Store: ring buffer and event channel.
 *
 * Copyright (C) 2005 Rusty Russell, IBM Corporation
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/wait.h>
#include <linux/interrupt.h>
#include <linux/kthread.h>
#include <linux/sched.h>
#include <linux/err.h>
#include <xen/xenbus.h>
#include <asm/xen/hypervisor.h>
#include <xen/events.h>
#include <xen/page.h>
#include "xenbus.h"

/* A list of replies. Currently only one will ever be outstanding. */
LIST_HEAD(xs_reply_list);

/* A list of write requests. */
LIST_HEAD(xb_write_list);
DECLARE_WAIT_QUEUE_HEAD(xb_waitq);
DEFINE_MUTEX(xb_write_mutex);

/* Protect xenbus reader thread against save/restore. */
DEFINE_MUTEX(xs_response_mutex);

static int xenbus_irq;
static struct task_struct *xenbus_task;

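/* Event channel interrupt handler: just wake the xenbus thread. */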
static irqreturn_t wake_waiting(int irq, void *unused)
{
	wake_up(&xb_waitq);
	return IRQ_HANDLED;
}

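/*
 * The ring indexes are free-running counters, so prod - cons is the
 * number of bytes in flight even after the counters wrap.  Anything
 * beyond XENSTORE_RING_SIZE means the other end is misbehaving.
 */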
static int check_indexes(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod)
{
	return ((prod - cons) <= XENSTORE_RING_SIZE);
}

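/*
 * Return a pointer to the next contiguous run of free bytes in the
 * request ring and store its length in *len: the run is bounded both by
 * the end of the ring buffer and by the amount of free space.  E.g. with
 * a 1024-byte ring, prod == 1020 and cons == 10, the chunk is the 4
 * bytes up to the end of the buffer, although 14 bytes are free in all.
 */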
static void *get_output_chunk(XENSTORE_RING_IDX cons,
			      XENSTORE_RING_IDX prod,
			      char *buf, uint32_t *len)
{
	*len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(prod);
	if ((XENSTORE_RING_SIZE - (prod - cons)) < *len)
		*len = XENSTORE_RING_SIZE - (prod - cons);
	return buf + MASK_XENSTORE_IDX(prod);
}

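/*
 * Return a pointer to the next contiguous run of unread bytes in the
 * response ring and store its length in *len: the run is bounded both by
 * the end of the ring buffer and by the amount of pending data.
 */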
static const void *get_input_chunk(XENSTORE_RING_IDX cons,
				   XENSTORE_RING_IDX prod,
				   const char *buf, uint32_t *len)
{
	*len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(cons);
	if ((prod - cons) < *len)
		*len = prod - cons;
	return buf + MASK_XENSTORE_IDX(cons);
}

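/* True if a write request is queued and the request ring has space. */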
static int xb_data_to_write(void)
{
	struct xenstore_domain_interface *intf = xen_store_interface;

	return (intf->req_prod - intf->req_cons) != XENSTORE_RING_SIZE &&
		!list_empty(&xb_write_list);
}

/**
 * xb_write - low level write
 * @data: buffer to send
 * @len: length of buffer
 *
 * Returns number of bytes written or -err.
 */
static int xb_write(const void *data, unsigned int len)
{
	struct xenstore_domain_interface *intf = xen_store_interface;
	XENSTORE_RING_IDX cons, prod;
	unsigned int bytes = 0;

	while (len != 0) {
		void *dst;
		unsigned int avail;

		/* Read indexes, then verify. */
		cons = intf->req_cons;
		prod = intf->req_prod;
		if (!check_indexes(cons, prod)) {
			intf->req_cons = intf->req_prod = 0;
			return -EIO;
		}
		if (!xb_data_to_write())
			return bytes;

		/* Must write data /after/ reading the consumer index. */
		virt_mb();

		dst = get_output_chunk(cons, prod, intf->req, &avail);
		if (avail == 0)
			continue;
		if (avail > len)
			avail = len;

		memcpy(dst, data, avail);
		data += avail;
		len -= avail;
		bytes += avail;

		/* Other side must not see new producer until data is there. */
		virt_wmb();
		intf->req_prod += avail;

		/* Implies mb(): other side will see the updated producer. */
		if (prod <= intf->req_cons)
			notify_remote_via_evtchn(xen_store_evtchn);
	}

	return bytes;
}

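/* True if the response ring contains unread data. */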
static int xb_data_to_read(void)
{
	struct xenstore_domain_interface *intf = xen_store_interface;
	return (intf->rsp_cons != intf->rsp_prod);
}

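/**
 * xb_read - low level read
 * @data: buffer to fill
 * @len: length of buffer
 *
 * Returns number of bytes read or -err.
 */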
static int xb_read(void *data, unsigned int len)
{
	struct xenstore_domain_interface *intf = xen_store_interface;
	XENSTORE_RING_IDX cons, prod;
	unsigned int bytes = 0;

	while (len != 0) {
		unsigned int avail;
		const char *src;

		/* Read indexes, then verify. */
		cons = intf->rsp_cons;
		prod = intf->rsp_prod;
		if (cons == prod)
			return bytes;

		if (!check_indexes(cons, prod)) {
			intf->rsp_cons = intf->rsp_prod = 0;
			return -EIO;
		}

		src = get_input_chunk(cons, prod, intf->rsp, &avail);
		if (avail == 0)
			continue;
		if (avail > len)
			avail = len;

		/* Must read data /after/ reading the producer index. */
		virt_rmb();

		memcpy(data, src, avail);
		data += avail;
		len -= avail;
		bytes += avail;

		/* Other side must not see free space until we've copied out */
		virt_mb();
		intf->rsp_cons += avail;

		/* Implies mb(): other side will see the updated consumer. */
		if (intf->rsp_prod - cons >= XENSTORE_RING_SIZE)
			notify_remote_via_evtchn(xen_store_evtchn);
	}

	return bytes;
}

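/*
 * Read and dispatch one message from the response ring.  Assembly state
 * is kept in a static struct across calls, so a message may be completed
 * over several invocations when only part of it is available.  Watch
 * events are passed to xs_watch_msg(); replies are matched by req_id
 * against xs_reply_list and handed back to the requester via req->cb().
 */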
static int process_msg(void)
{
	static struct {
		struct xsd_sockmsg msg;
		char *body;
		union {
			void *alloc;
			struct xs_watch_event *watch;
		};
		bool in_msg;
		bool in_hdr;
		unsigned int read;
	} state;
	struct xb_req_data *req;
	int err;
	unsigned int len;

	if (!state.in_msg) {
		state.in_msg = true;
		state.in_hdr = true;
		state.read = 0;

		/*
		 * We must disallow save/restore while reading a message.
		 * A partial read across s/r leaves us out of sync with
		 * xenstored.
		 * xs_response_mutex is locked as long as we are processing one
		 * message. state.in_msg will be true as long as we are holding
		 * the lock here.
		 */
		mutex_lock(&xs_response_mutex);

		if (!xb_data_to_read()) {
			/* We raced with save/restore: pending data 'gone'. */
			mutex_unlock(&xs_response_mutex);
			state.in_msg = false;
			return 0;
		}
	}

	if (state.in_hdr) {
		if (state.read != sizeof(state.msg)) {
			err = xb_read((void *)&state.msg + state.read,
				      sizeof(state.msg) - state.read);
			if (err < 0)
				goto out;
			state.read += err;
			if (state.read != sizeof(state.msg))
				return 0;
			if (state.msg.len > XENSTORE_PAYLOAD_MAX) {
				err = -EINVAL;
				goto out;
			}
		}

		len = state.msg.len + 1;
		if (state.msg.type == XS_WATCH_EVENT)
			len += sizeof(*state.watch);

		state.alloc = kmalloc(len, GFP_NOIO | __GFP_HIGH);
		if (!state.alloc)
			return -ENOMEM;

		if (state.msg.type == XS_WATCH_EVENT)
			state.body = state.watch->body;
		else
			state.body = state.alloc;
		state.in_hdr = false;
		state.read = 0;
	}

	err = xb_read(state.body + state.read, state.msg.len - state.read);
	if (err < 0)
		goto out;

	state.read += err;
	if (state.read != state.msg.len)
		return 0;

	state.body[state.msg.len] = '\0';

	if (state.msg.type == XS_WATCH_EVENT) {
		state.watch->len = state.msg.len;
		err = xs_watch_msg(state.watch);
	} else {
		err = -ENOENT;
		mutex_lock(&xb_write_mutex);
		list_for_each_entry(req, &xs_reply_list, list) {
			if (req->msg.req_id == state.msg.req_id) {
				list_del(&req->list);
				err = 0;
				break;
			}
		}
		mutex_unlock(&xb_write_mutex);
		if (err)
			goto out;

		if (req->state == xb_req_state_wait_reply) {
			req->msg.req_id = req->caller_req_id;
			req->msg.type = state.msg.type;
			req->msg.len = state.msg.len;
			req->body = state.body;
			/* write body, then update state */
			virt_wmb();
			req->state = xb_req_state_got_reply;
			req->cb(req);
		} else
			kfree(req);
	}

	mutex_unlock(&xs_response_mutex);

	state.in_msg = false;
	state.alloc = NULL;
	return err;

 out:
	mutex_unlock(&xs_response_mutex);
	state.in_msg = false;
	kfree(state.alloc);
	state.alloc = NULL;
	return err;
}

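/*
 * Write one queued request (message header, then each payload vector) to
 * the request ring.  Progress is kept in a static struct so a request
 * that does not fit in one pass is resumed on the next call.  On failure
 * the request is taken off the queue and completed with XS_ERROR.
 */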
static int process_writes(void)
{
	static struct {
		struct xb_req_data *req;
		int idx;
		unsigned int written;
	} state;
	void *base;
	unsigned int len;
	int err = 0;

	if (!xb_data_to_write())
		return 0;

	mutex_lock(&xb_write_mutex);

	if (!state.req) {
		state.req = list_first_entry(&xb_write_list,
					     struct xb_req_data, list);
		state.idx = -1;
		state.written = 0;
	}

	if (state.req->state == xb_req_state_aborted)
		goto out_err;

	while (state.idx < state.req->num_vecs) {
		if (state.idx < 0) {
			base = &state.req->msg;
			len = sizeof(state.req->msg);
		} else {
			base = state.req->vec[state.idx].iov_base;
			len = state.req->vec[state.idx].iov_len;
		}
		err = xb_write(base + state.written, len - state.written);
		if (err < 0)
			goto out_err;
		state.written += err;
		if (state.written != len)
			goto out;

		state.idx++;
		state.written = 0;
	}

	list_del(&state.req->list);
	state.req->state = xb_req_state_wait_reply;
	list_add_tail(&state.req->list, &xs_reply_list);
	state.req = NULL;

 out:
	mutex_unlock(&xb_write_mutex);

	return 0;

 out_err:
	state.req->msg.type = XS_ERROR;
	state.req->err = err;
	list_del(&state.req->list);
	if (state.req->state == xb_req_state_aborted)
		kfree(state.req);
	else {
		/* write err, then update state */
		virt_wmb();
		state.req->state = xb_req_state_got_reply;
		wake_up(&state.req->wq);
	}

	mutex_unlock(&xb_write_mutex);

	state.req = NULL;

	return err;
}

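/* Wake condition for the xenbus thread: anything to read or write? */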
static int xb_thread_work(void)
{
	return xb_data_to_read() || xb_data_to_write();
}

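/*
 * Body of the xenbus kernel thread: sleep until there is work, then
 * alternate between processing responses and flushing queued writes.
 */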
static int xenbus_thread(void *unused)
{
	int err;

	while (!kthread_should_stop()) {
		if (wait_event_interruptible(xb_waitq, xb_thread_work()))
			continue;

		err = process_msg();
		if (err == -ENOMEM)
			schedule();
		else if (err)
			pr_warn_ratelimited("error %d while reading message\n",
					    err);

		err = process_writes();
		if (err)
			pr_warn_ratelimited("error %d while writing message\n",
					    err);
	}

	xenbus_task = NULL;
	return 0;
}

/**
 * xb_init_comms - Set up interrupt handler off store event channel.
 */
int xb_init_comms(void)
{
	struct xenstore_domain_interface *intf = xen_store_interface;

	if (intf->req_prod != intf->req_cons)
		pr_err("request ring is not quiescent (%08x:%08x)!\n",
		       intf->req_cons, intf->req_prod);

	if (intf->rsp_prod != intf->rsp_cons) {
		pr_warn("response ring is not quiescent (%08x:%08x): fixing up\n",
			intf->rsp_cons, intf->rsp_prod);
		/* breaks kdump */
		if (!reset_devices)
			intf->rsp_cons = intf->rsp_prod;
	}

	if (xenbus_irq) {
		/* Already have an irq; assume we're resuming */
		rebind_evtchn_irq(xen_store_evtchn, xenbus_irq);
	} else {
		int err;

		err = bind_evtchn_to_irqhandler(xen_store_evtchn, wake_waiting,
						0, "xenbus", &xb_waitq);
		if (err < 0) {
			pr_err("request irq failed %i\n", err);
			return err;
		}

		xenbus_irq = err;

		if (!xenbus_task) {
			xenbus_task = kthread_run(xenbus_thread, NULL,
						  "xenbus");
			if (IS_ERR(xenbus_task))
				return PTR_ERR(xenbus_task);
		}
	}

	return 0;
}

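/**
 * xb_deinit_comms - Tear down the interrupt handler for the store event channel.
 */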
void xb_deinit_comms(void)
{
	unbind_from_irqhandler(xenbus_irq, &xb_waitq);
	xenbus_irq = 0;
}