1/******************************************************************************
2 * Client-facing interface for the Xenbus driver.  In other words, the
3 * interface between the Xenbus and the device-specific code, be it the
4 * frontend or the backend of that driver.
5 *
6 * Copyright (C) 2005 XenSource Ltd
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License version 2
10 * as published by the Free Software Foundation; or, when distributed
11 * separately from the Linux kernel or incorporated into other
12 * software packages, subject to the following license:
13 *
14 * Permission is hereby granted, free of charge, to any person obtaining a copy
15 * of this source file (the "Software"), to deal in the Software without
16 * restriction, including without limitation the rights to use, copy, modify,
17 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
18 * and to permit persons to whom the Software is furnished to do so, subject to
19 * the following conditions:
20 *
21 * The above copyright notice and this permission notice shall be included in
22 * all copies or substantial portions of the Software.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
25 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
26 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
27 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
28 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
29 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
30 * IN THE SOFTWARE.
31 */
32
33#include <linux/mm.h>
34#include <linux/slab.h>
35#include <linux/types.h>
36#include <linux/spinlock.h>
37#include <linux/vmalloc.h>
38#include <linux/export.h>
39#include <asm/xen/hypervisor.h>
40#include <xen/page.h>
41#include <xen/interface/xen.h>
42#include <xen/interface/event_channel.h>
43#include <xen/balloon.h>
44#include <xen/events.h>
45#include <xen/grant_table.h>
46#include <xen/xenbus.h>
47#include <xen/xen.h>
48#include <xen/features.h>
49
50#include "xenbus.h"
51
/* Number of pages needed to hold @_grants grants, rounded up. */
#define XENBUS_PAGES(_grants)	(DIV_ROUND_UP(_grants, XEN_PFN_PER_PAGE))

/* Page count corresponding to the largest ring (XENBUS_MAX_RING_GRANTS). */
#define XENBUS_MAX_RING_PAGES	(XENBUS_PAGES(XENBUS_MAX_RING_GRANTS))
55
/*
 * Bookkeeping for one mapped ring.  Nodes are linked on xenbus_valloc_pages
 * while the mapping exists and are looked up again by mapped address when the
 * ring is unmapped.
 */
struct xenbus_map_node {
	/* Link in the xenbus_valloc_pages list. */
	struct list_head next;
	union {
		/* Filled in by xenbus_map_ring_pv(). */
		struct {
			struct vm_struct *area;
		} pv;
		/* Filled in by xenbus_map_ring_hvm(). */
		struct {
			struct page *pages[XENBUS_MAX_RING_PAGES];
			unsigned long addrs[XENBUS_MAX_RING_GRANTS];
			/* Address returned by vmap() for the whole ring. */
			void *addr;
		} hvm;
	};
	/* One grant handle per mapped grant reference. */
	grant_handle_t handles[XENBUS_MAX_RING_GRANTS];
	/* Number of entries of handles[] that are in use. */
	unsigned int   nr_handles;
};
71
/*
 * Scratch state for a single xenbus_map_ring_valloc() call.  It is allocated
 * at the start of that call and freed before it returns.
 */
struct map_ring_valloc {
	/* Node to publish on success; set to NULL once ownership moved. */
	struct xenbus_map_node *node;

	/* Why do we need two arrays? See comment of __xenbus_map_ring */
	unsigned long addrs[XENBUS_MAX_RING_GRANTS];
	phys_addr_t phys_addrs[XENBUS_MAX_RING_GRANTS];

	/* Hypercall argument arrays for mapping and for undoing a mapping. */
	struct gnttab_map_grant_ref map[XENBUS_MAX_RING_GRANTS];
	struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_GRANTS];

	/* Next slot to fill in the address arrays (used by the callbacks). */
	unsigned int idx;
};
84
/*
 * List of all currently mapped rings (struct xenbus_map_node), and the lock
 * taken around every list insertion, lookup and removal in this file.
 */
static DEFINE_SPINLOCK(xenbus_valloc_lock);
static LIST_HEAD(xenbus_valloc_pages);
87
/*
 * Ring mapping backend.  xenbus_map_ring_valloc() dispatches to ->map and
 * xenbus_unmap_ring_vfree() dispatches to ->unmap.
 */
struct xenbus_ring_ops {
	/* Map @nr_grefs grants and store the resulting address in *@vaddr. */
	int (*map)(struct xenbus_device *dev, struct map_ring_valloc *info,
		   grant_ref_t *gnt_refs, unsigned int nr_grefs,
		   void **vaddr);
	/* Undo a mapping previously established at @vaddr by ->map. */
	int (*unmap)(struct xenbus_device *dev, void *vaddr);
};
94
/* Active ring backend; selected once by xenbus_ring_ops_init(). */
static const struct xenbus_ring_ops *ring_ops __read_mostly;
96
97const char *xenbus_strstate(enum xenbus_state state)
98{
99	static const char *const name[] = {
100		[ XenbusStateUnknown      ] = "Unknown",
101		[ XenbusStateInitialising ] = "Initialising",
102		[ XenbusStateInitWait     ] = "InitWait",
103		[ XenbusStateInitialised  ] = "Initialised",
104		[ XenbusStateConnected    ] = "Connected",
105		[ XenbusStateClosing      ] = "Closing",
106		[ XenbusStateClosed	  ] = "Closed",
107		[XenbusStateReconfiguring] = "Reconfiguring",
108		[XenbusStateReconfigured] = "Reconfigured",
109	};
110	return (state < ARRAY_SIZE(name)) ? name[state] : "INVALID";
111}
112EXPORT_SYMBOL_GPL(xenbus_strstate);
113
114/**
115 * xenbus_watch_path - register a watch
116 * @dev: xenbus device
117 * @path: path to watch
118 * @watch: watch to register
119 * @will_handle: events queuing determine callback
120 * @callback: callback to register
121 *
122 * Register a @watch on the given path, using the given xenbus_watch structure
123 * for storage, @will_handle function as the callback to determine if each
124 * event need to be queued, and the given @callback function as the callback.
125 * On success, the given @path will be saved as @watch->node, and remains the
126 * caller's to free.  On error, @watch->node will be NULL, the device will
127 * switch to %XenbusStateClosing, and the error will be saved in the store.
128 *
129 * Returns: %0 on success or -errno on error
130 */
131int xenbus_watch_path(struct xenbus_device *dev, const char *path,
132		      struct xenbus_watch *watch,
133		      bool (*will_handle)(struct xenbus_watch *,
134					  const char *, const char *),
135		      void (*callback)(struct xenbus_watch *,
136				       const char *, const char *))
137{
138	int err;
139
140	watch->node = path;
141	watch->will_handle = will_handle;
142	watch->callback = callback;
143
144	err = register_xenbus_watch(watch);
145
146	if (err) {
147		watch->node = NULL;
148		watch->will_handle = NULL;
149		watch->callback = NULL;
150		xenbus_dev_fatal(dev, err, "adding watch on %s", path);
151	}
152
153	return err;
154}
155EXPORT_SYMBOL_GPL(xenbus_watch_path);
156
157
158/**
159 * xenbus_watch_pathfmt - register a watch on a sprintf-formatted path
160 * @dev: xenbus device
161 * @watch: watch to register
162 * @will_handle: events queuing determine callback
163 * @callback: callback to register
164 * @pathfmt: format of path to watch
165 *
166 * Register a watch on the given @path, using the given xenbus_watch
167 * structure for storage, @will_handle function as the callback to determine if
168 * each event need to be queued, and the given @callback function as the
169 * callback.  On success, the watched path (@path/@path2) will be saved
170 * as @watch->node, and becomes the caller's to kfree().
171 * On error, watch->node will be NULL, so the caller has nothing to
172 * free, the device will switch to %XenbusStateClosing, and the error will be
173 * saved in the store.
174 *
175 * Returns: %0 on success or -errno on error
176 */
177int xenbus_watch_pathfmt(struct xenbus_device *dev,
178			 struct xenbus_watch *watch,
179			 bool (*will_handle)(struct xenbus_watch *,
180					const char *, const char *),
181			 void (*callback)(struct xenbus_watch *,
182					  const char *, const char *),
183			 const char *pathfmt, ...)
184{
185	int err;
186	va_list ap;
187	char *path;
188
189	va_start(ap, pathfmt);
190	path = kvasprintf(GFP_NOIO | __GFP_HIGH, pathfmt, ap);
191	va_end(ap);
192
193	if (!path) {
194		xenbus_dev_fatal(dev, -ENOMEM, "allocating path for watch");
195		return -ENOMEM;
196	}
197	err = xenbus_watch_path(dev, path, watch, will_handle, callback);
198
199	if (err)
200		kfree(path);
201	return err;
202}
203EXPORT_SYMBOL_GPL(xenbus_watch_pathfmt);
204
/*
 * Forward declaration: __xenbus_switch_state() reports its failures through
 * xenbus_switch_fatal(), which is defined later in this file.
 */
static void xenbus_switch_fatal(struct xenbus_device *, int, int,
				const char *, ...);
207
208static int
209__xenbus_switch_state(struct xenbus_device *dev,
210		      enum xenbus_state state, int depth)
211{
212	/* We check whether the state is currently set to the given value, and
213	   if not, then the state is set.  We don't want to unconditionally
214	   write the given state, because we don't want to fire watches
215	   unnecessarily.  Furthermore, if the node has gone, we don't write
216	   to it, as the device will be tearing down, and we don't want to
217	   resurrect that directory.
218
219	   Note that, because of this cached value of our state, this
220	   function will not take a caller's Xenstore transaction
221	   (something it was trying to in the past) because dev->state
222	   would not get reset if the transaction was aborted.
223	 */
224
225	struct xenbus_transaction xbt;
226	int current_state;
227	int err, abort;
228
229	if (state == dev->state)
230		return 0;
231
232again:
233	abort = 1;
234
235	err = xenbus_transaction_start(&xbt);
236	if (err) {
237		xenbus_switch_fatal(dev, depth, err, "starting transaction");
238		return 0;
239	}
240
241	err = xenbus_scanf(xbt, dev->nodename, "state", "%d", &current_state);
242	if (err != 1)
243		goto abort;
244
245	err = xenbus_printf(xbt, dev->nodename, "state", "%d", state);
246	if (err) {
247		xenbus_switch_fatal(dev, depth, err, "writing new state");
248		goto abort;
249	}
250
251	abort = 0;
252abort:
253	err = xenbus_transaction_end(xbt, abort);
254	if (err) {
255		if (err == -EAGAIN && !abort)
256			goto again;
257		xenbus_switch_fatal(dev, depth, err, "ending transaction");
258	} else
259		dev->state = state;
260
261	return 0;
262}
263
264/**
265 * xenbus_switch_state - save the new state of a driver
266 * @dev: xenbus device
267 * @state: new state
268 *
269 * Advertise in the store a change of the given driver to the given new_state.
270 * On error, the device will switch to XenbusStateClosing, and the error
271 * will be saved in the store.
272 *
273 * Returns: %0 on success or -errno on error
274 */
275int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state state)
276{
277	return __xenbus_switch_state(dev, state, 0);
278}
279
280EXPORT_SYMBOL_GPL(xenbus_switch_state);
281
282int xenbus_frontend_closed(struct xenbus_device *dev)
283{
284	xenbus_switch_state(dev, XenbusStateClosed);
285	complete(&dev->down);
286	return 0;
287}
288EXPORT_SYMBOL_GPL(xenbus_frontend_closed);
289
290static void xenbus_va_dev_error(struct xenbus_device *dev, int err,
291				const char *fmt, va_list ap)
292{
293	unsigned int len;
294	char *printf_buffer;
295	char *path_buffer;
296
297#define PRINTF_BUFFER_SIZE 4096
298
299	printf_buffer = kmalloc(PRINTF_BUFFER_SIZE, GFP_KERNEL);
300	if (!printf_buffer)
301		return;
302
303	len = sprintf(printf_buffer, "%i ", -err);
304	vsnprintf(printf_buffer + len, PRINTF_BUFFER_SIZE - len, fmt, ap);
305
306	dev_err(&dev->dev, "%s\n", printf_buffer);
307
308	path_buffer = kasprintf(GFP_KERNEL, "error/%s", dev->nodename);
309	if (path_buffer)
310		xenbus_write(XBT_NIL, path_buffer, "error", printf_buffer);
311
312	kfree(printf_buffer);
313	kfree(path_buffer);
314}
315
/**
 * xenbus_dev_error - place an error message into the store
 * @dev: xenbus device
 * @err: error to report
 * @fmt: error message format
 *
 * Report the given negative errno into the store, together with the message
 * produced from @fmt and the following arguments.  The device state is left
 * untouched.
 */
void xenbus_dev_error(struct xenbus_device *dev, int err, const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	xenbus_va_dev_error(dev, err, fmt, args);
	va_end(args);
}
EXPORT_SYMBOL_GPL(xenbus_dev_error);
334
335/**
336 * xenbus_dev_fatal - put an error messages into the store and then shutdown
337 * @dev: xenbus device
338 * @err: error to report
339 * @fmt: error message format
340 *
341 * Equivalent to xenbus_dev_error(dev, err, fmt, args), followed by
342 * xenbus_switch_state(dev, XenbusStateClosing) to schedule an orderly
343 * closedown of this driver and its peer.
344 */
345
346void xenbus_dev_fatal(struct xenbus_device *dev, int err, const char *fmt, ...)
347{
348	va_list ap;
349
350	va_start(ap, fmt);
351	xenbus_va_dev_error(dev, err, fmt, ap);
352	va_end(ap);
353
354	xenbus_switch_state(dev, XenbusStateClosing);
355}
356EXPORT_SYMBOL_GPL(xenbus_dev_fatal);
357
358/*
359 * Equivalent to xenbus_dev_fatal(dev, err, fmt, args), but helps
360 * avoiding recursion within xenbus_switch_state.
361 */
362static void xenbus_switch_fatal(struct xenbus_device *dev, int depth, int err,
363				const char *fmt, ...)
364{
365	va_list ap;
366
367	va_start(ap, fmt);
368	xenbus_va_dev_error(dev, err, fmt, ap);
369	va_end(ap);
370
371	if (!depth)
372		__xenbus_switch_state(dev, XenbusStateClosing, 1);
373}
374
375/*
376 * xenbus_setup_ring
377 * @dev: xenbus device
378 * @vaddr: pointer to starting virtual address of the ring
379 * @nr_pages: number of pages to be granted
380 * @grefs: grant reference array to be filled in
381 *
382 * Allocate physically contiguous pages for a shared ring buffer and grant it
383 * to the peer of the given device. The ring buffer is initially filled with
384 * zeroes. The virtual address of the ring is stored at @vaddr and the
385 * grant references are stored in the @grefs array. In case of error @vaddr
386 * will be set to NULL and @grefs will be filled with INVALID_GRANT_REF.
387 */
388int xenbus_setup_ring(struct xenbus_device *dev, gfp_t gfp, void **vaddr,
389		      unsigned int nr_pages, grant_ref_t *grefs)
390{
391	unsigned long ring_size = nr_pages * XEN_PAGE_SIZE;
392	grant_ref_t gref_head;
393	unsigned int i;
394	void *addr;
395	int ret;
396
397	addr = *vaddr = alloc_pages_exact(ring_size, gfp | __GFP_ZERO);
398	if (!*vaddr) {
399		ret = -ENOMEM;
400		goto err;
401	}
402
403	ret = gnttab_alloc_grant_references(nr_pages, &gref_head);
404	if (ret) {
405		xenbus_dev_fatal(dev, ret, "granting access to %u ring pages",
406				 nr_pages);
407		goto err;
408	}
409
410	for (i = 0; i < nr_pages; i++) {
411		unsigned long gfn;
412
413		if (is_vmalloc_addr(*vaddr))
414			gfn = pfn_to_gfn(vmalloc_to_pfn(addr));
415		else
416			gfn = virt_to_gfn(addr);
417
418		grefs[i] = gnttab_claim_grant_reference(&gref_head);
419		gnttab_grant_foreign_access_ref(grefs[i], dev->otherend_id,
420						gfn, 0);
421
422		addr += XEN_PAGE_SIZE;
423	}
424
425	return 0;
426
427 err:
428	if (*vaddr)
429		free_pages_exact(*vaddr, ring_size);
430	for (i = 0; i < nr_pages; i++)
431		grefs[i] = INVALID_GRANT_REF;
432	*vaddr = NULL;
433
434	return ret;
435}
436EXPORT_SYMBOL_GPL(xenbus_setup_ring);
437
438/*
439 * xenbus_teardown_ring
440 * @vaddr: starting virtual address of the ring
441 * @nr_pages: number of pages
442 * @grefs: grant reference array
443 *
444 * Remove grants for the shared ring buffer and free the associated memory.
445 * On return the grant reference array is filled with INVALID_GRANT_REF.
446 */
447void xenbus_teardown_ring(void **vaddr, unsigned int nr_pages,
448			  grant_ref_t *grefs)
449{
450	unsigned int i;
451
452	for (i = 0; i < nr_pages; i++) {
453		if (grefs[i] != INVALID_GRANT_REF) {
454			gnttab_end_foreign_access(grefs[i], NULL);
455			grefs[i] = INVALID_GRANT_REF;
456		}
457	}
458
459	if (*vaddr)
460		free_pages_exact(*vaddr, nr_pages * XEN_PAGE_SIZE);
461	*vaddr = NULL;
462}
463EXPORT_SYMBOL_GPL(xenbus_teardown_ring);
464
465/*
466 * Allocate an event channel for the given xenbus_device, assigning the newly
467 * created local port to *port.  Return 0 on success, or -errno on error.  On
468 * error, the device will switch to XenbusStateClosing, and the error will be
469 * saved in the store.
470 */
471int xenbus_alloc_evtchn(struct xenbus_device *dev, evtchn_port_t *port)
472{
473	struct evtchn_alloc_unbound alloc_unbound;
474	int err;
475
476	alloc_unbound.dom = DOMID_SELF;
477	alloc_unbound.remote_dom = dev->otherend_id;
478
479	err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
480					  &alloc_unbound);
481	if (err)
482		xenbus_dev_fatal(dev, err, "allocating event channel");
483	else
484		*port = alloc_unbound.port;
485
486	return err;
487}
488EXPORT_SYMBOL_GPL(xenbus_alloc_evtchn);
489
490
491/*
492 * Free an existing event channel. Returns 0 on success or -errno on error.
493 */
494int xenbus_free_evtchn(struct xenbus_device *dev, evtchn_port_t port)
495{
496	struct evtchn_close close;
497	int err;
498
499	close.port = port;
500
501	err = HYPERVISOR_event_channel_op(EVTCHNOP_close, &close);
502	if (err)
503		xenbus_dev_error(dev, err, "freeing event channel %u", port);
504
505	return err;
506}
507EXPORT_SYMBOL_GPL(xenbus_free_evtchn);
508
509
510/**
511 * xenbus_map_ring_valloc - allocate & map pages of VA space
512 * @dev: xenbus device
513 * @gnt_refs: grant reference array
514 * @nr_grefs: number of grant references
515 * @vaddr: pointer to address to be filled out by mapping
516 *
517 * Map @nr_grefs pages of memory into this domain from another
518 * domain's grant table.  xenbus_map_ring_valloc allocates @nr_grefs
519 * pages of virtual address space, maps the pages to that address, and sets
520 * *vaddr to that address.  If an error is returned, device will switch to
521 * XenbusStateClosing and the error message will be saved in XenStore.
522 *
523 * Returns: %0 on success or -errno on error
524 */
525int xenbus_map_ring_valloc(struct xenbus_device *dev, grant_ref_t *gnt_refs,
526			   unsigned int nr_grefs, void **vaddr)
527{
528	int err;
529	struct map_ring_valloc *info;
530
531	*vaddr = NULL;
532
533	if (nr_grefs > XENBUS_MAX_RING_GRANTS)
534		return -EINVAL;
535
536	info = kzalloc(sizeof(*info), GFP_KERNEL);
537	if (!info)
538		return -ENOMEM;
539
540	info->node = kzalloc(sizeof(*info->node), GFP_KERNEL);
541	if (!info->node)
542		err = -ENOMEM;
543	else
544		err = ring_ops->map(dev, info, gnt_refs, nr_grefs, vaddr);
545
546	kfree(info->node);
547	kfree(info);
548	return err;
549}
550EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc);
551
552/* N.B. sizeof(phys_addr_t) doesn't always equal to sizeof(unsigned
553 * long), e.g. 32-on-64.  Caller is responsible for preparing the
554 * right array to feed into this function */
555static int __xenbus_map_ring(struct xenbus_device *dev,
556			     grant_ref_t *gnt_refs,
557			     unsigned int nr_grefs,
558			     grant_handle_t *handles,
559			     struct map_ring_valloc *info,
560			     unsigned int flags,
561			     bool *leaked)
562{
563	int i, j;
564
565	if (nr_grefs > XENBUS_MAX_RING_GRANTS)
566		return -EINVAL;
567
568	for (i = 0; i < nr_grefs; i++) {
569		gnttab_set_map_op(&info->map[i], info->phys_addrs[i], flags,
570				  gnt_refs[i], dev->otherend_id);
571		handles[i] = INVALID_GRANT_HANDLE;
572	}
573
574	gnttab_batch_map(info->map, i);
575
576	for (i = 0; i < nr_grefs; i++) {
577		if (info->map[i].status != GNTST_okay) {
578			xenbus_dev_fatal(dev, info->map[i].status,
579					 "mapping in shared page %d from domain %d",
580					 gnt_refs[i], dev->otherend_id);
581			goto fail;
582		} else
583			handles[i] = info->map[i].handle;
584	}
585
586	return 0;
587
588 fail:
589	for (i = j = 0; i < nr_grefs; i++) {
590		if (handles[i] != INVALID_GRANT_HANDLE) {
591			gnttab_set_unmap_op(&info->unmap[j],
592					    info->phys_addrs[i],
593					    GNTMAP_host_map, handles[i]);
594			j++;
595		}
596	}
597
598	BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, info->unmap, j));
599
600	*leaked = false;
601	for (i = 0; i < j; i++) {
602		if (info->unmap[i].status != GNTST_okay) {
603			*leaked = true;
604			break;
605		}
606	}
607
608	return -ENOENT;
609}
610
611/**
612 * xenbus_unmap_ring - unmap memory from another domain
613 * @dev: xenbus device
614 * @handles: grant handle array
615 * @nr_handles: number of handles in the array
616 * @vaddrs: addresses to unmap
617 *
618 * Unmap memory in this domain that was imported from another domain.
619 *
620 * Returns: %0 on success or GNTST_* on error
621 * (see xen/include/interface/grant_table.h).
622 */
623static int xenbus_unmap_ring(struct xenbus_device *dev, grant_handle_t *handles,
624			     unsigned int nr_handles, unsigned long *vaddrs)
625{
626	struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_GRANTS];
627	int i;
628	int err;
629
630	if (nr_handles > XENBUS_MAX_RING_GRANTS)
631		return -EINVAL;
632
633	for (i = 0; i < nr_handles; i++)
634		gnttab_set_unmap_op(&unmap[i], vaddrs[i],
635				    GNTMAP_host_map, handles[i]);
636
637	BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap, i));
638
639	err = GNTST_okay;
640	for (i = 0; i < nr_handles; i++) {
641		if (unmap[i].status != GNTST_okay) {
642			xenbus_dev_error(dev, unmap[i].status,
643					 "unmapping page at handle %d error %d",
644					 handles[i], unmap[i].status);
645			err = unmap[i].status;
646			break;
647		}
648	}
649
650	return err;
651}
652
653static void xenbus_map_ring_setup_grant_hvm(unsigned long gfn,
654					    unsigned int goffset,
655					    unsigned int len,
656					    void *data)
657{
658	struct map_ring_valloc *info = data;
659	unsigned long vaddr = (unsigned long)gfn_to_virt(gfn);
660
661	info->phys_addrs[info->idx] = vaddr;
662	info->addrs[info->idx] = vaddr;
663
664	info->idx++;
665}
666
667static int xenbus_map_ring_hvm(struct xenbus_device *dev,
668			       struct map_ring_valloc *info,
669			       grant_ref_t *gnt_ref,
670			       unsigned int nr_grefs,
671			       void **vaddr)
672{
673	struct xenbus_map_node *node = info->node;
674	int err;
675	void *addr;
676	bool leaked = false;
677	unsigned int nr_pages = XENBUS_PAGES(nr_grefs);
678
679	err = xen_alloc_unpopulated_pages(nr_pages, node->hvm.pages);
680	if (err)
681		goto out_err;
682
683	gnttab_foreach_grant(node->hvm.pages, nr_grefs,
684			     xenbus_map_ring_setup_grant_hvm,
685			     info);
686
687	err = __xenbus_map_ring(dev, gnt_ref, nr_grefs, node->handles,
688				info, GNTMAP_host_map, &leaked);
689	node->nr_handles = nr_grefs;
690
691	if (err)
692		goto out_free_ballooned_pages;
693
694	addr = vmap(node->hvm.pages, nr_pages, VM_MAP | VM_IOREMAP,
695		    PAGE_KERNEL);
696	if (!addr) {
697		err = -ENOMEM;
698		goto out_xenbus_unmap_ring;
699	}
700
701	node->hvm.addr = addr;
702
703	spin_lock(&xenbus_valloc_lock);
704	list_add(&node->next, &xenbus_valloc_pages);
705	spin_unlock(&xenbus_valloc_lock);
706
707	*vaddr = addr;
708	info->node = NULL;
709
710	return 0;
711
712 out_xenbus_unmap_ring:
713	if (!leaked)
714		xenbus_unmap_ring(dev, node->handles, nr_grefs, info->addrs);
715	else
716		pr_alert("leaking %p size %u page(s)",
717			 addr, nr_pages);
718 out_free_ballooned_pages:
719	if (!leaked)
720		xen_free_unpopulated_pages(nr_pages, node->hvm.pages);
721 out_err:
722	return err;
723}
724
725/**
726 * xenbus_unmap_ring_vfree - unmap a page of memory from another domain
727 * @dev: xenbus device
728 * @vaddr: addr to unmap
729 *
730 * Based on Rusty Russell's skeleton driver's unmap_page.
731 * Unmap a page of memory in this domain that was imported from another domain.
732 * Use xenbus_unmap_ring_vfree if you mapped in your memory with
733 * xenbus_map_ring_valloc (it will free the virtual address space).
734 *
735 * Returns: %0 on success or GNTST_* on error
736 * (see xen/include/interface/grant_table.h).
737 */
738int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr)
739{
740	return ring_ops->unmap(dev, vaddr);
741}
742EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree);
743
744#ifdef CONFIG_XEN_PV
745static int map_ring_apply(pte_t *pte, unsigned long addr, void *data)
746{
747	struct map_ring_valloc *info = data;
748
749	info->phys_addrs[info->idx++] = arbitrary_virt_to_machine(pte).maddr;
750	return 0;
751}
752
753static int xenbus_map_ring_pv(struct xenbus_device *dev,
754			      struct map_ring_valloc *info,
755			      grant_ref_t *gnt_refs,
756			      unsigned int nr_grefs,
757			      void **vaddr)
758{
759	struct xenbus_map_node *node = info->node;
760	struct vm_struct *area;
761	bool leaked = false;
762	int err = -ENOMEM;
763
764	area = get_vm_area(XEN_PAGE_SIZE * nr_grefs, VM_IOREMAP);
765	if (!area)
766		return -ENOMEM;
767	if (apply_to_page_range(&init_mm, (unsigned long)area->addr,
768				XEN_PAGE_SIZE * nr_grefs, map_ring_apply, info))
769		goto failed;
770	err = __xenbus_map_ring(dev, gnt_refs, nr_grefs, node->handles,
771				info, GNTMAP_host_map | GNTMAP_contains_pte,
772				&leaked);
773	if (err)
774		goto failed;
775
776	node->nr_handles = nr_grefs;
777	node->pv.area = area;
778
779	spin_lock(&xenbus_valloc_lock);
780	list_add(&node->next, &xenbus_valloc_pages);
781	spin_unlock(&xenbus_valloc_lock);
782
783	*vaddr = area->addr;
784	info->node = NULL;
785
786	return 0;
787
788failed:
789	if (!leaked)
790		free_vm_area(area);
791	else
792		pr_alert("leaking VM area %p size %u page(s)", area, nr_grefs);
793
794	return err;
795}
796
797static int xenbus_unmap_ring_pv(struct xenbus_device *dev, void *vaddr)
798{
799	struct xenbus_map_node *node;
800	struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_GRANTS];
801	unsigned int level;
802	int i;
803	bool leaked = false;
804	int err;
805
806	spin_lock(&xenbus_valloc_lock);
807	list_for_each_entry(node, &xenbus_valloc_pages, next) {
808		if (node->pv.area->addr == vaddr) {
809			list_del(&node->next);
810			goto found;
811		}
812	}
813	node = NULL;
814 found:
815	spin_unlock(&xenbus_valloc_lock);
816
817	if (!node) {
818		xenbus_dev_error(dev, -ENOENT,
819				 "can't find mapped virtual address %p", vaddr);
820		return GNTST_bad_virt_addr;
821	}
822
823	for (i = 0; i < node->nr_handles; i++) {
824		unsigned long addr;
825
826		memset(&unmap[i], 0, sizeof(unmap[i]));
827		addr = (unsigned long)vaddr + (XEN_PAGE_SIZE * i);
828		unmap[i].host_addr = arbitrary_virt_to_machine(
829			lookup_address(addr, &level)).maddr;
830		unmap[i].dev_bus_addr = 0;
831		unmap[i].handle = node->handles[i];
832	}
833
834	BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap, i));
835
836	err = GNTST_okay;
837	leaked = false;
838	for (i = 0; i < node->nr_handles; i++) {
839		if (unmap[i].status != GNTST_okay) {
840			leaked = true;
841			xenbus_dev_error(dev, unmap[i].status,
842					 "unmapping page at handle %d error %d",
843					 node->handles[i], unmap[i].status);
844			err = unmap[i].status;
845			break;
846		}
847	}
848
849	if (!leaked)
850		free_vm_area(node->pv.area);
851	else
852		pr_alert("leaking VM area %p size %u page(s)",
853			 node->pv.area, node->nr_handles);
854
855	kfree(node);
856	return err;
857}
858
/* Ring backend selected by xenbus_ring_ops_init() for the PV case. */
static const struct xenbus_ring_ops ring_ops_pv = {
	.map = xenbus_map_ring_pv,
	.unmap = xenbus_unmap_ring_pv,
};
863#endif
864
/*
 * Accumulator used by xenbus_unmap_ring_hvm(): collects one virtual address
 * per grant while iterating over the ring's pages.
 */
struct unmap_ring_hvm
{
	/* Next free slot in addrs[]. */
	unsigned int idx;
	unsigned long addrs[XENBUS_MAX_RING_GRANTS];
};
870
871static void xenbus_unmap_ring_setup_grant_hvm(unsigned long gfn,
872					      unsigned int goffset,
873					      unsigned int len,
874					      void *data)
875{
876	struct unmap_ring_hvm *info = data;
877
878	info->addrs[info->idx] = (unsigned long)gfn_to_virt(gfn);
879
880	info->idx++;
881}
882
883static int xenbus_unmap_ring_hvm(struct xenbus_device *dev, void *vaddr)
884{
885	int rv;
886	struct xenbus_map_node *node;
887	void *addr;
888	struct unmap_ring_hvm info = {
889		.idx = 0,
890	};
891	unsigned int nr_pages;
892
893	spin_lock(&xenbus_valloc_lock);
894	list_for_each_entry(node, &xenbus_valloc_pages, next) {
895		addr = node->hvm.addr;
896		if (addr == vaddr) {
897			list_del(&node->next);
898			goto found;
899		}
900	}
901	node = addr = NULL;
902 found:
903	spin_unlock(&xenbus_valloc_lock);
904
905	if (!node) {
906		xenbus_dev_error(dev, -ENOENT,
907				 "can't find mapped virtual address %p", vaddr);
908		return GNTST_bad_virt_addr;
909	}
910
911	nr_pages = XENBUS_PAGES(node->nr_handles);
912
913	gnttab_foreach_grant(node->hvm.pages, node->nr_handles,
914			     xenbus_unmap_ring_setup_grant_hvm,
915			     &info);
916
917	rv = xenbus_unmap_ring(dev, node->handles, node->nr_handles,
918			       info.addrs);
919	if (!rv) {
920		vunmap(vaddr);
921		xen_free_unpopulated_pages(nr_pages, node->hvm.pages);
922	}
923	else
924		WARN(1, "Leaking %p, size %u page(s)\n", vaddr, nr_pages);
925
926	kfree(node);
927	return rv;
928}
929
930/**
931 * xenbus_read_driver_state - read state from a store path
932 * @path: path for driver
933 *
934 * Returns: the state of the driver rooted at the given store path, or
935 * XenbusStateUnknown if no state can be read.
936 */
937enum xenbus_state xenbus_read_driver_state(const char *path)
938{
939	enum xenbus_state result;
940	int err = xenbus_gather(XBT_NIL, path, "state", "%d", &result, NULL);
941	if (err)
942		result = XenbusStateUnknown;
943
944	return result;
945}
946EXPORT_SYMBOL_GPL(xenbus_read_driver_state);
947
/* Ring backend used by xenbus_ring_ops_init() when PV ops are not chosen. */
static const struct xenbus_ring_ops ring_ops_hvm = {
	.map = xenbus_map_ring_hvm,
	.unmap = xenbus_unmap_ring_hvm,
};
952
953void __init xenbus_ring_ops_init(void)
954{
955#ifdef CONFIG_XEN_PV
956	if (!xen_feature(XENFEAT_auto_translated_physmap))
957		ring_ops = &ring_ops_pv;
958	else
959#endif
960		ring_ops = &ring_ops_hvm;
961}
962