1/* $NetBSD: balloon.c,v 1.23 2020/05/06 19:52:19 bouyer Exp $ */
2
3/*-
4 * Copyright (c) 2010 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Cherry G. Mathew <cherry@zyx.in> and
9 * Jean-Yves Migeon <jym@NetBSD.org>
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33/*
34 * The Xen balloon driver enables growing and shrinking PV domains
35 * memory on the fly, by allocating and freeing memory pages directly.
36 * This management needs domain cooperation to work properly, especially
37 * during balloon_inflate() operation where a domain gives back memory to
38 * the hypervisor.
39 *
40 * Shrinking memory on a live system is a difficult task, and may render
41 * it unstable or lead to crash. The driver takes a conservative approach
42 * there by doing memory operations in small steps of a few MiB each time. It
43 * will also refuse to decrease reservation below a certain threshold
44 * (XEN_RESERVATION_MIN), so as to avoid a complete kernel memory exhaustion.
45 *
46 * The user can intervene at two different levels to manage the ballooning
47 * of a domain:
48 * - directly within the domain using a sysctl(9) interface.
49 * - through the Xentools, by modifying the memory/target entry associated
50 *   to a domain. This is usually done in dom0.
51 *
52 * Modification of the reservation is signaled by writing inside the
53 * memory/target node in Xenstore. Writing new values will fire the xenbus
54 * watcher, and wakeup the balloon thread to inflate or deflate balloon.
55 *
56 * Both sysctl(9) nodes and memory/target entry assume that the values passed
57 * to them are in KiB. Internally, the driver will convert this value in
58 * pages (assuming a page is PAGE_SIZE bytes), and issue the correct hypercalls
59 * to decrease/increase domain's reservation accordingly.
60 *
61 * XXX Pages used by balloon are tracked through entries stored in a SLIST.
62 * This allows driver to conveniently add/remove wired pages from memory
63 * without the need to support these "memory gaps" inside uvm(9). Still, the
64 * driver does not currently "plug" new pages into uvm(9) when more memory
65 * is available than originally managed by balloon. For example, deflating
66 * balloon with a total number of pages above physmem is not supported for
67 * now. See balloon_deflate() for more details.
68 *
69 */
70
71#define BALLOONDEBUG 0
72
73#if defined(_KERNEL_OPT)
74#include "opt_uvm_hotplug.h"
75#endif
76
77#include <sys/cdefs.h>
78__KERNEL_RCSID(0, "$NetBSD: balloon.c,v 1.23 2020/05/06 19:52:19 bouyer Exp $");
79
80#include <sys/inttypes.h>
81#include <sys/device.h>
82#include <sys/param.h>
83
84#include <sys/atomic.h>
85#include <sys/condvar.h>
86#include <sys/kernel.h>
87#include <sys/kmem.h>
88#include <sys/kthread.h>
89#include <sys/mutex.h>
90#include <sys/pool.h>
91#include <sys/queue.h>
92#include <sys/sysctl.h>
93
94#include <xen/xen.h>
95#include <xen/xenbus.h>
96#include <xen/balloon.h>
97
98#include <uvm/uvm.h>
99#include <uvm/uvm.h>
100#include <uvm/uvm_physseg.h>
101#include <xen/xenpmap.h>
102
103#include "locators.h"
104
/*
 * Number of MFNs stored in the array passed back and forth between domain
 * and balloon/hypervisor, during balloon_inflate() / balloon_deflate(). These
 * should fit in a page, for performance reasons.
 */
#define BALLOON_DELTA (PAGE_SIZE / sizeof(xen_pfn_t))

/*
 * Safeguard value. Refuse to go below this threshold, so that domain
 * can keep some free pages for its own use. Value is arbitrary, and may
 * evolve with time.
 */
#define BALLOON_BALLAST 256 /* In pages - 1MiB */
#define XEN_RESERVATION_MIN (uvmexp.freemin + BALLOON_BALLAST) /* In pages */

/*
 * KiB <-> pages conversions.
 *
 * The macro arguments are parenthesized so that compound expressions
 * (e.g. "a + b") are converted as a whole instead of having only their
 * first operand cast/multiplied.
 * BALLOON_KB_TO_PAGES() rounds up to a whole number of pages.
 */
#define PAGE_SIZE_KB (PAGE_SIZE >> 10) /* page size in KiB */
#define BALLOON_PAGES_TO_KB(_pg) ((uint64_t)(_pg) * PAGE_SIZE_KB)
#define BALLOON_KB_TO_PAGES(_kb) (roundup((_kb), PAGE_SIZE_KB) / PAGE_SIZE_KB)
124
125/*
126 * A balloon page entry. Needed to track pages put/reclaimed from balloon
127 */
128struct balloon_page_entry {
129	struct vm_page *pg;
130	SLIST_ENTRY(balloon_page_entry) entry;
131};
132
133struct balloon_xenbus_softc {
134	device_t sc_dev;
135	struct sysctllog *sc_log;
136
137	kmutex_t balloon_mtx;   /* Protects condvar, target and res_min (below) */
138	kcondvar_t balloon_cv;  /* Condvar variable for target (below) */
139	size_t balloon_target;  /* Target domain reservation size in pages. */
140	/* Minimum amount of memory reserved by domain, in KiB */
141	uint64_t balloon_res_min;
142
143	xen_pfn_t *sc_mfn_list; /* List of MFNs passed from/to balloon */
144	pool_cache_t bpge_pool; /* pool cache for balloon page entries */
145	/* linked list for tracking pages used by balloon */
146	SLIST_HEAD(, balloon_page_entry) balloon_page_entries;
147	size_t balloon_num_page_entries;
148};
149
150static size_t xenmem_get_currentreservation(void);
151static size_t xenmem_get_maxreservation(void);
152
153static int  bpge_ctor(void *, void *, int);
154static void bpge_dtor(void *, void *);
155
156static void   balloon_thread(void *);
157static size_t balloon_deflate(struct balloon_xenbus_softc*, size_t);
158static size_t balloon_inflate(struct balloon_xenbus_softc*, size_t);
159
160static void sysctl_kern_xen_balloon_setup(struct balloon_xenbus_softc *);
161static void balloon_xenbus_watcher(struct xenbus_watch *, const char **,
162				   unsigned int);
163
164static int  balloon_xenbus_match(device_t, cfdata_t, void *);
165static void balloon_xenbus_attach(device_t, device_t, void *);
166
167CFATTACH_DECL_NEW(balloon, sizeof(struct balloon_xenbus_softc),
168    balloon_xenbus_match, balloon_xenbus_attach, NULL, NULL);
169
170static struct xenbus_watch balloon_xenbus_watch = {
171	.node = __UNCONST("memory/target"),
172	.xbw_callback = balloon_xenbus_watcher,
173};
174
175static struct balloon_xenbus_softc *balloon_sc;
176
177static int
178balloon_xenbus_match(device_t parent, cfdata_t match, void *aux)
179{
180	struct xenbusdev_attach_args *xa = aux;
181
182	if (strcmp(xa->xa_type, "balloon") != 0)
183		return 0;
184
185	if (match->cf_loc[XENBUSCF_ID] != XENBUSCF_ID_DEFAULT &&
186	    match->cf_loc[XENBUSCF_ID] != xa->xa_id)
187		return 0;
188
189	return 1;
190}
191
192static void
193balloon_xenbus_attach(device_t parent, device_t self, void *aux)
194{
195	xen_pfn_t *mfn_list;
196	size_t currentpages;
197	struct balloon_xenbus_softc *sc = balloon_sc = device_private(self);
198
199	aprint_normal(": Xen Balloon driver\n");
200	sc->sc_dev = self;
201
202	/* Initialize target mutex and condvar */
203	mutex_init(&sc->balloon_mtx, MUTEX_DEFAULT, IPL_NONE);
204	cv_init(&sc->balloon_cv, "xen_balloon");
205
206	SLIST_INIT(&sc->balloon_page_entries);
207	sc->balloon_num_page_entries = 0;
208
209	/* Get current number of pages */
210	currentpages = xenmem_get_currentreservation();
211
212	KASSERT(currentpages > 0);
213
214	/* Update initial target value - no need to lock for initialization */
215	sc->balloon_target = currentpages;
216
217	/* Set the values used by sysctl */
218	sc->balloon_res_min =
219	    BALLOON_PAGES_TO_KB(XEN_RESERVATION_MIN);
220
221	aprint_normal_dev(self, "current reservation: %"PRIu64" KiB\n",
222	    BALLOON_PAGES_TO_KB(currentpages));
223#if BALLOONDEBUG
224	aprint_normal_dev(self, "min reservation: %"PRIu64" KiB\n",
225	    sc->balloon_res_min);
226	aprint_normal_dev(self, "max reservation: %"PRIu64" KiB\n",
227	    BALLOON_PAGES_TO_KB(xenmem_get_maxreservation()));
228#endif
229
230	sc->bpge_pool = pool_cache_init(sizeof(struct balloon_page_entry),
231	    0, 0, 0, "xen_bpge", NULL, IPL_NONE, bpge_ctor, bpge_dtor, NULL);
232
233	sysctl_kern_xen_balloon_setup(sc);
234
235	/* List of MFNs passed from/to balloon for inflating/deflating */
236	mfn_list = kmem_alloc(BALLOON_DELTA * sizeof(*mfn_list), KM_SLEEP);
237	sc->sc_mfn_list = mfn_list;
238
239	/* Setup xenbus node watch callback */
240	if (register_xenbus_watch(&balloon_xenbus_watch)) {
241		aprint_error_dev(self, "unable to watch memory/target\n");
242		goto error;
243	}
244
245	/* Setup kernel thread to asynchronously (in/de)-flate the balloon */
246	if (kthread_create(PRI_NONE, KTHREAD_MPSAFE, NULL, balloon_thread,
247	    sc, NULL, "xen_balloon")) {
248		aprint_error_dev(self, "unable to create balloon thread\n");
249		unregister_xenbus_watch(&balloon_xenbus_watch);
250		goto error;
251	}
252
253	if (!pmf_device_register(self, NULL, NULL))
254		aprint_error_dev(self, "couldn't establish power handler\n");
255
256	return;
257
258error:
259	sysctl_teardown(&sc->sc_log);
260	cv_destroy(&sc->balloon_cv);
261	mutex_destroy(&sc->balloon_mtx);
262	return;
263
264}
265
266/*
267 * Returns maximum memory reservation available to current domain. In Xen
268 * with DOMID_SELF, this hypercall never fails: return value should be
269 * interpreted as unsigned.
270 *
271 */
272static size_t
273xenmem_get_maxreservation(void)
274{
275	unsigned int ret;
276
277	ret = HYPERVISOR_memory_op(XENMEM_maximum_reservation,
278	    & (domid_t) { DOMID_SELF });
279
280	if (ret == 0) {
281		/* well, a maximum reservation of 0 is really bogus */
282		panic("%s failed, maximum reservation returned 0", __func__);
283	}
284
285	return ret;
286}
287
288/* Returns current reservation, in pages */
289static size_t
290xenmem_get_currentreservation(void)
291{
292	int ret;
293
294	ret = HYPERVISOR_memory_op(XENMEM_current_reservation,
295				   & (domid_t) { DOMID_SELF });
296
297	if (ret < 0) {
298		panic("%s failed: %d", __func__, ret);
299	}
300
301	return ret;
302}
303
304/*
305 * Get value (in KiB) of memory/target in XenStore for current domain
306 * A return value of 0 can be considered as bogus or absent.
307 */
308static unsigned long long
309balloon_xenbus_read_target(void)
310{
311	unsigned long long new_target;
312	int err = xenbus_read_ull(NULL, "memory", "target", &new_target, 0);
313
314	switch(err) {
315	case 0:
316		return new_target;
317	case ENOENT:
318		break;
319	default:
320		device_printf(balloon_sc->sc_dev,
321		    "error %d, couldn't read xenbus target node\n", err);
322		break;
323	}
324
325	return 0;
326}
327
328/* Set memory/target value (in KiB) in XenStore for current domain */
329static void
330balloon_xenbus_write_target(unsigned long long new_target)
331{
332	int err = xenbus_printf(NULL, "memory", "target", "%llu", new_target);
333
334	if (err != 0) {
335		device_printf(balloon_sc->sc_dev,
336		    "error %d, couldn't write xenbus target node\n", err);
337	}
338
339	return;
340}
341
342static int
343bpge_ctor(void *arg, void *obj, int flags)
344{
345	struct balloon_page_entry *bpge = obj;
346
347	bpge->pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_ZERO);
348	if (bpge->pg == NULL)
349		return ENOMEM;
350
351	return 0;
352
353}
354
355static void
356bpge_dtor(void *arg, void *obj)
357{
358	struct balloon_page_entry *bpge = obj;
359
360	uvm_pagefree(bpge->pg);
361}
362
363/*
364 * Inflate balloon. Pages are moved out of domain's memory towards balloon.
365 */
366static size_t
367balloon_inflate(struct balloon_xenbus_softc *sc, size_t tpages)
368{
369	int rpages, ret;
370	paddr_t pa;
371	struct balloon_page_entry *bpg_entry;
372	xen_pfn_t *mfn_list = sc->sc_mfn_list;
373
374	struct xen_memory_reservation reservation = {
375		.mem_flags = 0,
376		.extent_order = 0,
377		.domid        = DOMID_SELF
378	};
379
380	KASSERT(tpages > 0);
381	KASSERT(tpages <= BALLOON_DELTA);
382
383	memset(mfn_list, 0, BALLOON_DELTA * sizeof(*mfn_list));
384
385	/* allocate pages that will be given to Hypervisor */
386	for (rpages = 0; rpages < tpages; rpages++) {
387
388		bpg_entry = pool_cache_get(sc->bpge_pool, PR_WAITOK);
389		if (bpg_entry == NULL) {
390			/* failed reserving a page for balloon */
391			break;
392		}
393
394		pa = VM_PAGE_TO_PHYS(bpg_entry->pg);
395
396		mfn_list[rpages] = xpmap_ptom(pa) >> PAGE_SHIFT;
397
398		/* Invalidate pg */
399		xpmap_ptom_unmap(pa);
400
401		SLIST_INSERT_HEAD(&balloon_sc->balloon_page_entries,
402				  bpg_entry, entry);
403		balloon_sc->balloon_num_page_entries++;
404	}
405
406	/* Hand over pages to Hypervisor */
407	set_xen_guest_handle(reservation.extent_start, mfn_list);
408	reservation.nr_extents = rpages;
409
410	ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
411				   &reservation);
412	if (ret != rpages) {
413		/*
414		 * we are in bad shape: the operation failed for certain
415		 * MFNs. As the API does not allow us to know which frame
416		 * numbers were erroneous, we cannot really recover safely.
417		 */
418		panic("%s: decrease reservation failed: was %d, "
419		    "returned %d", device_xname(sc->sc_dev), rpages, ret);
420	}
421
422#if BALLOONDEBUG
423	device_printf(sc->sc_dev, "inflate %zu => inflated by %d\n",
424	    tpages, rpages);
425#endif
426	return rpages;
427}
428
429/*
430 * Deflate balloon. Pages are given back to domain's memory.
431 */
432static size_t
433balloon_deflate(struct balloon_xenbus_softc *sc, size_t tpages)
434{
435	int rpages, s, ret;
436	paddr_t pa;
437	struct balloon_page_entry *bpg_entry;
438	xen_pfn_t *mfn_list = sc->sc_mfn_list;
439
440	struct xen_memory_reservation reservation = {
441		.mem_flags = 0,
442		.extent_order = 0,
443		.domid        = DOMID_SELF
444	};
445
446	KASSERT(tpages > 0);
447	KASSERT(tpages <= BALLOON_DELTA);
448
449	memset(mfn_list, 0, BALLOON_DELTA * sizeof(*mfn_list));
450
451#ifndef UVM_HOTPLUG
452	/*
453	 * If the list is empty, we are deflating balloon beyond empty. This
454	 * is currently unsupported as this would require to dynamically add
455	 * new memory pages inside uvm(9) and instruct pmap(9) on how to
456	 * handle them. For now, we clip reservation up to the point we
457	 * can manage them, eg. the remaining bpg entries in the SLIST.
458	 * XXX find a way to hotplug memory through uvm(9)/pmap(9).
459	 */
460	if (tpages > sc->balloon_num_page_entries) {
461		device_printf(sc->sc_dev,
462		    "memory 'hot-plug' unsupported - clipping "
463		    "reservation %zu => %zu pages.\n",
464		    tpages, sc->balloon_num_page_entries);
465		tpages = sc->balloon_num_page_entries;
466	}
467#endif
468
469	/* reclaim pages from balloon */
470	set_xen_guest_handle(reservation.extent_start, mfn_list);
471	reservation.nr_extents = tpages;
472
473	ret = HYPERVISOR_memory_op(XENMEM_increase_reservation, &reservation);
474
475	if (ret < 0) {
476		panic("%s: increase reservation failed, ret %d",
477		    device_xname(sc->sc_dev), ret);
478	}
479
480	if (ret != tpages) {
481		device_printf(sc->sc_dev,
482		    "increase reservation incomplete: was %zu, "
483		    "returned %d\n", tpages, ret);
484	}
485
486	/* plug pages back into memory through bpge entries */
487	for (rpages = 0; rpages < ret; rpages++) {
488#ifdef UVM_HOTPLUG
489		extern paddr_t pmap_pa_end;
490		if (sc->balloon_num_page_entries == 0) { /*XXX: consolidate */
491			/* "hot-plug": Stick it at the end of memory */
492			pa = pmap_pa_end;
493
494			/* P2M update */
495#if defined(_LP64) || defined(PAE)
496			atomic_add_64(&pmap_pa_end, PAGE_SIZE);
497#else
498			atomic_add_32(&pmap_pa_end, PAGE_SIZE);
499#endif
500			s = splvm();
501			xpmap_ptom_map(pa, ptoa(mfn_list[rpages]));
502			xpq_queue_machphys_update(ptoa(mfn_list[rpages]), pa);
503			xpq_flush_queue();
504			splx(s);
505
506			if (uvm_physseg_plug(atop(pa), 1, NULL) == false) {
507				/* Undo P2M */
508				s = splvm();
509				xpmap_ptom_unmap(pa);
510				xpq_queue_machphys_update(ptoa(mfn_list[rpages]), 0);
511				xpq_flush_queue();
512				splx(s);
513#if defined(_LP64) || defined(PAE)
514				atomic_add_64(&pmap_pa_end, -PAGE_SIZE);
515#else
516				atomic_add_32(&pmap_pa_end, -PAGE_SIZE);
517#endif
518				break;
519			}
520			continue;
521		}
522#else
523		if (sc->balloon_num_page_entries == 0) {
524			/*
525			 * XXX This is the case where extra "hot-plug"
526			 * mem w.r.t boot comes in
527			 */
528			device_printf(sc->sc_dev,
529			    "List empty. Cannot be collapsed further!\n");
530			break;
531		}
532#endif
533		bpg_entry = SLIST_FIRST(&balloon_sc->balloon_page_entries);
534		SLIST_REMOVE_HEAD(&balloon_sc->balloon_page_entries, entry);
535		balloon_sc->balloon_num_page_entries--;
536
537		/* Update P->M */
538		pa = VM_PAGE_TO_PHYS(bpg_entry->pg);
539
540		s = splvm();
541		xpmap_ptom_map(pa, ptoa(mfn_list[rpages]));
542		xpq_queue_machphys_update(ptoa(mfn_list[rpages]), pa);
543		xpq_flush_queue();
544		splx(s);
545
546		pool_cache_put(sc->bpge_pool, bpg_entry);
547	}
548
549#if BALLOONDEBUG
550	device_printf(sc->sc_dev, "deflate %zu => deflated by %d\n",
551	    tpages, rpages);
552#endif
553	return rpages;
554}
555
556/*
557 * The balloon thread is responsible for handling inflate/deflate balloon
558 * requests for the current domain given the new "target" value.
559 */
560static void
561balloon_thread(void *cookie)
562{
563	int ret;
564	size_t current, diff, target;
565	struct balloon_xenbus_softc *sc = cookie;
566
567	for/*ever*/ (;;) {
568		current = xenmem_get_currentreservation();
569
570		/*
571		 * We assume that balloon_xenbus_watcher() and
572		 * sysctl(9) handlers checked the sanity of the
573		 * new target value.
574		 */
575		mutex_enter(&sc->balloon_mtx);
576		target = sc->balloon_target;
577		if (current != target) {
578			/*
579			 * There is work to do. Inflate/deflate in
580			 * increments of BALLOON_DELTA pages at maximum. The
581			 * risk of integer wrapping is mitigated by
582			 * BALLOON_DELTA, which is the upper bound.
583			 */
584			mutex_exit(&sc->balloon_mtx);
585			diff = MIN(target - current, BALLOON_DELTA);
586			if (current < target)
587				ret = balloon_deflate(sc, diff);
588			else
589				ret = balloon_inflate(sc, diff);
590
591			if (ret != diff) {
592				/*
593				 * Something went wrong during operation.
594				 * Log error then feedback current value in
595				 * target so that thread gets back to waiting
596				 * for the next iteration
597				 */
598				device_printf(sc->sc_dev,
599				    "WARNING: balloon could not reach target "
600				    "%zu (current %zu)\n",
601				    target, current);
602				current = xenmem_get_currentreservation();
603				mutex_enter(&sc->balloon_mtx);
604				sc->balloon_target = current;
605				mutex_exit(&sc->balloon_mtx);
606			}
607		} else {
608			/* no need for change -- wait for a signal */
609			cv_wait(&sc->balloon_cv, &sc->balloon_mtx);
610			mutex_exit(&sc->balloon_mtx);
611		}
612	}
613}
614
615/*
616 * Handler called when memory/target value changes inside Xenstore.
617 * All sanity checks must also happen in this handler, as it is the common
618 * entry point where controller domain schedules balloon operations.
619 */
620static void
621balloon_xenbus_watcher(struct xenbus_watch *watch, const char **vec,
622		       unsigned int len)
623{
624	size_t new_target;
625	uint64_t target_kb, target_max, target_min;
626
627	target_kb = balloon_xenbus_read_target();
628	if (target_kb == 0) {
629		/* bogus -- just return */
630		return;
631	}
632
633	mutex_enter(&balloon_sc->balloon_mtx);
634	target_min = balloon_sc->balloon_res_min;
635	mutex_exit(&balloon_sc->balloon_mtx);
636	if (target_kb < target_min) {
637		device_printf(balloon_sc->sc_dev,
638		    "new target %"PRIu64" is below min %"PRIu64"\n",
639		    target_kb, target_min);
640		return;
641	}
642
643	target_max = BALLOON_PAGES_TO_KB(xenmem_get_maxreservation());
644	if (target_kb > target_max) {
645		/*
646		 * Should not happen. Hypervisor should block balloon
647		 * requests above mem-max.
648		 */
649		device_printf(balloon_sc->sc_dev,
650		    "new target %"PRIu64" is above max %"PRIu64"\n",
651		    target_kb, target_max);
652		return;
653	}
654
655	new_target = BALLOON_KB_TO_PAGES(target_kb);
656
657	device_printf(balloon_sc->sc_dev,
658	    "current reservation: %zu pages => target: %zu pages\n",
659	    xenmem_get_currentreservation(), new_target);
660
661	/* Only update target if its value changes */
662	mutex_enter(&balloon_sc->balloon_mtx);
663	if (balloon_sc->balloon_target != new_target) {
664		balloon_sc->balloon_target = new_target;
665		cv_signal(&balloon_sc->balloon_cv);
666	}
667	mutex_exit(&balloon_sc->balloon_mtx);
668
669	return;
670}
671
672/*
673 * sysctl(9) stuff
674 */
675
676/* routine to control the minimum memory reserved for the domain */
677static int
678sysctl_kern_xen_balloon_min(SYSCTLFN_ARGS)
679{
680	struct sysctlnode node;
681	u_quad_t newval;
682	int error;
683
684	node = *rnode;
685	node.sysctl_data = &newval;
686
687	mutex_enter(&balloon_sc->balloon_mtx);
688	newval = balloon_sc->balloon_res_min;
689	mutex_exit(&balloon_sc->balloon_mtx);
690
691	error = sysctl_lookup(SYSCTLFN_CALL(&node));
692	if (error || newp == NULL)
693		return error;
694
695	/* Safeguard value: refuse to go below. */
696	if (newval < XEN_RESERVATION_MIN) {
697		device_printf(balloon_sc->sc_dev,
698		    "cannot set min below minimum safe value (%d)\n",
699		    XEN_RESERVATION_MIN);
700		return EPERM;
701	}
702
703	mutex_enter(&balloon_sc->balloon_mtx);
704	if (balloon_sc->balloon_res_min != newval)
705		balloon_sc->balloon_res_min = newval;
706	mutex_exit(&balloon_sc->balloon_mtx);
707
708	return 0;
709}
710
711/* Returns the maximum memory reservation of the domain */
712static int
713sysctl_kern_xen_balloon_max(SYSCTLFN_ARGS)
714{
715	struct sysctlnode node;
716	u_quad_t node_val;
717
718	node = *rnode;
719
720	node_val = BALLOON_PAGES_TO_KB(xenmem_get_maxreservation());
721	node.sysctl_data = &node_val;
722	return sysctl_lookup(SYSCTLFN_CALL(&node));
723}
724
725/* Returns the current memory reservation of the domain */
726static int
727sysctl_kern_xen_balloon_current(SYSCTLFN_ARGS)
728{
729	struct sysctlnode node;
730	u_quad_t node_val;
731
732	node = *rnode;
733
734	node_val = BALLOON_PAGES_TO_KB(xenmem_get_currentreservation());
735	node.sysctl_data = &node_val;
736	return sysctl_lookup(SYSCTLFN_CALL(&node));
737}
738
739/*
740 * Returns the target memory reservation of the domain
741 * When reading, this sysctl will return the value of the balloon_target
742 * variable, converted into KiB
743 * When used for writing, it will update the new memory/target value
744 * in XenStore, but will not update the balloon_target variable directly.
745 * This will be done by the Xenbus watch handler, balloon_xenbus_watcher().
746 */
747static int
748sysctl_kern_xen_balloon_target(SYSCTLFN_ARGS)
749{
750	struct sysctlnode node;
751	u_quad_t newval, res_min, res_max;
752	int error;
753
754	node = *rnode;
755	node.sysctl_data = &newval;
756
757	mutex_enter(&balloon_sc->balloon_mtx);
758	newval = BALLOON_PAGES_TO_KB(balloon_sc->balloon_target);
759	res_min = balloon_sc->balloon_res_min;
760	mutex_exit(&balloon_sc->balloon_mtx);
761
762	error = sysctl_lookup(SYSCTLFN_CALL(&node));
763	if (newp == NULL || error != 0) {
764		return error;
765	}
766
767	/*
768	 * Sanity check new size
769	 * We should not balloon below the minimum reservation
770	 * set by the domain, nor above the maximum reservation set
771	 * by domain controller.
772	 * Note: domain is not supposed to receive balloon requests when
773	 * they are above maximum reservation, but better be safe than
774	 * sorry.
775	 */
776	res_max = BALLOON_PAGES_TO_KB(xenmem_get_maxreservation());
777	if (newval < res_min || newval > res_max) {
778#if BALLOONDEBUG
779		device_printf(balloon_sc->sc_dev,
780		    "new value out of bounds: %"PRIu64"\n", newval);
781		device_printf(balloon_sc->sc_dev,
782		    "min %"PRIu64", max %"PRIu64"\n", res_min, res_max);
783#endif
784		return EPERM;
785	}
786
787	/*
788	 * Write new value inside Xenstore. This will fire the memory/target
789	 * watch handler, balloon_xenbus_watcher().
790	 */
791	balloon_xenbus_write_target(newval);
792
793	return 0;
794}
795
796/* sysctl(9) nodes creation */
797static void
798sysctl_kern_xen_balloon_setup(struct balloon_xenbus_softc *sc)
799{
800	const struct sysctlnode *node = NULL;
801	struct sysctllog **clog = &sc->sc_log;
802
803	sysctl_createv(clog, 0, NULL, &node,
804	    CTLFLAG_PERMANENT,
805	    CTLTYPE_NODE, "machdep", NULL,
806	    NULL, 0, NULL, 0,
807	    CTL_MACHDEP, CTL_EOL);
808
809	sysctl_createv(clog, 0, &node, &node,
810	    CTLFLAG_PERMANENT,
811	    CTLTYPE_NODE, "xen",
812	    SYSCTL_DESCR("Xen top level node"),
813	    NULL, 0, NULL, 0,
814	    CTL_CREATE, CTL_EOL);
815
816	sysctl_createv(clog, 0, &node, &node,
817	    CTLFLAG_PERMANENT,
818	    CTLTYPE_NODE, "balloon",
819	    SYSCTL_DESCR("Balloon details"),
820	    NULL, 0, NULL, 0,
821	    CTL_CREATE, CTL_EOL);
822
823	sysctl_createv(clog, 0, &node, NULL,
824	    CTLFLAG_PERMANENT | CTLFLAG_READONLY,
825	    CTLTYPE_QUAD, "current",
826	    SYSCTL_DESCR("Domain's current memory reservation from "
827		"hypervisor, in KiB."),
828	    sysctl_kern_xen_balloon_current, 0, NULL, 0,
829	    CTL_CREATE, CTL_EOL);
830
831	sysctl_createv(clog, 0, &node, NULL,
832	    CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
833	    CTLTYPE_QUAD, "target",
834	    SYSCTL_DESCR("Target memory reservation for domain, in KiB."),
835	    sysctl_kern_xen_balloon_target, 0, NULL, 0,
836	    CTL_CREATE, CTL_EOL);
837
838	sysctl_createv(clog, 0, &node, NULL,
839	    CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
840	    CTLTYPE_QUAD, "min",
841	    SYSCTL_DESCR("Minimum amount of memory the domain "
842		"reserves, in KiB."),
843	    sysctl_kern_xen_balloon_min, 0, NULL, 0,
844	    CTL_CREATE, CTL_EOL);
845
846	sysctl_createv(clog, 0, &node, NULL,
847	    CTLFLAG_PERMANENT | CTLFLAG_READONLY,
848	    CTLTYPE_QUAD, "max",
849	    SYSCTL_DESCR("Maximum amount of memory the domain "
850		"can use, in KiB."),
851	    sysctl_kern_xen_balloon_max, 0, NULL, 0,
852	    CTL_CREATE, CTL_EOL);
853}
854