1/* $NetBSD: balloon.c,v 1.23 2020/05/06 19:52:19 bouyer Exp $ */ 2 3/*- 4 * Copyright (c) 2010 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Cherry G. Mathew <cherry@zyx.in> and 9 * Jean-Yves Migeon <jym@NetBSD.org> 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33/* 34 * The Xen balloon driver enables growing and shrinking PV domains 35 * memory on the fly, by allocating and freeing memory pages directly. 36 * This management needs domain cooperation to work properly, especially 37 * during balloon_inflate() operation where a domain gives back memory to 38 * the hypervisor. 
 *
 * Shrinking memory on a live system is a difficult task, and may render
 * it unstable or lead to crash. The driver takes a conservative approach
 * there by doing memory operations in small steps of a few MiB each time. It
 * will also refuse to decrease reservation below a certain threshold
 * (XEN_RESERVATION_MIN), so as to avoid a complete kernel memory exhaustion.
 *
 * The user can intervene at two different levels to manage the ballooning
 * of a domain:
 * - directly within the domain using a sysctl(9) interface.
 * - through the Xentools, by modifying the memory/target entry associated
 *   to a domain. This is usually done in dom0.
 *
 * Modification of the reservation is signaled by writing inside the
 * memory/target node in Xenstore. Writing new values will fire the xenbus
 * watcher, and wakeup the balloon thread to inflate or deflate balloon.
 *
 * Both sysctl(9) nodes and the memory/target entry assume that the values
 * passed to them are in KiB. Internally, the driver will convert this value
 * into pages (assuming a page is PAGE_SIZE bytes), and issue the correct
 * hypercalls to decrease/increase the domain's reservation accordingly.
 *
 * XXX Pages used by balloon are tracked through entries stored in a SLIST.
 * This allows the driver to conveniently add/remove wired pages from memory
 * without the need to support these "memory gaps" inside uvm(9). Still, the
 * driver does not currently "plug" new pages into uvm(9) when more memory
 * is available than originally managed by balloon. For example, deflating
 * balloon with a total number of pages above physmem is not supported for
 * now. See balloon_deflate() for more details.
68 * 69 */ 70 71#define BALLOONDEBUG 0 72 73#if defined(_KERNEL_OPT) 74#include "opt_uvm_hotplug.h" 75#endif 76 77#include <sys/cdefs.h> 78__KERNEL_RCSID(0, "$NetBSD: balloon.c,v 1.23 2020/05/06 19:52:19 bouyer Exp $"); 79 80#include <sys/inttypes.h> 81#include <sys/device.h> 82#include <sys/param.h> 83 84#include <sys/atomic.h> 85#include <sys/condvar.h> 86#include <sys/kernel.h> 87#include <sys/kmem.h> 88#include <sys/kthread.h> 89#include <sys/mutex.h> 90#include <sys/pool.h> 91#include <sys/queue.h> 92#include <sys/sysctl.h> 93 94#include <xen/xen.h> 95#include <xen/xenbus.h> 96#include <xen/balloon.h> 97 98#include <uvm/uvm.h> 99#include <uvm/uvm.h> 100#include <uvm/uvm_physseg.h> 101#include <xen/xenpmap.h> 102 103#include "locators.h" 104 105/* 106 * Number of MFNs stored in the array passed back and forth between domain 107 * and balloon/hypervisor, during balloon_inflate() / balloon_deflate(). These 108 * should fit in a page, for performance reasons. 109 */ 110#define BALLOON_DELTA (PAGE_SIZE / sizeof(xen_pfn_t)) 111 112/* 113 * Safeguard value. Refuse to go below this threshold, so that domain 114 * can keep some free pages for its own use. Value is arbitrary, and may 115 * evolve with time. 116 */ 117#define BALLOON_BALLAST 256 /* In pages - 1MiB */ 118#define XEN_RESERVATION_MIN (uvmexp.freemin + BALLOON_BALLAST) /* In pages */ 119 120/* KB <-> PAGEs */ 121#define PAGE_SIZE_KB (PAGE_SIZE >> 10) /* page size in KB */ 122#define BALLOON_PAGES_TO_KB(_pg) ((uint64_t)_pg * PAGE_SIZE_KB) 123#define BALLOON_KB_TO_PAGES(_kb) (roundup(_kb, PAGE_SIZE_KB) / PAGE_SIZE_KB) 124 125/* 126 * A balloon page entry. 
Needed to track pages put/reclaimed from balloon 127 */ 128struct balloon_page_entry { 129 struct vm_page *pg; 130 SLIST_ENTRY(balloon_page_entry) entry; 131}; 132 133struct balloon_xenbus_softc { 134 device_t sc_dev; 135 struct sysctllog *sc_log; 136 137 kmutex_t balloon_mtx; /* Protects condvar, target and res_min (below) */ 138 kcondvar_t balloon_cv; /* Condvar variable for target (below) */ 139 size_t balloon_target; /* Target domain reservation size in pages. */ 140 /* Minimum amount of memory reserved by domain, in KiB */ 141 uint64_t balloon_res_min; 142 143 xen_pfn_t *sc_mfn_list; /* List of MFNs passed from/to balloon */ 144 pool_cache_t bpge_pool; /* pool cache for balloon page entries */ 145 /* linked list for tracking pages used by balloon */ 146 SLIST_HEAD(, balloon_page_entry) balloon_page_entries; 147 size_t balloon_num_page_entries; 148}; 149 150static size_t xenmem_get_currentreservation(void); 151static size_t xenmem_get_maxreservation(void); 152 153static int bpge_ctor(void *, void *, int); 154static void bpge_dtor(void *, void *); 155 156static void balloon_thread(void *); 157static size_t balloon_deflate(struct balloon_xenbus_softc*, size_t); 158static size_t balloon_inflate(struct balloon_xenbus_softc*, size_t); 159 160static void sysctl_kern_xen_balloon_setup(struct balloon_xenbus_softc *); 161static void balloon_xenbus_watcher(struct xenbus_watch *, const char **, 162 unsigned int); 163 164static int balloon_xenbus_match(device_t, cfdata_t, void *); 165static void balloon_xenbus_attach(device_t, device_t, void *); 166 167CFATTACH_DECL_NEW(balloon, sizeof(struct balloon_xenbus_softc), 168 balloon_xenbus_match, balloon_xenbus_attach, NULL, NULL); 169 170static struct xenbus_watch balloon_xenbus_watch = { 171 .node = __UNCONST("memory/target"), 172 .xbw_callback = balloon_xenbus_watcher, 173}; 174 175static struct balloon_xenbus_softc *balloon_sc; 176 177static int 178balloon_xenbus_match(device_t parent, cfdata_t match, void *aux) 179{ 180 
struct xenbusdev_attach_args *xa = aux; 181 182 if (strcmp(xa->xa_type, "balloon") != 0) 183 return 0; 184 185 if (match->cf_loc[XENBUSCF_ID] != XENBUSCF_ID_DEFAULT && 186 match->cf_loc[XENBUSCF_ID] != xa->xa_id) 187 return 0; 188 189 return 1; 190} 191 192static void 193balloon_xenbus_attach(device_t parent, device_t self, void *aux) 194{ 195 xen_pfn_t *mfn_list; 196 size_t currentpages; 197 struct balloon_xenbus_softc *sc = balloon_sc = device_private(self); 198 199 aprint_normal(": Xen Balloon driver\n"); 200 sc->sc_dev = self; 201 202 /* Initialize target mutex and condvar */ 203 mutex_init(&sc->balloon_mtx, MUTEX_DEFAULT, IPL_NONE); 204 cv_init(&sc->balloon_cv, "xen_balloon"); 205 206 SLIST_INIT(&sc->balloon_page_entries); 207 sc->balloon_num_page_entries = 0; 208 209 /* Get current number of pages */ 210 currentpages = xenmem_get_currentreservation(); 211 212 KASSERT(currentpages > 0); 213 214 /* Update initial target value - no need to lock for initialization */ 215 sc->balloon_target = currentpages; 216 217 /* Set the values used by sysctl */ 218 sc->balloon_res_min = 219 BALLOON_PAGES_TO_KB(XEN_RESERVATION_MIN); 220 221 aprint_normal_dev(self, "current reservation: %"PRIu64" KiB\n", 222 BALLOON_PAGES_TO_KB(currentpages)); 223#if BALLOONDEBUG 224 aprint_normal_dev(self, "min reservation: %"PRIu64" KiB\n", 225 sc->balloon_res_min); 226 aprint_normal_dev(self, "max reservation: %"PRIu64" KiB\n", 227 BALLOON_PAGES_TO_KB(xenmem_get_maxreservation())); 228#endif 229 230 sc->bpge_pool = pool_cache_init(sizeof(struct balloon_page_entry), 231 0, 0, 0, "xen_bpge", NULL, IPL_NONE, bpge_ctor, bpge_dtor, NULL); 232 233 sysctl_kern_xen_balloon_setup(sc); 234 235 /* List of MFNs passed from/to balloon for inflating/deflating */ 236 mfn_list = kmem_alloc(BALLOON_DELTA * sizeof(*mfn_list), KM_SLEEP); 237 sc->sc_mfn_list = mfn_list; 238 239 /* Setup xenbus node watch callback */ 240 if (register_xenbus_watch(&balloon_xenbus_watch)) { 241 aprint_error_dev(self, "unable to 
watch memory/target\n"); 242 goto error; 243 } 244 245 /* Setup kernel thread to asynchronously (in/de)-flate the balloon */ 246 if (kthread_create(PRI_NONE, KTHREAD_MPSAFE, NULL, balloon_thread, 247 sc, NULL, "xen_balloon")) { 248 aprint_error_dev(self, "unable to create balloon thread\n"); 249 unregister_xenbus_watch(&balloon_xenbus_watch); 250 goto error; 251 } 252 253 if (!pmf_device_register(self, NULL, NULL)) 254 aprint_error_dev(self, "couldn't establish power handler\n"); 255 256 return; 257 258error: 259 sysctl_teardown(&sc->sc_log); 260 cv_destroy(&sc->balloon_cv); 261 mutex_destroy(&sc->balloon_mtx); 262 return; 263 264} 265 266/* 267 * Returns maximum memory reservation available to current domain. In Xen 268 * with DOMID_SELF, this hypercall never fails: return value should be 269 * interpreted as unsigned. 270 * 271 */ 272static size_t 273xenmem_get_maxreservation(void) 274{ 275 unsigned int ret; 276 277 ret = HYPERVISOR_memory_op(XENMEM_maximum_reservation, 278 & (domid_t) { DOMID_SELF }); 279 280 if (ret == 0) { 281 /* well, a maximum reservation of 0 is really bogus */ 282 panic("%s failed, maximum reservation returned 0", __func__); 283 } 284 285 return ret; 286} 287 288/* Returns current reservation, in pages */ 289static size_t 290xenmem_get_currentreservation(void) 291{ 292 int ret; 293 294 ret = HYPERVISOR_memory_op(XENMEM_current_reservation, 295 & (domid_t) { DOMID_SELF }); 296 297 if (ret < 0) { 298 panic("%s failed: %d", __func__, ret); 299 } 300 301 return ret; 302} 303 304/* 305 * Get value (in KiB) of memory/target in XenStore for current domain 306 * A return value of 0 can be considered as bogus or absent. 
307 */ 308static unsigned long long 309balloon_xenbus_read_target(void) 310{ 311 unsigned long long new_target; 312 int err = xenbus_read_ull(NULL, "memory", "target", &new_target, 0); 313 314 switch(err) { 315 case 0: 316 return new_target; 317 case ENOENT: 318 break; 319 default: 320 device_printf(balloon_sc->sc_dev, 321 "error %d, couldn't read xenbus target node\n", err); 322 break; 323 } 324 325 return 0; 326} 327 328/* Set memory/target value (in KiB) in XenStore for current domain */ 329static void 330balloon_xenbus_write_target(unsigned long long new_target) 331{ 332 int err = xenbus_printf(NULL, "memory", "target", "%llu", new_target); 333 334 if (err != 0) { 335 device_printf(balloon_sc->sc_dev, 336 "error %d, couldn't write xenbus target node\n", err); 337 } 338 339 return; 340} 341 342static int 343bpge_ctor(void *arg, void *obj, int flags) 344{ 345 struct balloon_page_entry *bpge = obj; 346 347 bpge->pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_ZERO); 348 if (bpge->pg == NULL) 349 return ENOMEM; 350 351 return 0; 352 353} 354 355static void 356bpge_dtor(void *arg, void *obj) 357{ 358 struct balloon_page_entry *bpge = obj; 359 360 uvm_pagefree(bpge->pg); 361} 362 363/* 364 * Inflate balloon. Pages are moved out of domain's memory towards balloon. 
365 */ 366static size_t 367balloon_inflate(struct balloon_xenbus_softc *sc, size_t tpages) 368{ 369 int rpages, ret; 370 paddr_t pa; 371 struct balloon_page_entry *bpg_entry; 372 xen_pfn_t *mfn_list = sc->sc_mfn_list; 373 374 struct xen_memory_reservation reservation = { 375 .mem_flags = 0, 376 .extent_order = 0, 377 .domid = DOMID_SELF 378 }; 379 380 KASSERT(tpages > 0); 381 KASSERT(tpages <= BALLOON_DELTA); 382 383 memset(mfn_list, 0, BALLOON_DELTA * sizeof(*mfn_list)); 384 385 /* allocate pages that will be given to Hypervisor */ 386 for (rpages = 0; rpages < tpages; rpages++) { 387 388 bpg_entry = pool_cache_get(sc->bpge_pool, PR_WAITOK); 389 if (bpg_entry == NULL) { 390 /* failed reserving a page for balloon */ 391 break; 392 } 393 394 pa = VM_PAGE_TO_PHYS(bpg_entry->pg); 395 396 mfn_list[rpages] = xpmap_ptom(pa) >> PAGE_SHIFT; 397 398 /* Invalidate pg */ 399 xpmap_ptom_unmap(pa); 400 401 SLIST_INSERT_HEAD(&balloon_sc->balloon_page_entries, 402 bpg_entry, entry); 403 balloon_sc->balloon_num_page_entries++; 404 } 405 406 /* Hand over pages to Hypervisor */ 407 set_xen_guest_handle(reservation.extent_start, mfn_list); 408 reservation.nr_extents = rpages; 409 410 ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, 411 &reservation); 412 if (ret != rpages) { 413 /* 414 * we are in bad shape: the operation failed for certain 415 * MFNs. As the API does not allow us to know which frame 416 * numbers were erroneous, we cannot really recover safely. 417 */ 418 panic("%s: decrease reservation failed: was %d, " 419 "returned %d", device_xname(sc->sc_dev), rpages, ret); 420 } 421 422#if BALLOONDEBUG 423 device_printf(sc->sc_dev, "inflate %zu => inflated by %d\n", 424 tpages, rpages); 425#endif 426 return rpages; 427} 428 429/* 430 * Deflate balloon. Pages are given back to domain's memory. 
431 */ 432static size_t 433balloon_deflate(struct balloon_xenbus_softc *sc, size_t tpages) 434{ 435 int rpages, s, ret; 436 paddr_t pa; 437 struct balloon_page_entry *bpg_entry; 438 xen_pfn_t *mfn_list = sc->sc_mfn_list; 439 440 struct xen_memory_reservation reservation = { 441 .mem_flags = 0, 442 .extent_order = 0, 443 .domid = DOMID_SELF 444 }; 445 446 KASSERT(tpages > 0); 447 KASSERT(tpages <= BALLOON_DELTA); 448 449 memset(mfn_list, 0, BALLOON_DELTA * sizeof(*mfn_list)); 450 451#ifndef UVM_HOTPLUG 452 /* 453 * If the list is empty, we are deflating balloon beyond empty. This 454 * is currently unsupported as this would require to dynamically add 455 * new memory pages inside uvm(9) and instruct pmap(9) on how to 456 * handle them. For now, we clip reservation up to the point we 457 * can manage them, eg. the remaining bpg entries in the SLIST. 458 * XXX find a way to hotplug memory through uvm(9)/pmap(9). 459 */ 460 if (tpages > sc->balloon_num_page_entries) { 461 device_printf(sc->sc_dev, 462 "memory 'hot-plug' unsupported - clipping " 463 "reservation %zu => %zu pages.\n", 464 tpages, sc->balloon_num_page_entries); 465 tpages = sc->balloon_num_page_entries; 466 } 467#endif 468 469 /* reclaim pages from balloon */ 470 set_xen_guest_handle(reservation.extent_start, mfn_list); 471 reservation.nr_extents = tpages; 472 473 ret = HYPERVISOR_memory_op(XENMEM_increase_reservation, &reservation); 474 475 if (ret < 0) { 476 panic("%s: increase reservation failed, ret %d", 477 device_xname(sc->sc_dev), ret); 478 } 479 480 if (ret != tpages) { 481 device_printf(sc->sc_dev, 482 "increase reservation incomplete: was %zu, " 483 "returned %d\n", tpages, ret); 484 } 485 486 /* plug pages back into memory through bpge entries */ 487 for (rpages = 0; rpages < ret; rpages++) { 488#ifdef UVM_HOTPLUG 489 extern paddr_t pmap_pa_end; 490 if (sc->balloon_num_page_entries == 0) { /*XXX: consolidate */ 491 /* "hot-plug": Stick it at the end of memory */ 492 pa = pmap_pa_end; 493 494 /* 
P2M update */ 495#if defined(_LP64) || defined(PAE) 496 atomic_add_64(&pmap_pa_end, PAGE_SIZE); 497#else 498 atomic_add_32(&pmap_pa_end, PAGE_SIZE); 499#endif 500 s = splvm(); 501 xpmap_ptom_map(pa, ptoa(mfn_list[rpages])); 502 xpq_queue_machphys_update(ptoa(mfn_list[rpages]), pa); 503 xpq_flush_queue(); 504 splx(s); 505 506 if (uvm_physseg_plug(atop(pa), 1, NULL) == false) { 507 /* Undo P2M */ 508 s = splvm(); 509 xpmap_ptom_unmap(pa); 510 xpq_queue_machphys_update(ptoa(mfn_list[rpages]), 0); 511 xpq_flush_queue(); 512 splx(s); 513#if defined(_LP64) || defined(PAE) 514 atomic_add_64(&pmap_pa_end, -PAGE_SIZE); 515#else 516 atomic_add_32(&pmap_pa_end, -PAGE_SIZE); 517#endif 518 break; 519 } 520 continue; 521 } 522#else 523 if (sc->balloon_num_page_entries == 0) { 524 /* 525 * XXX This is the case where extra "hot-plug" 526 * mem w.r.t boot comes in 527 */ 528 device_printf(sc->sc_dev, 529 "List empty. Cannot be collapsed further!\n"); 530 break; 531 } 532#endif 533 bpg_entry = SLIST_FIRST(&balloon_sc->balloon_page_entries); 534 SLIST_REMOVE_HEAD(&balloon_sc->balloon_page_entries, entry); 535 balloon_sc->balloon_num_page_entries--; 536 537 /* Update P->M */ 538 pa = VM_PAGE_TO_PHYS(bpg_entry->pg); 539 540 s = splvm(); 541 xpmap_ptom_map(pa, ptoa(mfn_list[rpages])); 542 xpq_queue_machphys_update(ptoa(mfn_list[rpages]), pa); 543 xpq_flush_queue(); 544 splx(s); 545 546 pool_cache_put(sc->bpge_pool, bpg_entry); 547 } 548 549#if BALLOONDEBUG 550 device_printf(sc->sc_dev, "deflate %zu => deflated by %d\n", 551 tpages, rpages); 552#endif 553 return rpages; 554} 555 556/* 557 * The balloon thread is responsible for handling inflate/deflate balloon 558 * requests for the current domain given the new "target" value. 
559 */ 560static void 561balloon_thread(void *cookie) 562{ 563 int ret; 564 size_t current, diff, target; 565 struct balloon_xenbus_softc *sc = cookie; 566 567 for/*ever*/ (;;) { 568 current = xenmem_get_currentreservation(); 569 570 /* 571 * We assume that balloon_xenbus_watcher() and 572 * sysctl(9) handlers checked the sanity of the 573 * new target value. 574 */ 575 mutex_enter(&sc->balloon_mtx); 576 target = sc->balloon_target; 577 if (current != target) { 578 /* 579 * There is work to do. Inflate/deflate in 580 * increments of BALLOON_DELTA pages at maximum. The 581 * risk of integer wrapping is mitigated by 582 * BALLOON_DELTA, which is the upper bound. 583 */ 584 mutex_exit(&sc->balloon_mtx); 585 diff = MIN(target - current, BALLOON_DELTA); 586 if (current < target) 587 ret = balloon_deflate(sc, diff); 588 else 589 ret = balloon_inflate(sc, diff); 590 591 if (ret != diff) { 592 /* 593 * Something went wrong during operation. 594 * Log error then feedback current value in 595 * target so that thread gets back to waiting 596 * for the next iteration 597 */ 598 device_printf(sc->sc_dev, 599 "WARNING: balloon could not reach target " 600 "%zu (current %zu)\n", 601 target, current); 602 current = xenmem_get_currentreservation(); 603 mutex_enter(&sc->balloon_mtx); 604 sc->balloon_target = current; 605 mutex_exit(&sc->balloon_mtx); 606 } 607 } else { 608 /* no need for change -- wait for a signal */ 609 cv_wait(&sc->balloon_cv, &sc->balloon_mtx); 610 mutex_exit(&sc->balloon_mtx); 611 } 612 } 613} 614 615/* 616 * Handler called when memory/target value changes inside Xenstore. 617 * All sanity checks must also happen in this handler, as it is the common 618 * entry point where controller domain schedules balloon operations. 
619 */ 620static void 621balloon_xenbus_watcher(struct xenbus_watch *watch, const char **vec, 622 unsigned int len) 623{ 624 size_t new_target; 625 uint64_t target_kb, target_max, target_min; 626 627 target_kb = balloon_xenbus_read_target(); 628 if (target_kb == 0) { 629 /* bogus -- just return */ 630 return; 631 } 632 633 mutex_enter(&balloon_sc->balloon_mtx); 634 target_min = balloon_sc->balloon_res_min; 635 mutex_exit(&balloon_sc->balloon_mtx); 636 if (target_kb < target_min) { 637 device_printf(balloon_sc->sc_dev, 638 "new target %"PRIu64" is below min %"PRIu64"\n", 639 target_kb, target_min); 640 return; 641 } 642 643 target_max = BALLOON_PAGES_TO_KB(xenmem_get_maxreservation()); 644 if (target_kb > target_max) { 645 /* 646 * Should not happen. Hypervisor should block balloon 647 * requests above mem-max. 648 */ 649 device_printf(balloon_sc->sc_dev, 650 "new target %"PRIu64" is above max %"PRIu64"\n", 651 target_kb, target_max); 652 return; 653 } 654 655 new_target = BALLOON_KB_TO_PAGES(target_kb); 656 657 device_printf(balloon_sc->sc_dev, 658 "current reservation: %zu pages => target: %zu pages\n", 659 xenmem_get_currentreservation(), new_target); 660 661 /* Only update target if its value changes */ 662 mutex_enter(&balloon_sc->balloon_mtx); 663 if (balloon_sc->balloon_target != new_target) { 664 balloon_sc->balloon_target = new_target; 665 cv_signal(&balloon_sc->balloon_cv); 666 } 667 mutex_exit(&balloon_sc->balloon_mtx); 668 669 return; 670} 671 672/* 673 * sysctl(9) stuff 674 */ 675 676/* routine to control the minimum memory reserved for the domain */ 677static int 678sysctl_kern_xen_balloon_min(SYSCTLFN_ARGS) 679{ 680 struct sysctlnode node; 681 u_quad_t newval; 682 int error; 683 684 node = *rnode; 685 node.sysctl_data = &newval; 686 687 mutex_enter(&balloon_sc->balloon_mtx); 688 newval = balloon_sc->balloon_res_min; 689 mutex_exit(&balloon_sc->balloon_mtx); 690 691 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 692 if (error || newp == NULL) 693 return 
error; 694 695 /* Safeguard value: refuse to go below. */ 696 if (newval < XEN_RESERVATION_MIN) { 697 device_printf(balloon_sc->sc_dev, 698 "cannot set min below minimum safe value (%d)\n", 699 XEN_RESERVATION_MIN); 700 return EPERM; 701 } 702 703 mutex_enter(&balloon_sc->balloon_mtx); 704 if (balloon_sc->balloon_res_min != newval) 705 balloon_sc->balloon_res_min = newval; 706 mutex_exit(&balloon_sc->balloon_mtx); 707 708 return 0; 709} 710 711/* Returns the maximum memory reservation of the domain */ 712static int 713sysctl_kern_xen_balloon_max(SYSCTLFN_ARGS) 714{ 715 struct sysctlnode node; 716 u_quad_t node_val; 717 718 node = *rnode; 719 720 node_val = BALLOON_PAGES_TO_KB(xenmem_get_maxreservation()); 721 node.sysctl_data = &node_val; 722 return sysctl_lookup(SYSCTLFN_CALL(&node)); 723} 724 725/* Returns the current memory reservation of the domain */ 726static int 727sysctl_kern_xen_balloon_current(SYSCTLFN_ARGS) 728{ 729 struct sysctlnode node; 730 u_quad_t node_val; 731 732 node = *rnode; 733 734 node_val = BALLOON_PAGES_TO_KB(xenmem_get_currentreservation()); 735 node.sysctl_data = &node_val; 736 return sysctl_lookup(SYSCTLFN_CALL(&node)); 737} 738 739/* 740 * Returns the target memory reservation of the domain 741 * When reading, this sysctl will return the value of the balloon_target 742 * variable, converted into KiB 743 * When used for writing, it will update the new memory/target value 744 * in XenStore, but will not update the balloon_target variable directly. 745 * This will be done by the Xenbus watch handler, balloon_xenbus_watcher(). 
746 */ 747static int 748sysctl_kern_xen_balloon_target(SYSCTLFN_ARGS) 749{ 750 struct sysctlnode node; 751 u_quad_t newval, res_min, res_max; 752 int error; 753 754 node = *rnode; 755 node.sysctl_data = &newval; 756 757 mutex_enter(&balloon_sc->balloon_mtx); 758 newval = BALLOON_PAGES_TO_KB(balloon_sc->balloon_target); 759 res_min = balloon_sc->balloon_res_min; 760 mutex_exit(&balloon_sc->balloon_mtx); 761 762 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 763 if (newp == NULL || error != 0) { 764 return error; 765 } 766 767 /* 768 * Sanity check new size 769 * We should not balloon below the minimum reservation 770 * set by the domain, nor above the maximum reservation set 771 * by domain controller. 772 * Note: domain is not supposed to receive balloon requests when 773 * they are above maximum reservation, but better be safe than 774 * sorry. 775 */ 776 res_max = BALLOON_PAGES_TO_KB(xenmem_get_maxreservation()); 777 if (newval < res_min || newval > res_max) { 778#if BALLOONDEBUG 779 device_printf(balloon_sc->sc_dev, 780 "new value out of bounds: %"PRIu64"\n", newval); 781 device_printf(balloon_sc->sc_dev, 782 "min %"PRIu64", max %"PRIu64"\n", res_min, res_max); 783#endif 784 return EPERM; 785 } 786 787 /* 788 * Write new value inside Xenstore. This will fire the memory/target 789 * watch handler, balloon_xenbus_watcher(). 
790 */ 791 balloon_xenbus_write_target(newval); 792 793 return 0; 794} 795 796/* sysctl(9) nodes creation */ 797static void 798sysctl_kern_xen_balloon_setup(struct balloon_xenbus_softc *sc) 799{ 800 const struct sysctlnode *node = NULL; 801 struct sysctllog **clog = &sc->sc_log; 802 803 sysctl_createv(clog, 0, NULL, &node, 804 CTLFLAG_PERMANENT, 805 CTLTYPE_NODE, "machdep", NULL, 806 NULL, 0, NULL, 0, 807 CTL_MACHDEP, CTL_EOL); 808 809 sysctl_createv(clog, 0, &node, &node, 810 CTLFLAG_PERMANENT, 811 CTLTYPE_NODE, "xen", 812 SYSCTL_DESCR("Xen top level node"), 813 NULL, 0, NULL, 0, 814 CTL_CREATE, CTL_EOL); 815 816 sysctl_createv(clog, 0, &node, &node, 817 CTLFLAG_PERMANENT, 818 CTLTYPE_NODE, "balloon", 819 SYSCTL_DESCR("Balloon details"), 820 NULL, 0, NULL, 0, 821 CTL_CREATE, CTL_EOL); 822 823 sysctl_createv(clog, 0, &node, NULL, 824 CTLFLAG_PERMANENT | CTLFLAG_READONLY, 825 CTLTYPE_QUAD, "current", 826 SYSCTL_DESCR("Domain's current memory reservation from " 827 "hypervisor, in KiB."), 828 sysctl_kern_xen_balloon_current, 0, NULL, 0, 829 CTL_CREATE, CTL_EOL); 830 831 sysctl_createv(clog, 0, &node, NULL, 832 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 833 CTLTYPE_QUAD, "target", 834 SYSCTL_DESCR("Target memory reservation for domain, in KiB."), 835 sysctl_kern_xen_balloon_target, 0, NULL, 0, 836 CTL_CREATE, CTL_EOL); 837 838 sysctl_createv(clog, 0, &node, NULL, 839 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 840 CTLTYPE_QUAD, "min", 841 SYSCTL_DESCR("Minimum amount of memory the domain " 842 "reserves, in KiB."), 843 sysctl_kern_xen_balloon_min, 0, NULL, 0, 844 CTL_CREATE, CTL_EOL); 845 846 sysctl_createv(clog, 0, &node, NULL, 847 CTLFLAG_PERMANENT | CTLFLAG_READONLY, 848 CTLTYPE_QUAD, "max", 849 SYSCTL_DESCR("Maximum amount of memory the domain " 850 "can use, in KiB."), 851 sysctl_kern_xen_balloon_max, 0, NULL, 0, 852 CTL_CREATE, CTL_EOL); 853} 854