1/****************************************************************************** 2 * balloon.c 3 * 4 * Xen balloon driver - enables returning/claiming memory to/from Xen. 5 * 6 * Copyright (c) 2003, B Dragovic 7 * Copyright (c) 2003-2004, M Williamson, K Fraser 8 * Copyright (c) 2005 Dan M. Smith, IBM Corporation 9 * 10 * This file may be distributed separately from the Linux kernel, or 11 * incorporated into other software packages, subject to the following license: 12 * 13 * Permission is hereby granted, free of charge, to any person obtaining a copy 14 * of this source file (the "Software"), to deal in the Software without 15 * restriction, including without limitation the rights to use, copy, modify, 16 * merge, publish, distribute, sublicense, and/or sell copies of the Software, 17 * and to permit persons to whom the Software is furnished to do so, subject to 18 * the following conditions: 19 * 20 * The above copyright notice and this permission notice shall be included in 21 * all copies or substantial portions of the Software. 22 * 23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 24 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 25 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 26 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 27 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 28 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 29 * IN THE SOFTWARE. 30 */ 31 32#include <sys/cdefs.h> 33__FBSDID("$FreeBSD$"); 34 35#include <sys/param.h> 36#include <sys/lock.h> 37#include <sys/kernel.h> 38#include <sys/kthread.h> 39#include <sys/malloc.h> 40#include <sys/mutex.h> 41#include <sys/sysctl.h> 42#include <sys/module.h> 43 44#include <vm/vm.h> 45#include <vm/vm_page.h> 46 47#include <xen/xen-os.h> 48#include <xen/hypervisor.h> 49#include <xen/features.h> 50#include <xen/xenstore/xenstorevar.h> 51 52static MALLOC_DEFINE(M_BALLOON, "Balloon", "Xen Balloon Driver"); 53 54/* Convert from KB (as fetched from xenstore) to number of PAGES */ 55#define KB_TO_PAGE_SHIFT (PAGE_SHIFT - 10) 56 57struct mtx balloon_mutex; 58 59/* We increase/decrease in batches which fit in a page */ 60static xen_pfn_t frame_list[PAGE_SIZE / sizeof(xen_pfn_t)]; 61 62struct balloon_stats { 63 /* We aim for 'current allocation' == 'target allocation'. */ 64 unsigned long current_pages; 65 unsigned long target_pages; 66 /* We may hit the hard limit in Xen. If we do then we remember it. */ 67 unsigned long hard_limit; 68 /* 69 * Drivers may alter the memory reservation independently, but they 70 * must inform the balloon driver so we avoid hitting the hard limit. 71 */ 72 unsigned long driver_pages; 73 /* Number of pages in high- and low-memory balloons. */ 74 unsigned long balloon_low; 75 unsigned long balloon_high; 76}; 77 78static struct balloon_stats balloon_stats; 79#define bs balloon_stats 80 81SYSCTL_DECL(_dev_xen); 82static SYSCTL_NODE(_dev_xen, OID_AUTO, balloon, CTLFLAG_RD, NULL, "Balloon"); 83SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, current, CTLFLAG_RD, 84 &bs.current_pages, 0, "Current allocation"); 85SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, target, CTLFLAG_RD, 86 &bs.target_pages, 0, "Target allocation"); 87SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, driver_pages, CTLFLAG_RD, 88 &bs.driver_pages, 0, "Driver pages"); 89SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, hard_limit, CTLFLAG_RD, 90 &bs.hard_limit, 0, "Xen hard limit"); 91SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, low_mem, CTLFLAG_RD, 92 &bs.balloon_low, 0, "Low-mem balloon"); 93SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, high_mem, CTLFLAG_RD, 94 &bs.balloon_high, 0, "High-mem balloon"); 95 96/* List of ballooned pages, threaded through the mem_map array. */ 97static TAILQ_HEAD(,vm_page) ballooned_pages; 98 99/* Main work function, always executed in process context. */ 100static void balloon_process(void *unused); 101 102#define IPRINTK(fmt, args...) \ 103 printk(KERN_INFO "xen_mem: " fmt, ##args) 104#define WPRINTK(fmt, args...) \ 105 printk(KERN_WARNING "xen_mem: " fmt, ##args) 106 107static unsigned long 108current_target(void) 109{ 110 unsigned long target = min(bs.target_pages, bs.hard_limit); 111 if (target > (bs.current_pages + bs.balloon_low + bs.balloon_high)) 112 target = bs.current_pages + bs.balloon_low + bs.balloon_high; 113 return (target); 114} 115 116static unsigned long 117minimum_target(void) 118{ 119 unsigned long min_pages, curr_pages = current_target(); 120 121#define MB2PAGES(mb) ((mb) << (20 - PAGE_SHIFT)) 122 /* 123 * Simple continuous piecewiese linear function: 124 * max MiB -> min MiB gradient 125 * 0 0 126 * 16 16 127 * 32 24 128 * 128 72 (1/2) 129 * 512 168 (1/4) 130 * 2048 360 (1/8) 131 * 8192 552 (1/32) 132 * 32768 1320 133 * 131072 4392 134 */ 135 if (realmem < MB2PAGES(128)) 136 min_pages = MB2PAGES(8) + (realmem >> 1); 137 else if (realmem < MB2PAGES(512)) 138 min_pages = MB2PAGES(40) + (realmem >> 2); 139 else if (realmem < MB2PAGES(2048)) 140 min_pages = MB2PAGES(104) + (realmem >> 3); 141 else 142 min_pages = MB2PAGES(296) + (realmem >> 5); 143#undef MB2PAGES 144 145 /* Don't enforce growth */ 146 return (min(min_pages, curr_pages)); 147} 148 149static int 150increase_reservation(unsigned long nr_pages) 151{ 152 unsigned long i; 153 vm_page_t page; 154 long rc; 155 struct xen_memory_reservation reservation = { 156 .address_bits = 0, 157 .extent_order = 0, 158 .domid = DOMID_SELF 159 }; 160 161 mtx_assert(&balloon_mutex, MA_OWNED); 162 163 if (nr_pages > nitems(frame_list)) 164 nr_pages = nitems(frame_list); 165 166 for (page = TAILQ_FIRST(&ballooned_pages), i = 0; 167 i < nr_pages; i++, page = TAILQ_NEXT(page, plinks.q)) { 168 KASSERT(page != NULL, ("ballooned_pages list corrupt")); 169 frame_list[i] = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT); 170 } 171 172 set_xen_guest_handle(reservation.extent_start, frame_list); 173 reservation.nr_extents = nr_pages; 174 rc = HYPERVISOR_memory_op( 175 XENMEM_populate_physmap, &reservation); 176 if (rc < nr_pages) { 177 if (rc > 0) { 178 int ret; 179 180 /* We hit the Xen hard limit: reprobe. */ 181 reservation.nr_extents = rc; 182 ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, 183 &reservation); 184 KASSERT(ret == rc, ("HYPERVISOR_memory_op failed")); 185 } 186 if (rc >= 0) 187 bs.hard_limit = (bs.current_pages + rc - 188 bs.driver_pages); 189 goto out; 190 } 191 192 for (i = 0; i < nr_pages; i++) { 193 page = TAILQ_FIRST(&ballooned_pages); 194 KASSERT(page != NULL, ("Unable to get ballooned page")); 195 TAILQ_REMOVE(&ballooned_pages, page, plinks.q); 196 bs.balloon_low--; 197 198 KASSERT(xen_feature(XENFEAT_auto_translated_physmap), 199 ("auto translated physmap but mapping is valid")); 200 201 vm_page_free(page); 202 } 203 204 bs.current_pages += nr_pages; 205 206 out: 207 return (0); 208} 209 210static int 211decrease_reservation(unsigned long nr_pages) 212{ 213 unsigned long i; 214 vm_page_t page; 215 int need_sleep = 0; 216 int ret; 217 struct xen_memory_reservation reservation = { 218 .address_bits = 0, 219 .extent_order = 0, 220 .domid = DOMID_SELF 221 }; 222 223 mtx_assert(&balloon_mutex, MA_OWNED); 224 225 if (nr_pages > nitems(frame_list)) 226 nr_pages = nitems(frame_list); 227 228 for (i = 0; i < nr_pages; i++) { 229 if ((page = vm_page_alloc(NULL, 0, 230 VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | 231 VM_ALLOC_ZERO)) == NULL) { 232 nr_pages = i; 233 need_sleep = 1; 234 break; 235 } 236 237 if ((page->flags & PG_ZERO) == 0) { 238 /* 239 * Zero the page, or else we might be leaking 240 * important data to other domains on the same 241 * host. Xen doesn't scrub ballooned out memory 242 * pages, the guest is in charge of making 243 * sure that no information is leaked. 244 */ 245 pmap_zero_page(page); 246 } 247 248 frame_list[i] = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT); 249 250 TAILQ_INSERT_HEAD(&ballooned_pages, page, plinks.q); 251 bs.balloon_low++; 252 } 253 254 set_xen_guest_handle(reservation.extent_start, frame_list); 255 reservation.nr_extents = nr_pages; 256 ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); 257 KASSERT(ret == nr_pages, ("HYPERVISOR_memory_op failed")); 258 259 bs.current_pages -= nr_pages; 260 261 return (need_sleep); 262} 263 264/* 265 * We avoid multiple worker processes conflicting via the balloon mutex. 266 * We may of course race updates of the target counts (which are protected 267 * by the balloon lock), or with changes to the Xen hard limit, but we will 268 * recover from these in time. 269 */ 270static void 271balloon_process(void *unused) 272{ 273 int need_sleep = 0; 274 long credit; 275 276 mtx_lock(&balloon_mutex); 277 for (;;) { 278 int sleep_time; 279 280 do { 281 credit = current_target() - bs.current_pages; 282 if (credit > 0) 283 need_sleep = (increase_reservation(credit) != 0); 284 if (credit < 0) 285 need_sleep = (decrease_reservation(-credit) != 0); 286 287 } while ((credit != 0) && !need_sleep); 288 289 /* Schedule more work if there is some still to be done. */ 290 if (current_target() != bs.current_pages) 291 sleep_time = hz; 292 else 293 sleep_time = 0; 294 295 msleep(balloon_process, &balloon_mutex, 0, "balloon", 296 sleep_time); 297 } 298 mtx_unlock(&balloon_mutex); 299} 300 301/* Resets the Xen limit, sets new target, and kicks off processing. */ 302static void 303set_new_target(unsigned long target) 304{ 305 /* No need for lock. Not read-modify-write updates. */ 306 bs.hard_limit = ~0UL; 307 bs.target_pages = max(target, minimum_target()); 308 wakeup(balloon_process); 309} 310 311static struct xs_watch target_watch = 312{ 313 .node = "memory/target", 314 .max_pending = 1, 315}; 316 317/* React to a change in the target key */ 318static void 319watch_target(struct xs_watch *watch, 320 const char **vec, unsigned int len) 321{ 322 unsigned long long new_target; 323 int err; 324 325 err = xs_scanf(XST_NIL, "memory", "target", NULL, 326 "%llu", &new_target); 327 if (err) { 328 /* This is ok (for domain0 at least) - so just return */ 329 return; 330 } 331 332 /* 333 * The given memory/target value is in KiB, so it needs converting to 334 * pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10. 335 */ 336 set_new_target(new_target >> KB_TO_PAGE_SHIFT); 337} 338 339/*------------------ Private Device Attachment Functions --------------------*/ 340/** 341 * \brief Identify instances of this device type in the system. 342 * 343 * \param driver The driver performing this identify action. 344 * \param parent The NewBus parent device for any devices this method adds. 345 */ 346static void 347xenballoon_identify(driver_t *driver __unused, device_t parent) 348{ 349 /* 350 * A single device instance for our driver is always present 351 * in a system operating under Xen. 352 */ 353 BUS_ADD_CHILD(parent, 0, driver->name, 0); 354} 355 356/** 357 * \brief Probe for the existence of the Xen Balloon device 358 * 359 * \param dev NewBus device_t for this Xen control instance. 360 * 361 * \return Always returns 0 indicating success. 362 */ 363static int 364xenballoon_probe(device_t dev) 365{ 366 367 device_set_desc(dev, "Xen Balloon Device"); 368 return (0); 369} 370 371/** 372 * \brief Attach the Xen Balloon device. 373 * 374 * \param dev NewBus device_t for this Xen control instance. 375 * 376 * \return On success, 0. Otherwise an errno value indicating the 377 * type of failure. 378 */ 379static int 380xenballoon_attach(device_t dev) 381{ 382 int err; 383 384 mtx_init(&balloon_mutex, "balloon_mutex", NULL, MTX_DEF); 385 386 bs.current_pages = realmem; 387 bs.target_pages = bs.current_pages; 388 bs.balloon_low = 0; 389 bs.balloon_high = 0; 390 bs.driver_pages = 0UL; 391 bs.hard_limit = ~0UL; 392 393 kproc_create(balloon_process, NULL, NULL, 0, 0, "balloon"); 394 395 target_watch.callback = watch_target; 396 397 err = xs_register_watch(&target_watch); 398 if (err) 399 device_printf(dev, 400 "xenballon: failed to set balloon watcher\n"); 401 402 return (err); 403} 404 405/*-------------------- Private Device Attachment Data -----------------------*/ 406static device_method_t xenballoon_methods[] = { 407 /* Device interface */ 408 DEVMETHOD(device_identify, xenballoon_identify), 409 DEVMETHOD(device_probe, xenballoon_probe), 410 DEVMETHOD(device_attach, xenballoon_attach), 411 412 DEVMETHOD_END 413}; 414 415DEFINE_CLASS_0(xenballoon, xenballoon_driver, xenballoon_methods, 0); 416devclass_t xenballoon_devclass; 417 418DRIVER_MODULE(xenballoon, xenstore, xenballoon_driver, xenballoon_devclass, 419 NULL, NULL); 420