/*	$NetBSD: x86_xpmap.c,v 1.2 2007/11/22 16:17:05 bouyer Exp $	*/

/*
 * Copyright (c) 2006 Mathieu Ropert <mro@adviseo.fr>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

/*
 * Copyright (c) 2006, 2007 Manuel Bouyer.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Manuel Bouyer.
 * 4. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

/*
 *
 * Copyright (c) 2004 Christian Limpach.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Christian Limpach.
 * 4. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */


#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: x86_xpmap.c,v 1.2 2007/11/22 16:17:05 bouyer Exp $");

#include "opt_xen.h"

#include <sys/param.h>
#include <sys/systm.h>

#include <uvm/uvm.h>

#include <machine/pmap.h>
#include <machine/gdt.h>
#include <xen/xenfunc.h>

#include <dev/isa/isareg.h>
#include <machine/isa_machdep.h>

#undef XENDEBUG
/* #define XENDEBUG_SYNC */
/* #define XENDEBUG_LOW */

#ifdef XENDEBUG
#define XENPRINTF(x) printf x
#define XENPRINTK(x) printk x
#define XENPRINTK2(x) /* printk x */

static char XBUF[256];
#else
#define XENPRINTF(x)
#define XENPRINTK(x)
#define XENPRINTK2(x)
#endif
#define PRINTF(x) printf x
#define PRINTK(x) printk x

volatile shared_info_t *HYPERVISOR_shared_info;
union start_info_union start_info_union;

void xen_failsafe_handler(void);

#ifdef XEN3
#define HYPERVISOR_mmu_update_self(req, count, success_count) \
	HYPERVISOR_mmu_update((req), (count), (success_count), DOMID_SELF)
#else
#define HYPERVISOR_mmu_update_self(req, count, success_count) \
	HYPERVISOR_mmu_update((req), (count), (success_count))
#endif

void
xen_failsafe_handler(void)
{

	panic("xen_failsafe_handler called!\n");
}


#ifndef __x86_64__
void
xen_update_descriptor(union descriptor *table, union descriptor *entry)
{
	paddr_t pa;
	pt_entry_t *ptp;

	ptp = kvtopte((vaddr_t)table);
	pa = (*ptp & PG_FRAME) | ((vaddr_t)table & ~PG_FRAME);
	if (HYPERVISOR_update_descriptor(pa, entry->raw[0], entry->raw[1]))
		panic("HYPERVISOR_update_descriptor failed\n");
}
#endif

void
xen_set_ldt(vaddr_t base, uint32_t entries)
{
	vaddr_t va;
	vaddr_t end;
	pt_entry_t *ptp, *maptp;
	int s;

#ifdef __x86_64__
	end = base + (entries << 3);
#else
	end = base + entries * sizeof(union descriptor);
#endif

	for (va = base; va < end; va += PAGE_SIZE) {
		KASSERT(va >= VM_MIN_KERNEL_ADDRESS);
		ptp = kvtopte(va);
		maptp = (pt_entry_t *)vtomach((vaddr_t)ptp);
		XENPRINTF(("xen_set_ldt %p %d %p %p\n", (void *)base,
		    entries, ptp, maptp));
		PTE_CLEARBITS(ptp, maptp, PG_RW);
	}
	s = splvm();
	PTE_UPDATES_FLUSH();

	xpq_queue_set_ldt(base, entries);
	xpq_flush_queue();
	splx(s);
}

#ifdef XENDEBUG
void xpq_debug_dump(void);
#endif

#define XPQUEUE_SIZE 2048
static mmu_update_t xpq_queue[XPQUEUE_SIZE];
static int xpq_idx = 0;
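
/*
 * Page-table writes are staged in xpq_queue and handed to the hypervisor
 * in batches, so a long run of PTE updates costs one hypercall instead of
 * one trap per write.  A caller typically queues its updates and then
 * flushes, along these lines (illustrative sketch only; ptep, npte and va
 * are placeholder names, not code from this file):
 *
 *	xpq_queue_pte_update(ptep, npte);	(stage the new PTE)
 *	xpq_queue_invlpg(va);			(invalidate the stale TLB entry)
 *	xpq_flush_queue();			(submit all pending updates)
 */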
void
xpq_flush_queue(void)
{
	int i, ok;

	XENPRINTK2(("flush queue %p entries %d\n", xpq_queue, xpq_idx));
	for (i = 0; i < xpq_idx; i++)
		XENPRINTK2(("%d: %p %08x\n", i, (u_int)xpq_queue[i].ptr,
		    (u_int)xpq_queue[i].val));
	if (xpq_idx != 0 &&
	    HYPERVISOR_mmu_update_self(xpq_queue, xpq_idx, &ok) < 0) {
		printf("xpq_flush_queue: %d entries\n", xpq_idx);
		for (i = 0; i < xpq_idx; i++)
			printf("0x%16lx: 0x%16lx\n",
			    xpq_queue[i].ptr, xpq_queue[i].val);
		panic("HYPERVISOR_mmu_update failed\n");
	}
	xpq_idx = 0;
}

static inline void
xpq_increment_idx(void)
{

	xpq_idx++;
	if (__predict_false(xpq_idx == XPQUEUE_SIZE))
		xpq_flush_queue();
}

void
xpq_queue_machphys_update(paddr_t ma, paddr_t pa)
{
	XENPRINTK2(("xpq_queue_machphys_update ma=%p pa=%p\n",
	    (void *)ma, (void *)pa));
	xpq_queue[xpq_idx].ptr = ma | MMU_MACHPHYS_UPDATE;
	xpq_queue[xpq_idx].val = (pa - XPMAP_OFFSET) >> PAGE_SHIFT;
	xpq_increment_idx();
#ifdef XENDEBUG_SYNC
	xpq_flush_queue();
#endif
}

void
xpq_queue_pde_update(pd_entry_t *ptr, pd_entry_t val)
{

	KASSERT(((paddr_t)ptr & 3) == 0);
	xpq_queue[xpq_idx].ptr = (paddr_t)ptr | MMU_NORMAL_PT_UPDATE;
	xpq_queue[xpq_idx].val = val;
	xpq_increment_idx();
#ifdef XENDEBUG_SYNC
	xpq_flush_queue();
#endif
}

void
xpq_queue_pte_update(pt_entry_t *ptr, pt_entry_t val)
{

	KASSERT(((paddr_t)ptr & 3) == 0);
	xpq_queue[xpq_idx].ptr = (paddr_t)ptr | MMU_NORMAL_PT_UPDATE;
	xpq_queue[xpq_idx].val = val;
	xpq_increment_idx();
#ifdef XENDEBUG_SYNC
	xpq_flush_queue();
#endif
}
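
/*
 * In the mmu_update interface the low two bits of each 'ptr' select the
 * command (MMU_NORMAL_PT_UPDATE vs. MMU_MACHPHYS_UPDATE above), which is
 * why the queueing functions assert that the address being OR'ed with
 * the command is 4-byte aligned.
 */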
#ifdef XEN3
void
xpq_queue_pt_switch(paddr_t pa)
{
	struct mmuext_op op;
	xpq_flush_queue();

	XENPRINTK2(("xpq_queue_pt_switch: %p %p\n", (void *)pa, (void *)pa));
	op.cmd = MMUEXT_NEW_BASEPTR;
	op.arg1.mfn = pa >> PAGE_SHIFT;
	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
		panic("xpq_queue_pt_switch");
}

void
xpq_queue_pin_table(paddr_t pa)
{
	struct mmuext_op op;
	xpq_flush_queue();

	XENPRINTK2(("xpq_queue_pin_table: %p %p\n", (void *)pa, (void *)pa));
	op.arg1.mfn = pa >> PAGE_SHIFT;

#ifdef __x86_64__
	op.cmd = MMUEXT_PIN_L4_TABLE;
#else
	op.cmd = MMUEXT_PIN_L2_TABLE;
#endif
	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
		panic("xpq_queue_pin_table");
}

void
xpq_queue_unpin_table(paddr_t pa)
{
	struct mmuext_op op;
	xpq_flush_queue();

	XENPRINTK2(("xpq_queue_unpin_table: %p %p\n", (void *)pa, (void *)pa));
	op.arg1.mfn = pa >> PAGE_SHIFT;
	op.cmd = MMUEXT_UNPIN_TABLE;
	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
		panic("xpq_queue_unpin_table");
}

void
xpq_queue_set_ldt(vaddr_t va, uint32_t entries)
{
	struct mmuext_op op;
	xpq_flush_queue();

	XENPRINTK2(("xpq_queue_set_ldt\n"));
	KASSERT(va == (va & ~PAGE_MASK));
	op.cmd = MMUEXT_SET_LDT;
	op.arg1.linear_addr = va;
	op.arg2.nr_ents = entries;
	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
		panic("xpq_queue_set_ldt");
}

void
xpq_queue_tlb_flush(void)
{
	struct mmuext_op op;
	xpq_flush_queue();

	XENPRINTK2(("xpq_queue_tlb_flush\n"));
	op.cmd = MMUEXT_TLB_FLUSH_LOCAL;
	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
		panic("xpq_queue_tlb_flush");
}

void
xpq_flush_cache(void)
{
	struct mmuext_op op;
	int s = splvm();
	xpq_flush_queue();

	XENPRINTK2(("xpq_queue_flush_cache\n"));
	op.cmd = MMUEXT_FLUSH_CACHE;
	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
		panic("xpq_flush_cache");
	splx(s);
}

void
xpq_queue_invlpg(vaddr_t va)
{
	struct mmuext_op op;
	xpq_flush_queue();

	XENPRINTK2(("xpq_queue_invlpg %p\n", (void *)va));
	op.cmd = MMUEXT_INVLPG_LOCAL;
	op.arg1.linear_addr = (va & ~PAGE_MASK);
	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
		panic("xpq_queue_invlpg");
}

int
xpq_update_foreign(pt_entry_t *ptr, pt_entry_t val, int dom)
{
	mmu_update_t op;
	int ok;
	xpq_flush_queue();

	op.ptr = (paddr_t)ptr;
	op.val = val;
	if (HYPERVISOR_mmu_update(&op, 1, &ok, dom) < 0)
		return EFAULT;
	return (0);
}
#else /* XEN3 */
void
xpq_queue_pt_switch(paddr_t pa)
{

	XENPRINTK2(("xpq_queue_pt_switch: %p %p\n", (void *)pa, (void *)pa));
	xpq_queue[xpq_idx].ptr = pa | MMU_EXTENDED_COMMAND;
	xpq_queue[xpq_idx].val = MMUEXT_NEW_BASEPTR;
	xpq_increment_idx();
}

void
xpq_queue_pin_table(paddr_t pa)
{

	XENPRINTK2(("xpq_queue_pin_table: %p %p\n", (void *)pa, (void *)pa));
	xpq_queue[xpq_idx].ptr = pa | MMU_EXTENDED_COMMAND;
	xpq_queue[xpq_idx].val = MMUEXT_PIN_L2_TABLE;
	xpq_increment_idx();
}

void
xpq_queue_unpin_table(paddr_t pa)
{

	XENPRINTK2(("xpq_queue_unpin_table: %p %p\n", (void *)pa, (void *)pa));
	xpq_queue[xpq_idx].ptr = pa | MMU_EXTENDED_COMMAND;
	xpq_queue[xpq_idx].val = MMUEXT_UNPIN_TABLE;
	xpq_increment_idx();
}

void
xpq_queue_set_ldt(vaddr_t va, uint32_t entries)
{

	XENPRINTK2(("xpq_queue_set_ldt\n"));
	KASSERT(va == (va & ~PAGE_MASK));
	xpq_queue[xpq_idx].ptr = MMU_EXTENDED_COMMAND | va;
	xpq_queue[xpq_idx].val = MMUEXT_SET_LDT | (entries << MMUEXT_CMD_SHIFT);
	xpq_increment_idx();
}

void
xpq_queue_tlb_flush(void)
{

	XENPRINTK2(("xpq_queue_tlb_flush\n"));
	xpq_queue[xpq_idx].ptr = MMU_EXTENDED_COMMAND;
	xpq_queue[xpq_idx].val = MMUEXT_TLB_FLUSH;
	xpq_increment_idx();
}

void
xpq_flush_cache(void)
{
	int s = splvm();

	XENPRINTK2(("xpq_queue_flush_cache\n"));
	xpq_queue[xpq_idx].ptr = MMU_EXTENDED_COMMAND;
	xpq_queue[xpq_idx].val = MMUEXT_FLUSH_CACHE;
	xpq_increment_idx();
	xpq_flush_queue();
	splx(s);
}

void
xpq_queue_invlpg(vaddr_t va)
{

	XENPRINTK2(("xpq_queue_invlpg %p\n", (void *)va));
	xpq_queue[xpq_idx].ptr = (va & ~PAGE_MASK) | MMU_EXTENDED_COMMAND;
	xpq_queue[xpq_idx].val = MMUEXT_INVLPG;
	xpq_increment_idx();
}

int
xpq_update_foreign(pt_entry_t *ptr, pt_entry_t val, int dom)
{
	mmu_update_t xpq_up[3];

	xpq_up[0].ptr = MMU_EXTENDED_COMMAND;
	xpq_up[0].val = MMUEXT_SET_FOREIGNDOM | (dom << 16);
	xpq_up[1].ptr = (paddr_t)ptr;
	xpq_up[1].val = val;
	if (HYPERVISOR_mmu_update_self(xpq_up, 2, NULL) < 0)
		return EFAULT;
	return (0);
}
#endif /* XEN3 */

#ifdef XENDEBUG
void
xpq_debug_dump(void)
{
	int i;

	XENPRINTK2(("idx: %d\n", xpq_idx));
	for (i = 0; i < xpq_idx; i++) {
		sprintf(XBUF, "%x %08x ", (u_int)xpq_queue[i].ptr,
		    (u_int)xpq_queue[i].val);
		if (++i < xpq_idx)
			sprintf(XBUF + strlen(XBUF), "%x %08x ",
			    (u_int)xpq_queue[i].ptr, (u_int)xpq_queue[i].val);
		if (++i < xpq_idx)
			sprintf(XBUF + strlen(XBUF), "%x %08x ",
			    (u_int)xpq_queue[i].ptr, (u_int)xpq_queue[i].val);
		if (++i < xpq_idx)
			sprintf(XBUF + strlen(XBUF), "%x %08x ",
			    (u_int)xpq_queue[i].ptr, (u_int)xpq_queue[i].val);
		XENPRINTK2(("%d: %s\n", xpq_idx, XBUF));
	}
}
#endif
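
/*
 * Note the XEN3 split above: on Xen 3 the extended operations (pin/unpin,
 * base-pointer switch, TLB flush, ...) go through the dedicated
 * HYPERVISOR_mmuext_op hypercall, and each wrapper flushes the pending
 * update queue first so the operation stays ordered after any queued PTE
 * writes.  On Xen 2 the same operations were encoded as
 * MMU_EXTENDED_COMMAND entries in the ordinary update queue, so ordering
 * came for free.
 */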

#ifdef __x86_64__
extern volatile struct xencons_interface *xencons_interface; /* XXX */
extern struct xenstore_domain_interface *xenstore_interface; /* XXX */

static void xen_bt_set_readonly(vaddr_t);
static void xen_bootstrap_tables(vaddr_t, vaddr_t, int, int, int);

/* How many PDEs ? */
#if L2_SLOT_KERNBASE > 0
#define TABLE_L2_ENTRIES (2 * (NKL2_KIMG_ENTRIES + 1))
#else
#define TABLE_L2_ENTRIES (NKL2_KIMG_ENTRIES + 1)
#endif

/*
 * Construct and switch to new pagetables.
 * first_avail is the first vaddr we can use after
 * we get rid of the Xen pagetables.
 */

vaddr_t xen_pmap_bootstrap(void);

/*
 * Function to get rid of the Xen bootstrap tables.
 */

vaddr_t
xen_pmap_bootstrap(void)
{
	int count, iocount = 0;
	vaddr_t bootstrap_tables, init_tables;

	xpmap_phys_to_machine_mapping = (paddr_t *) xen_start_info.mfn_list;
	init_tables = xen_start_info.pt_base;
	__PRINTK(("xen_arch_pmap_bootstrap init_tables=0x%lx\n", init_tables));

	/* Space after the Xen bootstrap tables should be free */
	bootstrap_tables = xen_start_info.pt_base +
	    (xen_start_info.nr_pt_frames * PAGE_SIZE);

	/* Calculate how many tables we need */
	count = TABLE_L2_ENTRIES;

#ifdef DOM0OPS
	if (xen_start_info.flags & SIF_INITDOMAIN) {
		/* space for ISA I/O mem */
		iocount = IOM_SIZE / PAGE_SIZE;
	}
#endif

	/*
	 * The Xen space we'll reclaim may not be enough for our new page
	 * tables; move the bootstrap tables if necessary.
	 */

	if (bootstrap_tables < init_tables + ((count + 3 + iocount) * PAGE_SIZE))
		bootstrap_tables = init_tables +
		    ((count + 3 + iocount) * PAGE_SIZE);

	/* Create temporary tables */
	xen_bootstrap_tables(xen_start_info.pt_base, bootstrap_tables,
	    xen_start_info.nr_pt_frames, count, 0);

	/* get vaddr space for the shared info and the console pages */

	/* Create final tables */
	xen_bootstrap_tables(bootstrap_tables, init_tables,
	    count + 3, count, 1);

	return (init_tables + ((count + 3) * PAGE_SIZE));
}
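
/*
 * The "count + 3" used above accounts for the three upper-level pages
 * that sit on top of the count L1 pages: one PGD (L4), one PDTPE (L3)
 * and one PDE (L2), matching the layout xen_bootstrap_tables() builds
 * below.
 */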

/*
 * Build a new table and switch to it.
 * old_count is the number of old tables (including PGD, PDTPE and PDE).
 * new_count is the number of new tables (PTE only).
 * We assume the areas don't overlap.
 */

static void
xen_bootstrap_tables(vaddr_t old_pgd, vaddr_t new_pgd,
	int old_count, int new_count, int final)
{
	pd_entry_t *pdtpe, *pde, *pte;
	pd_entry_t *cur_pgd, *bt_pgd;
	paddr_t addr, page;
	vaddr_t avail, text_end, map_end;
	int i;
	extern char __data_start;

	__PRINTK(("xen_bootstrap_tables(0x%lx, 0x%lx, %d, %d)\n",
	    old_pgd, new_pgd, old_count, new_count));
	text_end = ((vaddr_t)&__data_start) & ~PAGE_MASK;
	/*
	 * size of R/W area after kernel text:
	 *  xencons_interface (if present)
	 *  xenstore_interface (if present)
	 *  table pages (new_count + 3 entries)
	 *  UAREA
	 *  dummy user PGD
	 * extra mappings (only when final is true):
	 *  HYPERVISOR_shared_info
	 *  ISA I/O mem (if needed)
	 */
	map_end = new_pgd + ((new_count + 3 + UPAGES + 1) * NBPG);
	if (final) {
		HYPERVISOR_shared_info = (struct shared_info *)map_end;
		map_end += NBPG;
	}
#ifdef DOM0OPS
	if (final && (xen_start_info.flags & SIF_INITDOMAIN)) {
		/* ISA I/O mem */
		atdevbase = map_end;
		map_end += IOM_SIZE;
	}
#endif /* DOM0OPS */

	__PRINTK(("xen_bootstrap_tables text_end 0x%lx map_end 0x%lx\n",
	    text_end, map_end));

	/*
	 * Create bootstrap page tables.  What we need:
	 * - a PGD (level 4)
	 * - a PDTPE (level 3)
	 * - a PDE (level 2)
	 * - some PTEs (level 1)
	 */

	cur_pgd = (pd_entry_t *) old_pgd;
	bt_pgd = (pd_entry_t *) new_pgd;
	memset(bt_pgd, 0, PAGE_SIZE);
	avail = new_pgd + PAGE_SIZE;

	/* Install level 3 */
	pdtpe = (pd_entry_t *) avail;
	memset(pdtpe, 0, PAGE_SIZE);
	avail += PAGE_SIZE;

	addr = ((paddr_t) pdtpe) - KERNBASE;
	bt_pgd[pl4_pi(KERNTEXTOFF)] =
	    xpmap_ptom_masked(addr) | PG_u | PG_RW | PG_V;

	__PRINTK(("L3 va 0x%lx pa 0x%lx entry 0x%lx -> L4[0x%x]\n",
	    pdtpe, addr, bt_pgd[pl4_pi(KERNTEXTOFF)], pl4_pi(KERNTEXTOFF)));

	/* Level 2 */
	pde = (pd_entry_t *) avail;
	memset(pde, 0, PAGE_SIZE);
	avail += PAGE_SIZE;

	addr = ((paddr_t) pde) - KERNBASE;
	pdtpe[pl3_pi(KERNTEXTOFF)] =
	    xpmap_ptom_masked(addr) | PG_u | PG_RW | PG_V;
	__PRINTK(("L2 va 0x%lx pa 0x%lx entry 0x%lx -> L3[0x%x]\n",
	    pde, addr, pdtpe[pl3_pi(KERNTEXTOFF)], pl3_pi(KERNTEXTOFF)));

	/* Level 1 */
	page = KERNTEXTOFF;
	for (i = 0; i < new_count; i++) {
		paddr_t cur_page = page;

		pte = (pd_entry_t *) avail;
		avail += PAGE_SIZE;

		memset(pte, 0, PAGE_SIZE);
		while (pl2_pi(page) == pl2_pi(cur_page)) {
			if (page >= map_end) {
				/* not mapped at all */
				pte[pl1_pi(page)] = 0;
				page += PAGE_SIZE;
				continue;
			}
			pte[pl1_pi(page)] = xpmap_ptom_masked(page - KERNBASE);
			if (page == (vaddr_t)HYPERVISOR_shared_info) {
				pte[pl1_pi(page)] = xen_start_info.shared_info;
				__PRINTK(("HYPERVISOR_shared_info "
				    "va 0x%lx pte 0x%lx\n",
				    HYPERVISOR_shared_info, pte[pl1_pi(page)]));
			}
			if (xpmap_ptom_masked(page - KERNBASE) ==
			    (xen_start_info.console_mfn << PAGE_SHIFT)) {
				xencons_interface = (void *)page;
				pte[pl1_pi(page)] =
				    (xen_start_info.console_mfn << PAGE_SHIFT);
				__PRINTK(("xencons_interface "
				    "va 0x%lx pte 0x%lx\n",
				    xencons_interface, pte[pl1_pi(page)]));
			}
			if (xpmap_ptom_masked(page - KERNBASE) ==
			    (xen_start_info.store_mfn << PAGE_SHIFT)) {
				xenstore_interface = (void *)page;
				pte[pl1_pi(page)] =
				    (xen_start_info.store_mfn << PAGE_SHIFT);
				__PRINTK(("xenstore_interface "
				    "va 0x%lx pte 0x%lx\n",
				    xenstore_interface, pte[pl1_pi(page)]));
			}
#ifdef DOM0OPS
			if (page >= (vaddr_t)atdevbase &&
			    page < (vaddr_t)atdevbase + IOM_SIZE) {
				pte[pl1_pi(page)] =
				    IOM_BEGIN + (page - (vaddr_t)atdevbase);
			}
#endif
			pte[pl1_pi(page)] |= PG_u | PG_V;
			if (page < text_end) {
				/* map kernel text RO */
				pte[pl1_pi(page)] |= 0;
			} else if (page >= old_pgd
			    && page < old_pgd + (old_count * PAGE_SIZE)) {
				/* map old page tables RO */
				pte[pl1_pi(page)] |= 0;
			} else if (page >= new_pgd &&
			    page < new_pgd + ((new_count + 3) * PAGE_SIZE)) {
				/* map new page tables RO */
				pte[pl1_pi(page)] |= 0;
			} else {
				/* map page RW */
				pte[pl1_pi(page)] |= PG_RW;
			}
			if (page == old_pgd)
				__PRINTK(("va 0x%lx pa 0x%lx "
				    "entry 0x%lx -> L1[0x%x]\n",
				    page, page - KERNBASE,
				    pte[pl1_pi(page)], pl1_pi(page)));
			page += PAGE_SIZE;
		}

		addr = ((paddr_t) pte) - KERNBASE;
		pde[pl2_pi(cur_page)] =
		    xpmap_ptom_masked(addr) | PG_u | PG_RW | PG_V;
		__PRINTK(("L1 va 0x%lx pa 0x%lx entry 0x%lx -> L2[0x%x]\n",
		    pte, addr, pde[pl2_pi(cur_page)], pl2_pi(cur_page)));
		/* Mark readonly */
		xen_bt_set_readonly((vaddr_t) pte);
	}

	/* Install recursive page tables mapping */
	bt_pgd[PDIR_SLOT_PTE] =
	    xpmap_ptom_masked(new_pgd - KERNBASE) | PG_u | PG_V;
	__PRINTK(("bt_pgd[PDIR_SLOT_PTE] va 0x%lx pa 0x%lx entry 0x%lx\n",
	    new_pgd, new_pgd - KERNBASE, bt_pgd[PDIR_SLOT_PTE]));

	/* Mark tables RO */
	xen_bt_set_readonly((vaddr_t) pde);
	xen_bt_set_readonly((vaddr_t) pdtpe);
	xen_bt_set_readonly(new_pgd);
	/* Pin the PGD */
	__PRINTK(("pin PGD\n"));
	xpq_queue_pin_table(xpmap_ptom_masked(new_pgd - KERNBASE));
	/* Switch to the new tables */
	__PRINTK(("switch to PGD\n"));
	xpq_queue_pt_switch(xpmap_ptom_masked(new_pgd - KERNBASE));
	__PRINTK(("bt_pgd[PDIR_SLOT_PTE] now entry 0x%lx\n",
	    bt_pgd[PDIR_SLOT_PTE]));
	__PRINTK(("L4_BASE va 0x%lx\n", (long)L4_BASE));
	__PRINTK(("value 0x%lx\n", *L4_BASE));
	__PRINTK(("[PDIR_SLOT_PTE] 0x%lx\n", L4_BASE[PDIR_SLOT_PTE]));

	/* Now we can safely reclaim the space taken by the old tables */

	__PRINTK(("unpin old PGD\n"));
	/* Unpin the old PGD */
	xpq_queue_unpin_table(xpmap_ptom_masked(old_pgd - KERNBASE));
	/* Mark the old tables RW */
	page = old_pgd;
	addr = (paddr_t) pde[pl2_pi(page)] & PG_FRAME;
	addr = xpmap_mtop(addr);
	pte = (pd_entry_t *) (addr + KERNBASE);
	pte += pl1_pi(page);
	__PRINTK(("*pde 0x%lx addr 0x%lx pte 0x%lx\n",
	    pde[pl2_pi(page)], addr, pte));
	while (page < old_pgd + (old_count * PAGE_SIZE) && page < map_end) {
		addr = xpmap_ptom(((paddr_t) pte) - KERNBASE);
		xpq_queue_pte_update((pt_entry_t *) addr, *pte | PG_RW);
		page += PAGE_SIZE;
		/*
		 * Our PTEs are contiguous,
		 * so it's safe to just "++" here.
		 */
		pte++;
	}
	xpq_flush_queue();
}
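
/*
 * The read-only mappings above matter: under Xen's page-type rules a
 * page cannot be pinned as (or used within) a page-table hierarchy while
 * it is still mapped writable anywhere, which is why the old and new
 * tables are entered without PG_RW before the pin/switch sequence, and
 * why the old tables are flipped back to RW only after being unpinned.
 */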

void
xen_set_user_pgd(paddr_t page)
{
	struct mmuext_op op;
	int s = splvm();

	xpq_flush_queue();
	op.cmd = MMUEXT_NEW_USER_BASEPTR;
	op.arg1.mfn = xpmap_phys_to_machine_mapping[page >> PAGE_SHIFT];
	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
		panic("xen_set_user_pgd: failed to install new user page"
		    " directory %lx", page);
	splx(s);
}

/*
 * Bootstrap helper functions
 */

/*
 * Mark a page readonly
 * XXX: assuming vaddr = paddr + KERNBASE
 */

static void
xen_bt_set_readonly(vaddr_t page)
{
	pt_entry_t entry;

	entry = xpmap_ptom_masked(page - KERNBASE);
	entry |= PG_u | PG_V;

	HYPERVISOR_update_va_mapping(page, entry, UVMF_INVLPG);
}
#endif /* __x86_64__ */