/*
 * VMI specific paravirt-ops implementation
 *
 * Copyright (C) 2005, VMware, Inc.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT.  See the GNU General Public License for more
 * details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 * Send feedback to zach@vmware.com
 *
 */

#include <linux/module.h>
#include <linux/cpu.h>
#include <linux/bootmem.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/sched.h>
#include <linux/gfp.h>
#include <asm/vmi.h>
#include <asm/io.h>
#include <asm/fixmap.h>
#include <asm/apicdef.h>
#include <asm/apic.h>
#include <asm/pgalloc.h>
#include <asm/processor.h>
#include <asm/timer.h>
#include <asm/vmi_time.h>
#include <asm/kmap_types.h>
#include <asm/setup.h>

/* Convenient for calling VMI functions indirectly in the ROM */
typedef u32 __attribute__((regparm(1))) (VROMFUNC)(void);
typedef u64 __attribute__((regparm(2))) (VROMLONGFUNC)(int);

#define call_vrom_func(rom,func) \
	(((VROMFUNC *)(rom->func))())

#define call_vrom_long_func(rom,func,arg) \
	(((VROMLONGFUNC *)(rom->func)) (arg))

static struct vrom_header *vmi_rom;
static int disable_pge;
static int disable_pse;
static int disable_sep;
static int disable_tsc;
static int disable_mtrr;
static int disable_noidle;
static int disable_vmi_timer;

/* Cached VMI operations */
static struct {
	void (*cpuid)(void /* non-c */);
	void (*_set_ldt)(u32 selector);
	void (*set_tr)(u32 selector);
	void (*write_idt_entry)(struct desc_struct *, int, u32, u32);
	void (*write_gdt_entry)(struct desc_struct *, int, u32, u32);
	void (*write_ldt_entry)(struct desc_struct *, int, u32, u32);
	void (*set_kernel_stack)(u32 selector, u32 sp0);
	void (*allocate_page)(u32, u32, u32, u32, u32);
	void (*release_page)(u32, u32);
	void (*set_pte)(pte_t, pte_t *, unsigned);
	void (*update_pte)(pte_t *, unsigned);
	void (*set_linear_mapping)(int, void *, u32, u32);
	void (*_flush_tlb)(int);
	void (*set_initial_ap_state)(int, int);
	void (*halt)(void);
	void (*set_lazy_mode)(int mode);
} vmi_ops;

/* Cached VMI timer operations */
struct vmi_timer_ops vmi_timer_ops;

/*
 * VMI patching routines.
 */
#define MNEM_CALL	0xe8
#define MNEM_JMP	0xe9
#define MNEM_RET	0xc3

#define IRQ_PATCH_INT_MASK	0
#define IRQ_PATCH_DISABLE	5
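
/*
 * patch_offset() writes the 32-bit displacement of a 5-byte CALL (0xe8)
 * or JMP (0xe9) instruction.  The displacement is relative to the end
 * of the instruction, hence dest - ip - 5.
 */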
static inline void patch_offset(void *insnbuf,
				unsigned long ip, unsigned long dest)
{
	*(unsigned long *)(insnbuf+1) = dest-ip-5;
}

static unsigned patch_internal(int call, unsigned len, void *insnbuf,
			       unsigned long ip)
{
	u64 reloc;
	struct vmi_relocation_info *const rel = (struct vmi_relocation_info *)&reloc;
	reloc = call_vrom_long_func(vmi_rom, get_reloc, call);
	switch(rel->type) {
	case VMI_RELOCATION_CALL_REL:
		BUG_ON(len < 5);
		*(char *)insnbuf = MNEM_CALL;
		patch_offset(insnbuf, ip, (unsigned long)rel->eip);
		return 5;

	case VMI_RELOCATION_JUMP_REL:
		BUG_ON(len < 5);
		*(char *)insnbuf = MNEM_JMP;
		patch_offset(insnbuf, ip, (unsigned long)rel->eip);
		return 5;

	case VMI_RELOCATION_NOP:
		/* obliterate the whole thing */
		return 0;

	case VMI_RELOCATION_NONE:
		/* leave native code in place */
		break;

	default:
		BUG();
	}
	return len;
}

/*
 * Apply patch if appropriate, return length of new instruction
 * sequence.  The callee does nop padding for us.
 */
static unsigned vmi_patch(u8 type, u16 clobbers, void *insns,
			  unsigned long ip, unsigned len)
{
	switch (type) {
	case PARAVIRT_PATCH(pv_irq_ops.irq_disable):
		return patch_internal(VMI_CALL_DisableInterrupts, len,
				      insns, ip);
	case PARAVIRT_PATCH(pv_irq_ops.irq_enable):
		return patch_internal(VMI_CALL_EnableInterrupts, len,
				      insns, ip);
	case PARAVIRT_PATCH(pv_irq_ops.restore_fl):
		return patch_internal(VMI_CALL_SetInterruptMask, len,
				      insns, ip);
	case PARAVIRT_PATCH(pv_irq_ops.save_fl):
		return patch_internal(VMI_CALL_GetInterruptMask, len,
				      insns, ip);
	case PARAVIRT_PATCH(pv_cpu_ops.iret):
		return patch_internal(VMI_CALL_IRET, len, insns, ip);
	case PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit):
		return patch_internal(VMI_CALL_SYSEXIT, len, insns, ip);
	default:
		break;
	}
	return len;
}

/* CPUID has non-C semantics, and paravirt-ops API doesn't match hardware ISA */
static void vmi_cpuid(unsigned int *ax, unsigned int *bx,
		      unsigned int *cx, unsigned int *dx)
{
	int override = 0;
	if (*ax == 1)
		override = 1;
	asm volatile ("call *%6"
		      : "=a" (*ax),
			"=b" (*bx),
			"=c" (*cx),
			"=d" (*dx)
		      : "0" (*ax), "2" (*cx), "r" (vmi_ops.cpuid));
	if (override) {
		if (disable_pse)
			*dx &= ~X86_FEATURE_PSE;
		if (disable_pge)
			*dx &= ~X86_FEATURE_PGE;
		if (disable_sep)
			*dx &= ~X86_FEATURE_SEP;
		if (disable_tsc)
			*dx &= ~X86_FEATURE_TSC;
		if (disable_mtrr)
			*dx &= ~X86_FEATURE_MTRR;
	}
}

static inline void vmi_maybe_load_tls(struct desc_struct *gdt, int nr, struct desc_struct *new)
{
	if (gdt[nr].a != new->a || gdt[nr].b != new->b)
		write_gdt_entry(gdt, nr, new, 0);
}

static void vmi_load_tls(struct thread_struct *t, unsigned int cpu)
{
	struct desc_struct *gdt = get_cpu_gdt_table(cpu);
	vmi_maybe_load_tls(gdt, GDT_ENTRY_TLS_MIN + 0, &t->tls_array[0]);
	vmi_maybe_load_tls(gdt, GDT_ENTRY_TLS_MIN + 1, &t->tls_array[1]);
	vmi_maybe_load_tls(gdt, GDT_ENTRY_TLS_MIN + 2, &t->tls_array[2]);
}
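
/*
 * Install an LDT: pack its descriptor into the per-cpu GDT slot and
 * hand the resulting selector to the hypervisor; a zero selector
 * unloads the LDT.
 */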
static void vmi_set_ldt(const void *addr, unsigned entries)
{
	unsigned cpu = smp_processor_id();
	struct desc_struct desc;

	pack_descriptor(&desc, (unsigned long)addr,
			entries * sizeof(struct desc_struct) - 1,
			DESC_LDT, 0);
	write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT, &desc, DESC_LDT);
	vmi_ops._set_ldt(entries ? GDT_ENTRY_LDT*sizeof(struct desc_struct) : 0);
}

static void vmi_set_tr(void)
{
	vmi_ops.set_tr(GDT_ENTRY_TSS*sizeof(struct desc_struct));
}

static void vmi_write_idt_entry(gate_desc *dt, int entry, const gate_desc *g)
{
	u32 *idt_entry = (u32 *)g;
	vmi_ops.write_idt_entry(dt, entry, idt_entry[0], idt_entry[1]);
}

static void vmi_write_gdt_entry(struct desc_struct *dt, int entry,
				const void *desc, int type)
{
	u32 *gdt_entry = (u32 *)desc;
	vmi_ops.write_gdt_entry(dt, entry, gdt_entry[0], gdt_entry[1]);
}

static void vmi_write_ldt_entry(struct desc_struct *dt, int entry,
				const void *desc)
{
	u32 *ldt_entry = (u32 *)desc;
	vmi_ops.write_ldt_entry(dt, entry, ldt_entry[0], ldt_entry[1]);
}

static void vmi_load_sp0(struct tss_struct *tss,
			 struct thread_struct *thread)
{
	tss->x86_tss.sp0 = thread->sp0;

	/* This can only happen when SEP is enabled, no need to test "SEP"arately */
	if (unlikely(tss->x86_tss.ss1 != thread->sysenter_cs)) {
		tss->x86_tss.ss1 = thread->sysenter_cs;
		wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
	}
	vmi_ops.set_kernel_stack(__KERNEL_DS, tss->x86_tss.sp0);
}

static void vmi_flush_tlb_user(void)
{
	vmi_ops._flush_tlb(VMI_FLUSH_TLB);
}

static void vmi_flush_tlb_kernel(void)
{
	vmi_ops._flush_tlb(VMI_FLUSH_TLB | VMI_FLUSH_GLOBAL);
}

/* Stub to do nothing at all; used for delays and unimplemented calls */
static void vmi_nop(void)
{
}
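
/*
 * Page table allocation/release hooks: tell the hypervisor which pfns
 * are in use as L1 (pte) and L2 (pmd/pgd) page tables, presumably so
 * it can track or shadow them.
 */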
static void vmi_allocate_pte(struct mm_struct *mm, unsigned long pfn)
{
	vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0);
}

static void vmi_allocate_pmd(struct mm_struct *mm, unsigned long pfn)
{
	/*
	 * This call comes in very early, before mem_map is set up.
	 * It is called only for swapper_pg_dir, which already has
	 * data on it.
	 */
	vmi_ops.allocate_page(pfn, VMI_PAGE_L2, 0, 0, 0);
}

static void vmi_allocate_pmd_clone(unsigned long pfn, unsigned long clonepfn, unsigned long start, unsigned long count)
{
	vmi_ops.allocate_page(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE, clonepfn, start, count);
}

static void vmi_release_pte(unsigned long pfn)
{
	vmi_ops.release_page(pfn, VMI_PAGE_L1);
}

static void vmi_release_pmd(unsigned long pfn)
{
	vmi_ops.release_page(pfn, VMI_PAGE_L2);
}

/*
 * We use the pgd_free hook for releasing the pgd page:
 */
static void vmi_pgd_free(struct mm_struct *mm, pgd_t *pgd)
{
	unsigned long pfn = __pa(pgd) >> PAGE_SHIFT;

	vmi_ops.release_page(pfn, VMI_PAGE_L2);
}

/*
 * Helper macros for MMU update flags.  We can defer updates until a flush
 * or page invalidation only if the update is to the current address space
 * (otherwise, there is no flush).  We must check against init_mm, since
 * this could be a kernel update, which usually passes init_mm, although
 * sometimes this check can be skipped if we know the particular function
 * is only called on user mode PTEs.  We could change the kernel to pass
 * current->active_mm here, but in particular, I was unsure if changing
 * mm/highmem.c to do this would still be correct on other architectures.
 */
#define is_current_as(mm, mustbeuser) ((mm) == current->active_mm ||	\
				       (!mustbeuser && (mm) == &init_mm))
#define vmi_flags_addr(mm, addr, level, user)				\
	((level) | (is_current_as(mm, user) ?				\
		(VMI_PAGE_CURRENT_AS | ((addr) & VMI_PAGE_VA_MASK)) : 0))
#define vmi_flags_addr_defer(mm, addr, level, user)			\
	((level) | (is_current_as(mm, user) ?				\
		(VMI_PAGE_DEFER | VMI_PAGE_CURRENT_AS | ((addr) & VMI_PAGE_VA_MASK)) : 0))

static void vmi_update_pte(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
	vmi_ops.update_pte(ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0));
}

static void vmi_update_pte_defer(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
	vmi_ops.update_pte(ptep, vmi_flags_addr_defer(mm, addr, VMI_PAGE_PT, 0));
}

static void vmi_set_pte(pte_t *ptep, pte_t pte)
{
	vmi_ops.set_pte(pte, ptep, VMI_PAGE_PT);
}

static void vmi_set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte)
{
	vmi_ops.set_pte(pte, ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0));
}

static void vmi_set_pmd(pmd_t *pmdp, pmd_t pmdval)
{
#ifdef CONFIG_X86_PAE
	const pte_t pte = { .pte = pmdval.pmd };
#else
	const pte_t pte = { pmdval.pud.pgd.pgd };
#endif
	vmi_ops.set_pte(pte, (pte_t *)pmdp, VMI_PAGE_PD);
}

#ifdef CONFIG_X86_PAE

static void vmi_set_pte_atomic(pte_t *ptep, pte_t pteval)
{
	set_64bit((unsigned long long *)ptep,pte_val(pteval));
	vmi_ops.update_pte(ptep, VMI_PAGE_PT);
}

static void vmi_set_pud(pud_t *pudp, pud_t pudval)
{
	/* Um, eww */
	const pte_t pte = { .pte = pudval.pgd.pgd };
	vmi_ops.set_pte(pte, (pte_t *)pudp, VMI_PAGE_PDP);
}

static void vmi_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
	const pte_t pte = { .pte = 0 };
	vmi_ops.set_pte(pte, ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0));
}

static void vmi_pmd_clear(pmd_t *pmd)
{
	const pte_t pte = { .pte = 0 };
	vmi_ops.set_pte(pte, (pte_t *)pmd, VMI_PAGE_PD);
}
#endif
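
/*
 * AP startup: build a complete initial register state (GDT, IDT,
 * segments, stack, control registers) for a secondary cpu and hand it
 * to the hypervisor via SetInitialAPState, presumably so the AP can
 * skip the usual real-mode startup path.
 */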
#ifdef CONFIG_SMP
static void __devinit
vmi_startup_ipi_hook(int phys_apicid, unsigned long start_eip,
		     unsigned long start_esp)
{
	struct vmi_ap_state ap;

	/* Default everything to zero.  This is fine for most GPRs. */
	memset(&ap, 0, sizeof(struct vmi_ap_state));

	ap.gdtr_limit = GDT_SIZE - 1;
	ap.gdtr_base = (unsigned long) get_cpu_gdt_table(phys_apicid);

	ap.idtr_limit = IDT_ENTRIES * 8 - 1;
	ap.idtr_base = (unsigned long) idt_table;

	ap.ldtr = 0;

	ap.cs = __KERNEL_CS;
	ap.eip = (unsigned long) start_eip;
	ap.ss = __KERNEL_DS;
	ap.esp = (unsigned long) start_esp;

	ap.ds = __USER_DS;
	ap.es = __USER_DS;
	ap.fs = __KERNEL_PERCPU;
	ap.gs = __KERNEL_STACK_CANARY;

	ap.eflags = 0;

#ifdef CONFIG_X86_PAE
	/* efer should match BSP efer. */
	if (cpu_has_nx) {
		unsigned l, h;
		rdmsr(MSR_EFER, l, h);
		ap.efer = (unsigned long long) h << 32 | l;
	}
#endif

	ap.cr3 = __pa(swapper_pg_dir);
	/* Protected mode, paging, AM, WP, NE, MP. */
	ap.cr0 = 0x80050023;
	ap.cr4 = mmu_cr4_features;
	vmi_ops.set_initial_ap_state((u32)&ap, phys_apicid);
}
#endif

static void vmi_start_context_switch(struct task_struct *prev)
{
	paravirt_start_context_switch(prev);
	vmi_ops.set_lazy_mode(2);
}

static void vmi_end_context_switch(struct task_struct *next)
{
	vmi_ops.set_lazy_mode(0);
	paravirt_end_context_switch(next);
}

static void vmi_enter_lazy_mmu(void)
{
	paravirt_enter_lazy_mmu();
	vmi_ops.set_lazy_mode(1);
}

static void vmi_leave_lazy_mmu(void)
{
	vmi_ops.set_lazy_mode(0);
	paravirt_leave_lazy_mmu();
}
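
/*
 * Validate a candidate option ROM: check the 0xaa55 ROM signature and
 * the VMI signature, require a compatible API version, sanity check
 * the PCI vendor/device IDs, and accept only GPL-licensed ROMs.
 */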
static inline int __init check_vmi_rom(struct vrom_header *rom)
{
	struct pci_header *pci;
	struct pnp_header *pnp;
	const char *manufacturer = "UNKNOWN";
	const char *product = "UNKNOWN";
	const char *license = "unspecified";

	if (rom->rom_signature != 0xaa55)
		return 0;
	if (rom->vrom_signature != VMI_SIGNATURE)
		return 0;
	if (rom->api_version_maj != VMI_API_REV_MAJOR ||
	    rom->api_version_min+1 < VMI_API_REV_MINOR+1) {
		printk(KERN_WARNING "VMI: Found mismatched rom version %d.%d\n",
		       rom->api_version_maj,
		       rom->api_version_min);
		return 0;
	}

	/*
	 * Relying on the VMI_SIGNATURE field is not 100% safe, so check
	 * the PCI header and device type to make sure this is really a
	 * VMI device.
	 */
	if (!rom->pci_header_offs) {
		printk(KERN_WARNING "VMI: ROM does not contain PCI header.\n");
		return 0;
	}

	pci = (struct pci_header *)((char *)rom+rom->pci_header_offs);
	if (pci->vendorID != PCI_VENDOR_ID_VMWARE ||
	    pci->deviceID != PCI_DEVICE_ID_VMWARE_VMI) {
		/* Allow it to run... anyway, but warn */
		printk(KERN_WARNING "VMI: ROM from unknown manufacturer\n");
	}

	if (rom->pnp_header_offs) {
		pnp = (struct pnp_header *)((char *)rom+rom->pnp_header_offs);
		if (pnp->manufacturer_offset)
			manufacturer = (const char *)rom+pnp->manufacturer_offset;
		if (pnp->product_offset)
			product = (const char *)rom+pnp->product_offset;
	}

	if (rom->license_offs)
		license = (char *)rom+rom->license_offs;

	printk(KERN_INFO "VMI: Found %s %s, API version %d.%d, ROM version %d.%d\n",
	       manufacturer, product,
	       rom->api_version_maj, rom->api_version_min,
	       pci->rom_version_maj, pci->rom_version_min);

	/* Don't allow BSD/MIT here for now because we don't want to end up
	   with any binary only shim layers */
	if (strcmp(license, "GPL") && strcmp(license, "GPL v2")) {
		printk(KERN_WARNING "VMI: Non GPL license `%s' found for ROM. Not used.\n",
		       license);
		return 0;
	}

	return 1;
}

/*
 * Probe for the VMI option ROM
 */
static inline int __init probe_vmi_rom(void)
{
	unsigned long base;

	/* VMI ROM is in option ROM area, check signature */
	for (base = 0xC0000; base < 0xE0000; base += 2048) {
		struct vrom_header *romstart;
		romstart = (struct vrom_header *)isa_bus_to_virt(base);
		if (check_vmi_rom(romstart)) {
			vmi_rom = romstart;
			return 1;
		}
	}
	return 0;
}

/*
 * VMI setup common to all processors
 */
void vmi_bringup(void)
{
	/* We must establish the lowmem mapping for MMU ops to work */
	if (vmi_ops.set_linear_mapping)
		vmi_ops.set_linear_mapping(0, (void *)__PAGE_OFFSET, MAXMEM_PFN, 0);
}

/*
 * Return a pointer to a VMI function or NULL if unimplemented
 */
static void *vmi_get_function(int vmicall)
{
	u64 reloc;
	const struct vmi_relocation_info *rel = (struct vmi_relocation_info *)&reloc;
	reloc = call_vrom_long_func(vmi_rom, get_reloc, vmicall);
	BUG_ON(rel->type == VMI_RELOCATION_JUMP_REL);
	if (rel->type == VMI_RELOCATION_CALL_REL)
		return (void *)rel->eip;
	else
		return NULL;
}

/*
 * Helper macro for making the VMI paravirt-ops fill code readable.
 * For unimplemented operations, fall back to default, unless nop
 * is returned by the ROM.
 */
#define para_fill(opname, vmicall)				\
do {								\
	reloc = call_vrom_long_func(vmi_rom, get_reloc,		\
				    VMI_CALL_##vmicall);	\
	if (rel->type == VMI_RELOCATION_CALL_REL)		\
		opname = (void *)rel->eip;			\
	else if (rel->type == VMI_RELOCATION_NOP)		\
		opname = (void *)vmi_nop;			\
	else if (rel->type != VMI_RELOCATION_NONE)		\
		printk(KERN_WARNING "VMI: Unknown relocation "	\
				    "type %d for " #vmicall"\n",\
					rel->type);		\
} while (0)

/*
 * Helper macro for making the VMI paravirt-ops fill code readable.
 * For cached operations which do not match the VMI ROM ABI and must
 * go through a translation stub.  Ignore NOPs, since it is not clear
 * a NOP VMI function corresponds to a NOP paravirt-op when the
 * functions are not in 1-1 correspondence.
 */
#define para_wrap(opname, wrapper, cache, vmicall)		\
do {								\
	reloc = call_vrom_long_func(vmi_rom, get_reloc,		\
				    VMI_CALL_##vmicall);	\
	BUG_ON(rel->type == VMI_RELOCATION_JUMP_REL);		\
	if (rel->type == VMI_RELOCATION_CALL_REL) {		\
		opname = wrapper;				\
		vmi_ops.cache = (void *)rel->eip;		\
	}							\
} while (0)
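
/*
 * For example, para_fill(pv_cpu_ops.clts, CLTS) asks the ROM for the
 * VMI_CALL_CLTS relocation and then either points pv_cpu_ops.clts at
 * the ROM entry (CALL_REL), at vmi_nop (NOP), or leaves the native
 * implementation alone (NONE).  para_wrap() additionally caches the
 * ROM entry in vmi_ops and installs a C wrapper as the paravirt-op.
 */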

/*
 * Activate the VMI interface and switch into paravirtualized mode
 */
static inline int __init activate_vmi(void)
{
	short kernel_cs;
	u64 reloc;
	const struct vmi_relocation_info *rel = (struct vmi_relocation_info *)&reloc;

	/*
	 * Prevent page tables from being allocated in highmem, even if
	 * CONFIG_HIGHPTE is enabled.
	 */
	__userpte_alloc_gfp &= ~__GFP_HIGHMEM;

	if (call_vrom_func(vmi_rom, vmi_init) != 0) {
		printk(KERN_ERR "VMI ROM failed to initialize!");
		return 0;
	}
	savesegment(cs, kernel_cs);

	pv_info.paravirt_enabled = 1;
	pv_info.kernel_rpl = kernel_cs & SEGMENT_RPL_MASK;
	pv_info.name = "vmi [deprecated]";

	pv_init_ops.patch = vmi_patch;

	/*
	 * Many of these operations are ABI compatible with VMI.
	 * This means we can fill in the paravirt-ops with direct
	 * pointers into the VMI ROM.  If the calling convention for
	 * these operations changes, this code needs to be updated.
	 *
	 * Exceptions
	 *  CPUID paravirt-op uses pointers, not the native ISA
	 *  halt has no VMI equivalent; all VMI halts are "safe"
	 *  no MSR support yet - just trap and emulate.  VMI uses the
	 *    same ABI as the native ISA, but Linux wants exceptions
	 *    from bogus MSR read / write handled
	 *  rdpmc is not yet used in Linux
	 */

	/* CPUID is special, so very special it gets wrapped like a present */
	para_wrap(pv_cpu_ops.cpuid, vmi_cpuid, cpuid, CPUID);

	para_fill(pv_cpu_ops.clts, CLTS);
	para_fill(pv_cpu_ops.get_debugreg, GetDR);
	para_fill(pv_cpu_ops.set_debugreg, SetDR);
	para_fill(pv_cpu_ops.read_cr0, GetCR0);
	para_fill(pv_mmu_ops.read_cr2, GetCR2);
	para_fill(pv_mmu_ops.read_cr3, GetCR3);
	para_fill(pv_cpu_ops.read_cr4, GetCR4);
	para_fill(pv_cpu_ops.write_cr0, SetCR0);
	para_fill(pv_mmu_ops.write_cr2, SetCR2);
	para_fill(pv_mmu_ops.write_cr3, SetCR3);
	para_fill(pv_cpu_ops.write_cr4, SetCR4);

	para_fill(pv_irq_ops.save_fl.func, GetInterruptMask);
	para_fill(pv_irq_ops.restore_fl.func, SetInterruptMask);
	para_fill(pv_irq_ops.irq_disable.func, DisableInterrupts);
	para_fill(pv_irq_ops.irq_enable.func, EnableInterrupts);

	para_fill(pv_cpu_ops.wbinvd, WBINVD);
	para_fill(pv_cpu_ops.read_tsc, RDTSC);

	/* The following we emulate with trap and emulate for now */
	/* paravirt_ops.read_msr = vmi_rdmsr */
	/* paravirt_ops.write_msr = vmi_wrmsr */
	/* paravirt_ops.rdpmc = vmi_rdpmc */

	/* TR interface doesn't pass TR value, wrap */
	para_wrap(pv_cpu_ops.load_tr_desc, vmi_set_tr, set_tr, SetTR);

	/* LDT is special, too */
	para_wrap(pv_cpu_ops.set_ldt, vmi_set_ldt, _set_ldt, SetLDT);

	para_fill(pv_cpu_ops.load_gdt, SetGDT);
	para_fill(pv_cpu_ops.load_idt, SetIDT);
	para_fill(pv_cpu_ops.store_gdt, GetGDT);
	para_fill(pv_cpu_ops.store_idt, GetIDT);
	para_fill(pv_cpu_ops.store_tr, GetTR);
	pv_cpu_ops.load_tls = vmi_load_tls;
	para_wrap(pv_cpu_ops.write_ldt_entry, vmi_write_ldt_entry,
		  write_ldt_entry, WriteLDTEntry);
	para_wrap(pv_cpu_ops.write_gdt_entry, vmi_write_gdt_entry,
		  write_gdt_entry, WriteGDTEntry);
	para_wrap(pv_cpu_ops.write_idt_entry, vmi_write_idt_entry,
		  write_idt_entry, WriteIDTEntry);
	para_wrap(pv_cpu_ops.load_sp0, vmi_load_sp0, set_kernel_stack, UpdateKernelStack);
	para_fill(pv_cpu_ops.set_iopl_mask, SetIOPLMask);
	para_fill(pv_cpu_ops.io_delay, IODelay);

	para_wrap(pv_cpu_ops.start_context_switch, vmi_start_context_switch,
		  set_lazy_mode, SetLazyMode);
	para_wrap(pv_cpu_ops.end_context_switch, vmi_end_context_switch,
		  set_lazy_mode, SetLazyMode);

	para_wrap(pv_mmu_ops.lazy_mode.enter, vmi_enter_lazy_mmu,
		  set_lazy_mode, SetLazyMode);
	para_wrap(pv_mmu_ops.lazy_mode.leave, vmi_leave_lazy_mmu,
		  set_lazy_mode, SetLazyMode);

	/* user and kernel flush are just handled with different flags to FlushTLB */
	para_wrap(pv_mmu_ops.flush_tlb_user, vmi_flush_tlb_user, _flush_tlb, FlushTLB);
	para_wrap(pv_mmu_ops.flush_tlb_kernel, vmi_flush_tlb_kernel, _flush_tlb, FlushTLB);
	para_fill(pv_mmu_ops.flush_tlb_single, InvalPage);

	/*
	 * Until a standard flag format can be agreed on, we need to
	 * implement these as wrappers in Linux.  Get the VMI ROM
	 * function pointers for the two backend calls.
	 */
#ifdef CONFIG_X86_PAE
	vmi_ops.set_pte = vmi_get_function(VMI_CALL_SetPxELong);
	vmi_ops.update_pte = vmi_get_function(VMI_CALL_UpdatePxELong);
#else
	vmi_ops.set_pte = vmi_get_function(VMI_CALL_SetPxE);
	vmi_ops.update_pte = vmi_get_function(VMI_CALL_UpdatePxE);
#endif

	if (vmi_ops.set_pte) {
		pv_mmu_ops.set_pte = vmi_set_pte;
		pv_mmu_ops.set_pte_at = vmi_set_pte_at;
		pv_mmu_ops.set_pmd = vmi_set_pmd;
#ifdef CONFIG_X86_PAE
		pv_mmu_ops.set_pte_atomic = vmi_set_pte_atomic;
		pv_mmu_ops.set_pud = vmi_set_pud;
		pv_mmu_ops.pte_clear = vmi_pte_clear;
		pv_mmu_ops.pmd_clear = vmi_pmd_clear;
#endif
	}

	if (vmi_ops.update_pte) {
		pv_mmu_ops.pte_update = vmi_update_pte;
		pv_mmu_ops.pte_update_defer = vmi_update_pte_defer;
	}

	vmi_ops.allocate_page = vmi_get_function(VMI_CALL_AllocatePage);
	if (vmi_ops.allocate_page) {
		pv_mmu_ops.alloc_pte = vmi_allocate_pte;
		pv_mmu_ops.alloc_pmd = vmi_allocate_pmd;
		pv_mmu_ops.alloc_pmd_clone = vmi_allocate_pmd_clone;
	}

	vmi_ops.release_page = vmi_get_function(VMI_CALL_ReleasePage);
	if (vmi_ops.release_page) {
		pv_mmu_ops.release_pte = vmi_release_pte;
		pv_mmu_ops.release_pmd = vmi_release_pmd;
		pv_mmu_ops.pgd_free = vmi_pgd_free;
	}

	/* Set linear is needed in all cases */
	vmi_ops.set_linear_mapping = vmi_get_function(VMI_CALL_SetLinearMapping);

	/*
	 * These MUST always be patched.  Don't support indirect jumps
	 * through these operations, as the VMI interface may use either
	 * a jump or a call to get to these operations, depending on
	 * the backend.  They are performance critical anyway, so requiring
	 * a patch is not a big problem.
	 */
	pv_cpu_ops.irq_enable_sysexit = (void *)0xfeedbab0;
	pv_cpu_ops.iret = (void *)0xbadbab0;

#ifdef CONFIG_SMP
	para_wrap(pv_apic_ops.startup_ipi_hook, vmi_startup_ipi_hook, set_initial_ap_state, SetInitialAPState);
#endif

#ifdef CONFIG_X86_LOCAL_APIC
	para_fill(apic->read, APICRead);
	para_fill(apic->write, APICWrite);
#endif

	/*
	 * Check for VMI timer functionality by probing for a cycle frequency method
	 */
	reloc = call_vrom_long_func(vmi_rom, get_reloc, VMI_CALL_GetCycleFrequency);
	if (!disable_vmi_timer && rel->type != VMI_RELOCATION_NONE) {
		vmi_timer_ops.get_cycle_frequency = (void *)rel->eip;
		vmi_timer_ops.get_cycle_counter =
			vmi_get_function(VMI_CALL_GetCycleCounter);
		vmi_timer_ops.get_wallclock =
			vmi_get_function(VMI_CALL_GetWallclockTime);
		vmi_timer_ops.wallclock_updated =
			vmi_get_function(VMI_CALL_WallclockUpdated);
		vmi_timer_ops.set_alarm = vmi_get_function(VMI_CALL_SetAlarm);
		vmi_timer_ops.cancel_alarm =
			vmi_get_function(VMI_CALL_CancelAlarm);
		x86_init.timers.timer_init = vmi_time_init;
#ifdef CONFIG_X86_LOCAL_APIC
		x86_init.timers.setup_percpu_clockev = vmi_time_bsp_init;
		x86_cpuinit.setup_percpu_clockev = vmi_time_ap_init;
#endif
		pv_time_ops.sched_clock = vmi_sched_clock;
		x86_platform.calibrate_tsc = vmi_tsc_khz;
		x86_platform.get_wallclock = vmi_get_wallclock;
		x86_platform.set_wallclock = vmi_set_wallclock;

		/* We have true wallclock functions; disable CMOS clock sync */
		no_sync_cmos_clock = 1;
	} else {
		disable_noidle = 1;
		disable_vmi_timer = 1;
	}

	para_fill(pv_irq_ops.safe_halt, Halt);

	/*
	 * Alternative instruction rewriting doesn't happen soon enough
	 * to convert VMI_IRET to a call instead of a jump; so we have
	 * to do this before IRQs get reenabled.  Fortunately, it is
	 * idempotent.
	 */
	apply_paravirt(__parainstructions, __parainstructions_end);

	vmi_bringup();

	return 1;
}

#undef para_fill

void __init vmi_init(void)
{
	if (!vmi_rom)
		probe_vmi_rom();
	else
		check_vmi_rom(vmi_rom);

	/* In case probing for or validating the ROM failed, bail */
	if (!vmi_rom)
		return;

	reserve_top_address(-vmi_rom->virtual_top);

#ifdef CONFIG_X86_IO_APIC
	/* This is virtual hardware; timer routing is wired correctly */
	no_timer_check = 1;
#endif
}

void __init vmi_activate(void)
{
	unsigned long flags;

	if (!vmi_rom)
		return;

	local_irq_save(flags);
	activate_vmi();
	local_irq_restore(flags & X86_EFLAGS_IF);
}

static int __init parse_vmi(char *arg)
{
	if (!arg)
		return -EINVAL;

	if (!strcmp(arg, "disable_pge")) {
		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_PGE);
		disable_pge = 1;
	} else if (!strcmp(arg, "disable_pse")) {
		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_PSE);
		disable_pse = 1;
	} else if (!strcmp(arg, "disable_sep")) {
		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_SEP);
		disable_sep = 1;
	} else if (!strcmp(arg, "disable_tsc")) {
		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_TSC);
		disable_tsc = 1;
	} else if (!strcmp(arg, "disable_mtrr")) {
		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_MTRR);
		disable_mtrr = 1;
	} else if (!strcmp(arg, "disable_timer")) {
		disable_vmi_timer = 1;
		disable_noidle = 1;
	} else if (!strcmp(arg, "disable_noidle"))
		disable_noidle = 1;
	return 0;
}

early_param("vmi", parse_vmi);