/*	$NetBSD: nvmm_x86_svm.c,v 1.37 2019/04/06 11:49:53 maxv Exp $	*/

/*
 * Copyright (c) 2018 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Maxime Villard.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: nvmm_x86_svm.c,v 1.37 2019/04/06 11:49:53 maxv Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/kmem.h>
#include <sys/cpu.h>
#include <sys/xcall.h>
#include <sys/mman.h>

#include <uvm/uvm.h>
#include <uvm/uvm_page.h>

#include <x86/cputypes.h>
#include <x86/specialreg.h>
#include <x86/pmap.h>
#include <x86/dbregs.h>
#include <x86/cpu_counter.h>
#include <machine/cpuvar.h>

#include <dev/nvmm/nvmm.h>
#include <dev/nvmm/nvmm_internal.h>
#include <dev/nvmm/x86/nvmm_x86.h>

int svm_vmrun(paddr_t, uint64_t *);

#define MSR_VM_HSAVE_PA	0xC0010117

/* -------------------------------------------------------------------------- */

#define VMCB_EXITCODE_CR0_READ		0x0000
#define VMCB_EXITCODE_CR1_READ		0x0001
#define VMCB_EXITCODE_CR2_READ		0x0002
#define VMCB_EXITCODE_CR3_READ		0x0003
#define VMCB_EXITCODE_CR4_READ		0x0004
#define VMCB_EXITCODE_CR5_READ		0x0005
#define VMCB_EXITCODE_CR6_READ		0x0006
#define VMCB_EXITCODE_CR7_READ		0x0007
#define VMCB_EXITCODE_CR8_READ		0x0008
#define VMCB_EXITCODE_CR9_READ		0x0009
#define VMCB_EXITCODE_CR10_READ		0x000A
#define VMCB_EXITCODE_CR11_READ		0x000B
#define VMCB_EXITCODE_CR12_READ		0x000C
#define VMCB_EXITCODE_CR13_READ		0x000D
#define VMCB_EXITCODE_CR14_READ		0x000E
#define VMCB_EXITCODE_CR15_READ		0x000F
#define VMCB_EXITCODE_CR0_WRITE		0x0010
#define VMCB_EXITCODE_CR1_WRITE		0x0011
#define VMCB_EXITCODE_CR2_WRITE		0x0012
#define VMCB_EXITCODE_CR3_WRITE		0x0013
#define VMCB_EXITCODE_CR4_WRITE		0x0014
#define VMCB_EXITCODE_CR5_WRITE		0x0015
#define VMCB_EXITCODE_CR6_WRITE		0x0016
#define VMCB_EXITCODE_CR7_WRITE		0x0017
#define VMCB_EXITCODE_CR8_WRITE		0x0018
#define VMCB_EXITCODE_CR9_WRITE		0x0019
#define VMCB_EXITCODE_CR10_WRITE	0x001A
#define VMCB_EXITCODE_CR11_WRITE	0x001B
#define VMCB_EXITCODE_CR12_WRITE	0x001C
#define VMCB_EXITCODE_CR13_WRITE	0x001D
#define VMCB_EXITCODE_CR14_WRITE	0x001E
#define VMCB_EXITCODE_CR15_WRITE	0x001F
#define VMCB_EXITCODE_DR0_READ		0x0020
#define VMCB_EXITCODE_DR1_READ		0x0021
#define VMCB_EXITCODE_DR2_READ		0x0022
#define VMCB_EXITCODE_DR3_READ		0x0023
#define VMCB_EXITCODE_DR4_READ		0x0024
#define VMCB_EXITCODE_DR5_READ		0x0025
#define VMCB_EXITCODE_DR6_READ		0x0026
#define VMCB_EXITCODE_DR7_READ		0x0027
#define VMCB_EXITCODE_DR8_READ		0x0028
#define VMCB_EXITCODE_DR9_READ		0x0029
#define VMCB_EXITCODE_DR10_READ		0x002A
#define VMCB_EXITCODE_DR11_READ		0x002B
#define VMCB_EXITCODE_DR12_READ		0x002C
#define VMCB_EXITCODE_DR13_READ		0x002D
#define VMCB_EXITCODE_DR14_READ		0x002E
#define VMCB_EXITCODE_DR15_READ		0x002F
#define VMCB_EXITCODE_DR0_WRITE		0x0030
#define VMCB_EXITCODE_DR1_WRITE		0x0031
#define VMCB_EXITCODE_DR2_WRITE		0x0032
#define VMCB_EXITCODE_DR3_WRITE		0x0033
#define VMCB_EXITCODE_DR4_WRITE		0x0034
#define VMCB_EXITCODE_DR5_WRITE		0x0035
#define VMCB_EXITCODE_DR6_WRITE		0x0036
#define VMCB_EXITCODE_DR7_WRITE		0x0037
#define VMCB_EXITCODE_DR8_WRITE		0x0038
#define VMCB_EXITCODE_DR9_WRITE		0x0039
#define VMCB_EXITCODE_DR10_WRITE	0x003A
#define VMCB_EXITCODE_DR11_WRITE	0x003B
#define VMCB_EXITCODE_DR12_WRITE	0x003C
#define VMCB_EXITCODE_DR13_WRITE	0x003D
#define VMCB_EXITCODE_DR14_WRITE	0x003E
#define VMCB_EXITCODE_DR15_WRITE	0x003F
#define VMCB_EXITCODE_EXCP0		0x0040
#define VMCB_EXITCODE_EXCP1		0x0041
#define VMCB_EXITCODE_EXCP2		0x0042
#define VMCB_EXITCODE_EXCP3		0x0043
#define VMCB_EXITCODE_EXCP4		0x0044
#define VMCB_EXITCODE_EXCP5		0x0045
#define VMCB_EXITCODE_EXCP6		0x0046
#define VMCB_EXITCODE_EXCP7		0x0047
#define VMCB_EXITCODE_EXCP8		0x0048
#define VMCB_EXITCODE_EXCP9		0x0049
#define VMCB_EXITCODE_EXCP10		0x004A
#define VMCB_EXITCODE_EXCP11		0x004B
#define VMCB_EXITCODE_EXCP12		0x004C
#define VMCB_EXITCODE_EXCP13		0x004D
#define VMCB_EXITCODE_EXCP14		0x004E
#define VMCB_EXITCODE_EXCP15		0x004F
#define VMCB_EXITCODE_EXCP16		0x0050
#define VMCB_EXITCODE_EXCP17		0x0051
#define VMCB_EXITCODE_EXCP18		0x0052
#define VMCB_EXITCODE_EXCP19		0x0053
#define VMCB_EXITCODE_EXCP20		0x0054
#define VMCB_EXITCODE_EXCP21		0x0055
#define VMCB_EXITCODE_EXCP22		0x0056
#define VMCB_EXITCODE_EXCP23		0x0057
#define VMCB_EXITCODE_EXCP24		0x0058
#define VMCB_EXITCODE_EXCP25		0x0059
#define VMCB_EXITCODE_EXCP26		0x005A
#define VMCB_EXITCODE_EXCP27		0x005B
#define VMCB_EXITCODE_EXCP28		0x005C
#define VMCB_EXITCODE_EXCP29		0x005D
#define VMCB_EXITCODE_EXCP30		0x005E
#define VMCB_EXITCODE_EXCP31		0x005F
#define VMCB_EXITCODE_INTR		0x0060
#define VMCB_EXITCODE_NMI		0x0061
#define VMCB_EXITCODE_SMI		0x0062
#define VMCB_EXITCODE_INIT		0x0063
#define VMCB_EXITCODE_VINTR		0x0064
#define VMCB_EXITCODE_CR0_SEL_WRITE	0x0065
#define VMCB_EXITCODE_IDTR_READ		0x0066
#define VMCB_EXITCODE_GDTR_READ		0x0067
#define VMCB_EXITCODE_LDTR_READ		0x0068
#define VMCB_EXITCODE_TR_READ		0x0069
#define VMCB_EXITCODE_IDTR_WRITE	0x006A
#define VMCB_EXITCODE_GDTR_WRITE	0x006B
#define VMCB_EXITCODE_LDTR_WRITE	0x006C
#define VMCB_EXITCODE_TR_WRITE		0x006D
#define VMCB_EXITCODE_RDTSC		0x006E
#define VMCB_EXITCODE_RDPMC		0x006F
#define VMCB_EXITCODE_PUSHF		0x0070
#define VMCB_EXITCODE_POPF		0x0071
#define VMCB_EXITCODE_CPUID		0x0072
#define VMCB_EXITCODE_RSM		0x0073
#define VMCB_EXITCODE_IRET		0x0074
#define VMCB_EXITCODE_SWINT		0x0075
#define VMCB_EXITCODE_INVD		0x0076
#define VMCB_EXITCODE_PAUSE		0x0077
#define VMCB_EXITCODE_HLT		0x0078
#define VMCB_EXITCODE_INVLPG		0x0079
#define VMCB_EXITCODE_INVLPGA		0x007A
#define VMCB_EXITCODE_IOIO		0x007B
#define VMCB_EXITCODE_MSR		0x007C
#define VMCB_EXITCODE_TASK_SWITCH	0x007D
#define VMCB_EXITCODE_FERR_FREEZE	0x007E
#define VMCB_EXITCODE_SHUTDOWN		0x007F
#define VMCB_EXITCODE_VMRUN		0x0080
#define VMCB_EXITCODE_VMMCALL		0x0081
#define VMCB_EXITCODE_VMLOAD		0x0082
#define VMCB_EXITCODE_VMSAVE		0x0083
#define VMCB_EXITCODE_STGI		0x0084
#define VMCB_EXITCODE_CLGI		0x0085
#define VMCB_EXITCODE_SKINIT		0x0086
#define VMCB_EXITCODE_RDTSCP		0x0087
#define VMCB_EXITCODE_ICEBP		0x0088
#define VMCB_EXITCODE_WBINVD		0x0089
#define VMCB_EXITCODE_MONITOR		0x008A
#define VMCB_EXITCODE_MWAIT		0x008B
#define VMCB_EXITCODE_MWAIT_CONDITIONAL	0x008C
#define VMCB_EXITCODE_XSETBV		0x008D
#define VMCB_EXITCODE_EFER_WRITE_TRAP	0x008F
#define VMCB_EXITCODE_CR0_WRITE_TRAP	0x0090
#define VMCB_EXITCODE_CR1_WRITE_TRAP	0x0091
#define VMCB_EXITCODE_CR2_WRITE_TRAP	0x0092
#define VMCB_EXITCODE_CR3_WRITE_TRAP	0x0093
#define VMCB_EXITCODE_CR4_WRITE_TRAP	0x0094
#define VMCB_EXITCODE_CR5_WRITE_TRAP	0x0095
#define VMCB_EXITCODE_CR6_WRITE_TRAP	0x0096
#define VMCB_EXITCODE_CR7_WRITE_TRAP	0x0097
#define VMCB_EXITCODE_CR8_WRITE_TRAP	0x0098
#define VMCB_EXITCODE_CR9_WRITE_TRAP	0x0099
#define VMCB_EXITCODE_CR10_WRITE_TRAP	0x009A
#define VMCB_EXITCODE_CR11_WRITE_TRAP	0x009B
#define VMCB_EXITCODE_CR12_WRITE_TRAP	0x009C
#define VMCB_EXITCODE_CR13_WRITE_TRAP	0x009D
#define VMCB_EXITCODE_CR14_WRITE_TRAP	0x009E
#define VMCB_EXITCODE_CR15_WRITE_TRAP	0x009F
#define VMCB_EXITCODE_NPF		0x0400
#define VMCB_EXITCODE_AVIC_INCOMP_IPI	0x0401
#define VMCB_EXITCODE_AVIC_NOACCEL	0x0402
#define VMCB_EXITCODE_VMGEXIT		0x0403
#define VMCB_EXITCODE_INVALID		-1

/* -------------------------------------------------------------------------- */

struct vmcb_ctrl {
	uint32_t intercept_cr;
#define VMCB_CTRL_INTERCEPT_RCR(x)	__BIT( 0 + x)
#define VMCB_CTRL_INTERCEPT_WCR(x)	__BIT(16 + x)

	uint32_t intercept_dr;
#define VMCB_CTRL_INTERCEPT_RDR(x)	__BIT( 0 + x)
#define VMCB_CTRL_INTERCEPT_WDR(x)	__BIT(16 + x)

	uint32_t intercept_vec;
#define VMCB_CTRL_INTERCEPT_VEC(x)	__BIT(x)

	uint32_t intercept_misc1;
#define VMCB_CTRL_INTERCEPT_INTR	__BIT(0)
#define VMCB_CTRL_INTERCEPT_NMI		__BIT(1)
#define VMCB_CTRL_INTERCEPT_SMI		__BIT(2)
#define VMCB_CTRL_INTERCEPT_INIT	__BIT(3)
#define VMCB_CTRL_INTERCEPT_VINTR	__BIT(4)
#define VMCB_CTRL_INTERCEPT_CR0_SPEC	__BIT(5)
#define VMCB_CTRL_INTERCEPT_RIDTR	__BIT(6)
#define VMCB_CTRL_INTERCEPT_RGDTR	__BIT(7)
#define VMCB_CTRL_INTERCEPT_RLDTR	__BIT(8)
#define VMCB_CTRL_INTERCEPT_RTR		__BIT(9)
#define VMCB_CTRL_INTERCEPT_WIDTR	__BIT(10)
#define VMCB_CTRL_INTERCEPT_WGDTR	__BIT(11)
#define VMCB_CTRL_INTERCEPT_WLDTR	__BIT(12)
#define VMCB_CTRL_INTERCEPT_WTR		__BIT(13)
#define VMCB_CTRL_INTERCEPT_RDTSC	__BIT(14)
#define VMCB_CTRL_INTERCEPT_RDPMC	__BIT(15)
#define VMCB_CTRL_INTERCEPT_PUSHF	__BIT(16)
#define VMCB_CTRL_INTERCEPT_POPF	__BIT(17)
#define VMCB_CTRL_INTERCEPT_CPUID	__BIT(18)
#define VMCB_CTRL_INTERCEPT_RSM		__BIT(19)
#define VMCB_CTRL_INTERCEPT_IRET	__BIT(20)
#define VMCB_CTRL_INTERCEPT_INTN	__BIT(21)
#define VMCB_CTRL_INTERCEPT_INVD	__BIT(22)
#define VMCB_CTRL_INTERCEPT_PAUSE	__BIT(23)
#define VMCB_CTRL_INTERCEPT_HLT		__BIT(24)
#define VMCB_CTRL_INTERCEPT_INVLPG	__BIT(25)
#define VMCB_CTRL_INTERCEPT_INVLPGA	__BIT(26)
#define VMCB_CTRL_INTERCEPT_IOIO_PROT	__BIT(27)
#define VMCB_CTRL_INTERCEPT_MSR_PROT	__BIT(28)
#define VMCB_CTRL_INTERCEPT_TASKSW	__BIT(29)
#define VMCB_CTRL_INTERCEPT_FERR_FREEZE	__BIT(30)
#define VMCB_CTRL_INTERCEPT_SHUTDOWN	__BIT(31)

	uint32_t intercept_misc2;
#define VMCB_CTRL_INTERCEPT_VMRUN	__BIT(0)
#define VMCB_CTRL_INTERCEPT_VMMCALL	__BIT(1)
#define VMCB_CTRL_INTERCEPT_VMLOAD	__BIT(2)
#define VMCB_CTRL_INTERCEPT_VMSAVE	__BIT(3)
#define VMCB_CTRL_INTERCEPT_STGI	__BIT(4)
#define VMCB_CTRL_INTERCEPT_CLGI	__BIT(5)
#define VMCB_CTRL_INTERCEPT_SKINIT	__BIT(6)
#define VMCB_CTRL_INTERCEPT_RDTSCP	__BIT(7)
#define VMCB_CTRL_INTERCEPT_ICEBP	__BIT(8)
#define VMCB_CTRL_INTERCEPT_WBINVD	__BIT(9)
#define VMCB_CTRL_INTERCEPT_MONITOR	__BIT(10)
#define VMCB_CTRL_INTERCEPT_MWAIT	__BIT(12)
#define VMCB_CTRL_INTERCEPT_XSETBV	__BIT(13)
#define VMCB_CTRL_INTERCEPT_EFER_SPEC	__BIT(15)
#define VMCB_CTRL_INTERCEPT_WCR_SPEC(x)	__BIT(16 + x)

	uint8_t rsvd1[40];
	uint16_t pause_filt_thresh;
	uint16_t pause_filt_cnt;
	uint64_t iopm_base_pa;
	uint64_t msrpm_base_pa;
	uint64_t tsc_offset;
	uint32_t guest_asid;

	uint32_t tlb_ctrl;
#define VMCB_CTRL_TLB_CTRL_FLUSH_ALL			0x01
#define VMCB_CTRL_TLB_CTRL_FLUSH_GUEST			0x03
#define VMCB_CTRL_TLB_CTRL_FLUSH_GUEST_NONGLOBAL	0x07

	uint64_t v;
#define VMCB_CTRL_V_TPR			__BITS(3,0)
#define VMCB_CTRL_V_IRQ			__BIT(8)
#define VMCB_CTRL_V_VGIF		__BIT(9)
#define VMCB_CTRL_V_INTR_PRIO		__BITS(19,16)
#define VMCB_CTRL_V_IGN_TPR		__BIT(20)
#define VMCB_CTRL_V_INTR_MASKING	__BIT(24)
#define VMCB_CTRL_V_GUEST_VGIF		__BIT(25)
#define VMCB_CTRL_V_AVIC_EN		__BIT(31)
#define VMCB_CTRL_V_INTR_VECTOR		__BITS(39,32)

	uint64_t intr;
#define VMCB_CTRL_INTR_SHADOW		__BIT(0)

	uint64_t exitcode;
	uint64_t exitinfo1;
	uint64_t exitinfo2;

	uint64_t exitintinfo;
#define VMCB_CTRL_EXITINTINFO_VECTOR	__BITS(7,0)
#define VMCB_CTRL_EXITINTINFO_TYPE	__BITS(10,8)
#define VMCB_CTRL_EXITINTINFO_EV	__BIT(11)
#define VMCB_CTRL_EXITINTINFO_V		__BIT(31)
#define VMCB_CTRL_EXITINTINFO_ERRORCODE	__BITS(63,32)

	uint64_t enable1;
#define VMCB_CTRL_ENABLE_NP		__BIT(0)
#define VMCB_CTRL_ENABLE_SEV		__BIT(1)
#define VMCB_CTRL_ENABLE_ES_SEV		__BIT(2)

	uint64_t avic;
#define VMCB_CTRL_AVIC_APIC_BAR		__BITS(51,0)

	uint64_t ghcb;

	uint64_t eventinj;
#define VMCB_CTRL_EVENTINJ_VECTOR	__BITS(7,0)
#define VMCB_CTRL_EVENTINJ_TYPE		__BITS(10,8)
#define VMCB_CTRL_EVENTINJ_EV		__BIT(11)
#define VMCB_CTRL_EVENTINJ_V		__BIT(31)
#define VMCB_CTRL_EVENTINJ_ERRORCODE	__BITS(63,32)

	uint64_t n_cr3;

	uint64_t enable2;
#define VMCB_CTRL_ENABLE_LBR		__BIT(0)
#define VMCB_CTRL_ENABLE_VVMSAVE	__BIT(1)

	uint32_t vmcb_clean;
#define VMCB_CTRL_VMCB_CLEAN_I		__BIT(0)
#define VMCB_CTRL_VMCB_CLEAN_IOPM	__BIT(1)
#define VMCB_CTRL_VMCB_CLEAN_ASID	__BIT(2)
#define VMCB_CTRL_VMCB_CLEAN_TPR	__BIT(3)
#define VMCB_CTRL_VMCB_CLEAN_NP		__BIT(4)
#define VMCB_CTRL_VMCB_CLEAN_CR		__BIT(5)
#define VMCB_CTRL_VMCB_CLEAN_DR		__BIT(6)
#define VMCB_CTRL_VMCB_CLEAN_DT		__BIT(7)
#define VMCB_CTRL_VMCB_CLEAN_SEG	__BIT(8)
#define VMCB_CTRL_VMCB_CLEAN_CR2	__BIT(9)
#define VMCB_CTRL_VMCB_CLEAN_LBR	__BIT(10)
#define VMCB_CTRL_VMCB_CLEAN_AVIC	__BIT(11)

	uint32_t rsvd2;
	uint64_t nrip;
	uint8_t inst_len;
	uint8_t inst_bytes[15];
	uint64_t avic_abpp;
	uint64_t rsvd3;
	uint64_t avic_ltp;

	uint64_t avic_phys;
#define VMCB_CTRL_AVIC_PHYS_TABLE_PTR	__BITS(51,12)
#define VMCB_CTRL_AVIC_PHYS_MAX_INDEX	__BITS(7,0)

	uint64_t rsvd4;
	uint64_t vmcb_ptr;

	uint8_t pad[752];
} __packed;

CTASSERT(sizeof(struct vmcb_ctrl) == 1024);

struct vmcb_segment {
	uint16_t selector;
	uint16_t attrib;	/* hidden */
	uint32_t limit;		/* hidden */
	uint64_t base;		/* hidden */
} __packed;

CTASSERT(sizeof(struct vmcb_segment) == 16);

struct vmcb_state {
	struct vmcb_segment es;
	struct vmcb_segment cs;
	struct vmcb_segment ss;
	struct vmcb_segment ds;
	struct vmcb_segment fs;
	struct vmcb_segment gs;
	struct vmcb_segment gdt;
	struct vmcb_segment ldt;
	struct vmcb_segment idt;
	struct vmcb_segment tr;
	uint8_t rsvd1[43];
	uint8_t cpl;
	uint8_t rsvd2[4];
	uint64_t efer;
	uint8_t rsvd3[112];
	uint64_t cr4;
	uint64_t cr3;
	uint64_t cr0;
	uint64_t dr7;
	uint64_t dr6;
	uint64_t rflags;
	uint64_t rip;
	uint8_t rsvd4[88];
	uint64_t rsp;
	uint8_t rsvd5[24];
	uint64_t rax;
	uint64_t star;
	uint64_t lstar;
	uint64_t cstar;
	uint64_t sfmask;
	uint64_t kernelgsbase;
	uint64_t sysenter_cs;
	uint64_t sysenter_esp;
	uint64_t sysenter_eip;
	uint64_t cr2;
	uint8_t rsvd6[32];
	uint64_t g_pat;
	uint64_t dbgctl;
	uint64_t br_from;
	uint64_t br_to;
	uint64_t int_from;
	uint64_t int_to;
	uint8_t pad[2408];
} __packed;

CTASSERT(sizeof(struct vmcb_state) == 0xC00);

struct vmcb {
	struct vmcb_ctrl ctrl;
	struct vmcb_state state;
} __packed;

CTASSERT(sizeof(struct vmcb) == PAGE_SIZE);
CTASSERT(offsetof(struct vmcb, state) == 0x400);
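
/*
 * Illustrative sketch (not part of the original file): the bitfield
 * accessors used throughout are NetBSD's __BIT/__BITS/__SHIFTIN/__SHIFTOUT
 * macros from <sys/cdefs.h>. For example, composing and decoding an
 * eventinj value for a #GP (vector 13, error code 0) under the layout
 * above would look like this (SVM_EVENT_TYPE_EXC is defined further down):
 *
 *	uint64_t ev =
 *	    __SHIFTIN(13, VMCB_CTRL_EVENTINJ_VECTOR) |
 *	    __SHIFTIN(SVM_EVENT_TYPE_EXC, VMCB_CTRL_EVENTINJ_TYPE) |
 *	    __SHIFTIN(1, VMCB_CTRL_EVENTINJ_EV) |
 *	    __SHIFTIN(1, VMCB_CTRL_EVENTINJ_V) |
 *	    __SHIFTIN(0, VMCB_CTRL_EVENTINJ_ERRORCODE);
 *	uint8_t vector = __SHIFTOUT(ev, VMCB_CTRL_EVENTINJ_VECTOR);
 *
 * svm_vcpu_inject() below builds exactly this layout.
 */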

/* -------------------------------------------------------------------------- */

struct svm_hsave {
	paddr_t pa;
};

static struct svm_hsave hsave[MAXCPUS];

static uint8_t *svm_asidmap __read_mostly;
static uint32_t svm_maxasid __read_mostly;
static kmutex_t svm_asidlock __cacheline_aligned;

static bool svm_decode_assist __read_mostly;
static uint32_t svm_ctrl_tlb_flush __read_mostly;

#define SVM_XCR0_MASK_DEFAULT	(XCR0_X87|XCR0_SSE)
static uint64_t svm_xcr0_mask __read_mostly;

#define SVM_NCPUIDS	32

#define VMCB_NPAGES	1

#define MSRBM_NPAGES	2
#define MSRBM_SIZE	(MSRBM_NPAGES * PAGE_SIZE)

#define IOBM_NPAGES	3
#define IOBM_SIZE	(IOBM_NPAGES * PAGE_SIZE)
/* Does not include EFER_LMSLE. */
#define EFER_VALID \
	(EFER_SCE|EFER_LME|EFER_LMA|EFER_NXE|EFER_SVME|EFER_FFXSR|EFER_TCE)

#define EFER_TLB_FLUSH \
	(EFER_NXE|EFER_LMA|EFER_LME)
#define CR0_TLB_FLUSH \
	(CR0_PG|CR0_WP|CR0_CD|CR0_NW)
#define CR4_TLB_FLUSH \
	(CR4_PGE|CR4_PAE|CR4_PSE)
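
/*
 * Illustrative note (not part of the original file): the *_TLB_FLUSH
 * masks above list the EFER/CR0/CR4 bits whose modification changes the
 * guest's paging mode and therefore invalidates cached translations.
 * They are consumed by svm_state_tlb_flush() and
 * svm_inkernel_handle_msr() further down; e.g. a guest toggling CR0.PG
 * triggers a full guest TLB flush on the next VMRUN:
 *
 *	if ((vmcb->state.cr0 ^ new_cr0) & CR0_TLB_FLUSH)
 *		cpudata->gtlb_want_flush = true;
 */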

/* -------------------------------------------------------------------------- */

struct svm_machdata {
	bool cpuidpresent[SVM_NCPUIDS];
	struct nvmm_x86_conf_cpuid cpuid[SVM_NCPUIDS];
	volatile uint64_t mach_htlb_gen;
};

static const size_t svm_conf_sizes[NVMM_X86_NCONF] = {
	[NVMM_X86_CONF_CPUID] = sizeof(struct nvmm_x86_conf_cpuid)
};

struct svm_cpudata {
	/* General */
	bool shared_asid;
	bool gtlb_want_flush;
	bool gtsc_want_update;
	uint64_t vcpu_htlb_gen;

	/* VMCB */
	struct vmcb *vmcb;
	paddr_t vmcb_pa;

	/* I/O bitmap */
	uint8_t *iobm;
	paddr_t iobm_pa;

	/* MSR bitmap */
	uint8_t *msrbm;
	paddr_t msrbm_pa;

	/* Host state */
	uint64_t hxcr0;
	uint64_t star;
	uint64_t lstar;
	uint64_t cstar;
	uint64_t sfmask;
	uint64_t fsbase;
	uint64_t kernelgsbase;
	bool ts_set;
	struct xsave_header hfpu __aligned(64);

	/* Intr state */
	bool int_window_exit;
	bool nmi_window_exit;
	bool evt_pending;

	/* Guest state */
	uint64_t gxcr0;
	uint64_t gprs[NVMM_X64_NGPR];
	uint64_t drs[NVMM_X64_NDR];
	uint64_t gtsc;
	struct xsave_header gfpu __aligned(64);
};

static void
svm_vmcb_cache_default(struct vmcb *vmcb)
{
	vmcb->ctrl.vmcb_clean =
	    VMCB_CTRL_VMCB_CLEAN_I |
	    VMCB_CTRL_VMCB_CLEAN_IOPM |
	    VMCB_CTRL_VMCB_CLEAN_ASID |
	    VMCB_CTRL_VMCB_CLEAN_TPR |
	    VMCB_CTRL_VMCB_CLEAN_NP |
	    VMCB_CTRL_VMCB_CLEAN_CR |
	    VMCB_CTRL_VMCB_CLEAN_DR |
	    VMCB_CTRL_VMCB_CLEAN_DT |
	    VMCB_CTRL_VMCB_CLEAN_SEG |
	    VMCB_CTRL_VMCB_CLEAN_CR2 |
	    VMCB_CTRL_VMCB_CLEAN_LBR |
	    VMCB_CTRL_VMCB_CLEAN_AVIC;
}

static void
svm_vmcb_cache_update(struct vmcb *vmcb, uint64_t flags)
{
	if (flags & NVMM_X64_STATE_SEGS) {
		vmcb->ctrl.vmcb_clean &=
		    ~(VMCB_CTRL_VMCB_CLEAN_SEG | VMCB_CTRL_VMCB_CLEAN_DT);
	}
	if (flags & NVMM_X64_STATE_CRS) {
		vmcb->ctrl.vmcb_clean &=
		    ~(VMCB_CTRL_VMCB_CLEAN_CR | VMCB_CTRL_VMCB_CLEAN_CR2 |
		      VMCB_CTRL_VMCB_CLEAN_TPR);
	}
	if (flags & NVMM_X64_STATE_DRS) {
		vmcb->ctrl.vmcb_clean &= ~VMCB_CTRL_VMCB_CLEAN_DR;
	}
	if (flags & NVMM_X64_STATE_MSRS) {
		/* CR for EFER, NP for PAT. */
		vmcb->ctrl.vmcb_clean &=
		    ~(VMCB_CTRL_VMCB_CLEAN_CR | VMCB_CTRL_VMCB_CLEAN_NP);
	}
}

static inline void
svm_vmcb_cache_flush(struct vmcb *vmcb, uint64_t flags)
{
	vmcb->ctrl.vmcb_clean &= ~flags;
}

static inline void
svm_vmcb_cache_flush_all(struct vmcb *vmcb)
{
	vmcb->ctrl.vmcb_clean = 0;
}

#define SVM_EVENT_TYPE_HW_INT	0
#define SVM_EVENT_TYPE_NMI	2
#define SVM_EVENT_TYPE_EXC	3
#define SVM_EVENT_TYPE_SW_INT	4

static void
svm_event_waitexit_enable(struct nvmm_cpu *vcpu, bool nmi)
{
	struct svm_cpudata *cpudata = vcpu->cpudata;
	struct vmcb *vmcb = cpudata->vmcb;

	if (nmi) {
		vmcb->ctrl.intercept_misc1 |= VMCB_CTRL_INTERCEPT_IRET;
		cpudata->nmi_window_exit = true;
	} else {
		vmcb->ctrl.intercept_misc1 |= VMCB_CTRL_INTERCEPT_VINTR;
		vmcb->ctrl.v |= (VMCB_CTRL_V_IRQ | VMCB_CTRL_V_IGN_TPR);
		svm_vmcb_cache_flush(vmcb, VMCB_CTRL_VMCB_CLEAN_TPR);
		cpudata->int_window_exit = true;
	}

	svm_vmcb_cache_flush(vmcb, VMCB_CTRL_VMCB_CLEAN_I);
}

static void
svm_event_waitexit_disable(struct nvmm_cpu *vcpu, bool nmi)
{
	struct svm_cpudata *cpudata = vcpu->cpudata;
	struct vmcb *vmcb = cpudata->vmcb;

	if (nmi) {
		vmcb->ctrl.intercept_misc1 &= ~VMCB_CTRL_INTERCEPT_IRET;
		cpudata->nmi_window_exit = false;
	} else {
		vmcb->ctrl.intercept_misc1 &= ~VMCB_CTRL_INTERCEPT_VINTR;
		vmcb->ctrl.v &= ~(VMCB_CTRL_V_IRQ | VMCB_CTRL_V_IGN_TPR);
		svm_vmcb_cache_flush(vmcb, VMCB_CTRL_VMCB_CLEAN_TPR);
		cpudata->int_window_exit = false;
	}

	svm_vmcb_cache_flush(vmcb, VMCB_CTRL_VMCB_CLEAN_I);
}

static inline int
svm_event_has_error(uint64_t vector)
{
	switch (vector) {
	case 8:		/* #DF */
	case 10:	/* #TS */
	case 11:	/* #NP */
	case 12:	/* #SS */
	case 13:	/* #GP */
	case 14:	/* #PF */
	case 17:	/* #AC */
	case 30:	/* #SX */
		return 1;
	default:
		return 0;
	}
}

static int
svm_vcpu_inject(struct nvmm_machine *mach, struct nvmm_cpu *vcpu,
    struct nvmm_event *event)
{
	struct svm_cpudata *cpudata = vcpu->cpudata;
	struct vmcb *vmcb = cpudata->vmcb;
	int type = 0, err = 0;

	if (event->vector >= 256) {
		return EINVAL;
	}

	switch (event->type) {
	case NVMM_EVENT_INTERRUPT_HW:
		type = SVM_EVENT_TYPE_HW_INT;
		if (event->vector == 2) {
			type = SVM_EVENT_TYPE_NMI;
		}
		if (type == SVM_EVENT_TYPE_NMI) {
			if (cpudata->nmi_window_exit) {
				return EAGAIN;
			}
			svm_event_waitexit_enable(vcpu, true);
		} else {
			if (((vmcb->state.rflags & PSL_I) == 0) ||
			    ((vmcb->ctrl.intr & VMCB_CTRL_INTR_SHADOW) != 0)) {
				svm_event_waitexit_enable(vcpu, false);
				return EAGAIN;
			}
		}
		err = 0;
		break;
	case NVMM_EVENT_INTERRUPT_SW:
		return EINVAL;
	case NVMM_EVENT_EXCEPTION:
		type = SVM_EVENT_TYPE_EXC;
		if (event->vector == 2 || event->vector >= 32)
			return EINVAL;
		if (event->vector == 3 || event->vector == 0)
			return EINVAL;
		err = svm_event_has_error(event->vector);
		break;
	default:
		return EINVAL;
	}

	vmcb->ctrl.eventinj =
	    __SHIFTIN(event->vector, VMCB_CTRL_EVENTINJ_VECTOR) |
	    __SHIFTIN(type, VMCB_CTRL_EVENTINJ_TYPE) |
	    __SHIFTIN(err, VMCB_CTRL_EVENTINJ_EV) |
	    __SHIFTIN(1, VMCB_CTRL_EVENTINJ_V) |
	    __SHIFTIN(event->u.error, VMCB_CTRL_EVENTINJ_ERRORCODE);

	cpudata->evt_pending = true;

	return 0;
}

static void
svm_inject_ud(struct nvmm_machine *mach, struct nvmm_cpu *vcpu)
{
	struct nvmm_event event;
	int ret __diagused;

	event.type = NVMM_EVENT_EXCEPTION;
	event.vector = 6;
	event.u.error = 0;

	ret = svm_vcpu_inject(mach, vcpu, &event);
	KASSERT(ret == 0);
}

static void
svm_inject_gp(struct nvmm_machine *mach, struct nvmm_cpu *vcpu)
{
	struct nvmm_event event;
	int ret __diagused;

	event.type = NVMM_EVENT_EXCEPTION;
	event.vector = 13;
	event.u.error = 0;

	ret = svm_vcpu_inject(mach, vcpu, &event);
	KASSERT(ret == 0);
}

static inline void
svm_inkernel_advance(struct vmcb *vmcb)
{
	/*
	 * Maybe we should also apply single-stepping and debug exceptions.
	 * Matters for guest-ring3, because it can execute 'cpuid' under a
	 * debugger.
	 */
	vmcb->state.rip = vmcb->ctrl.nrip;
	vmcb->ctrl.intr &= ~VMCB_CTRL_INTR_SHADOW;
}

static void
svm_inkernel_handle_cpuid(struct nvmm_cpu *vcpu, uint64_t eax, uint64_t ecx)
{
	struct svm_cpudata *cpudata = vcpu->cpudata;
	uint64_t cr4;

	switch (eax) {
	case 0x00000001:
		cpudata->vmcb->state.rax &= nvmm_cpuid_00000001.eax;

		cpudata->gprs[NVMM_X64_GPR_RBX] &= ~CPUID_LOCAL_APIC_ID;
		cpudata->gprs[NVMM_X64_GPR_RBX] |= __SHIFTIN(vcpu->cpuid,
		    CPUID_LOCAL_APIC_ID);

		cpudata->gprs[NVMM_X64_GPR_RCX] &= nvmm_cpuid_00000001.ecx;
		cpudata->gprs[NVMM_X64_GPR_RCX] |= CPUID2_RAZ;

		cpudata->gprs[NVMM_X64_GPR_RDX] &= nvmm_cpuid_00000001.edx;

		/* CPUID2_OSXSAVE depends on CR4. */
		cr4 = cpudata->vmcb->state.cr4;
		if (!(cr4 & CR4_OSXSAVE)) {
			cpudata->gprs[NVMM_X64_GPR_RCX] &= ~CPUID2_OSXSAVE;
		}
		break;
	case 0x00000005:
	case 0x00000006:
		cpudata->vmcb->state.rax = 0;
		cpudata->gprs[NVMM_X64_GPR_RBX] = 0;
		cpudata->gprs[NVMM_X64_GPR_RCX] = 0;
		cpudata->gprs[NVMM_X64_GPR_RDX] = 0;
		break;
	case 0x00000007:
		cpudata->vmcb->state.rax &= nvmm_cpuid_00000007.eax;
		cpudata->gprs[NVMM_X64_GPR_RBX] &= nvmm_cpuid_00000007.ebx;
		cpudata->gprs[NVMM_X64_GPR_RCX] &= nvmm_cpuid_00000007.ecx;
		cpudata->gprs[NVMM_X64_GPR_RDX] &= nvmm_cpuid_00000007.edx;
		break;
	case 0x0000000D:
		if (svm_xcr0_mask == 0) {
			break;
		}
		switch (ecx) {
		case 0:
			cpudata->vmcb->state.rax = svm_xcr0_mask & 0xFFFFFFFF;
			if (cpudata->gxcr0 & XCR0_SSE) {
				cpudata->gprs[NVMM_X64_GPR_RBX] =
				    sizeof(struct fxsave);
			} else {
				cpudata->gprs[NVMM_X64_GPR_RBX] =
				    sizeof(struct save87);
			}
			cpudata->gprs[NVMM_X64_GPR_RBX] += 64; /* XSAVE header */
			cpudata->gprs[NVMM_X64_GPR_RCX] = sizeof(struct fxsave);
			cpudata->gprs[NVMM_X64_GPR_RDX] = svm_xcr0_mask >> 32;
			break;
		case 1:
			cpudata->vmcb->state.rax &= ~CPUID_PES1_XSAVES;
			break;
		}
		break;
	case 0x40000000:
		cpudata->gprs[NVMM_X64_GPR_RBX] = 0;
		cpudata->gprs[NVMM_X64_GPR_RCX] = 0;
		cpudata->gprs[NVMM_X64_GPR_RDX] = 0;
		memcpy(&cpudata->gprs[NVMM_X64_GPR_RBX], "___ ", 4);
		memcpy(&cpudata->gprs[NVMM_X64_GPR_RCX], "NVMM", 4);
		memcpy(&cpudata->gprs[NVMM_X64_GPR_RDX], " ___", 4);
		break;
	case 0x80000001:
		cpudata->vmcb->state.rax &= nvmm_cpuid_80000001.eax;
		cpudata->gprs[NVMM_X64_GPR_RBX] &= nvmm_cpuid_80000001.ebx;
		cpudata->gprs[NVMM_X64_GPR_RCX] &= nvmm_cpuid_80000001.ecx;
		cpudata->gprs[NVMM_X64_GPR_RDX] &= nvmm_cpuid_80000001.edx;
		break;
	default:
		break;
	}
}
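
/*
 * Illustrative sketch (not part of the original file): per the
 * 0x40000000 case above, a guest probing the hypervisor leaf reads the
 * "___ NVMM ___" signature back in EBX:ECX:EDX. A guest-side probe
 * could look like this (hypothetical snippet, GCC-style inline asm):
 *
 *	uint32_t regs[4];
 *	char sig[13];
 *	__asm volatile ("cpuid"
 *	    : "=a" (regs[0]), "=b" (regs[1]), "=c" (regs[2]), "=d" (regs[3])
 *	    : "a" (0x40000000), "c" (0));
 *	memcpy(&sig[0], &regs[1], 4);
 *	memcpy(&sig[4], &regs[2], 4);
 *	memcpy(&sig[8], &regs[3], 4);
 *	sig[12] = '\0';	// yields "___ NVMM ___"
 */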

static void
svm_exit_cpuid(struct nvmm_machine *mach, struct nvmm_cpu *vcpu,
    struct nvmm_exit *exit)
{
	struct svm_machdata *machdata = mach->machdata;
	struct svm_cpudata *cpudata = vcpu->cpudata;
	struct nvmm_x86_conf_cpuid *cpuid;
	uint64_t eax, ecx;
	u_int descs[4];
	size_t i;

	eax = cpudata->vmcb->state.rax;
	ecx = cpudata->gprs[NVMM_X64_GPR_RCX];
	x86_cpuid2(eax, ecx, descs);

	cpudata->vmcb->state.rax = descs[0];
	cpudata->gprs[NVMM_X64_GPR_RBX] = descs[1];
	cpudata->gprs[NVMM_X64_GPR_RCX] = descs[2];
	cpudata->gprs[NVMM_X64_GPR_RDX] = descs[3];

	for (i = 0; i < SVM_NCPUIDS; i++) {
		cpuid = &machdata->cpuid[i];
		if (!machdata->cpuidpresent[i]) {
			continue;
		}
		if (cpuid->leaf != eax) {
			continue;
		}

		/* del */
		cpudata->vmcb->state.rax &= ~cpuid->del.eax;
		cpudata->gprs[NVMM_X64_GPR_RBX] &= ~cpuid->del.ebx;
		cpudata->gprs[NVMM_X64_GPR_RCX] &= ~cpuid->del.ecx;
		cpudata->gprs[NVMM_X64_GPR_RDX] &= ~cpuid->del.edx;

		/* set */
		cpudata->vmcb->state.rax |= cpuid->set.eax;
		cpudata->gprs[NVMM_X64_GPR_RBX] |= cpuid->set.ebx;
		cpudata->gprs[NVMM_X64_GPR_RCX] |= cpuid->set.ecx;
		cpudata->gprs[NVMM_X64_GPR_RDX] |= cpuid->set.edx;

		break;
	}

	/* Overwrite non-tunable leaves. */
	svm_inkernel_handle_cpuid(vcpu, eax, ecx);

	svm_inkernel_advance(cpudata->vmcb);
	exit->reason = NVMM_EXIT_NONE;
}
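
/*
 * Illustrative userland sketch (not part of the original file),
 * assuming the libnvmm nvmm_machine_configure() entry point: hiding a
 * CPUID bit from the guest by registering a leaf tunable, which
 * svm_exit_cpuid() above then applies (del first, then set):
 *
 *	struct nvmm_x86_conf_cpuid conf;
 *	memset(&conf, 0, sizeof(conf));
 *	conf.leaf = 0x00000001;
 *	conf.del.ecx = CPUID2_MONITOR;	// hide MONITOR/MWAIT
 *	nvmm_machine_configure(&mach, NVMM_X86_CONF_CPUID, &conf);
 */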

static void
svm_exit_hlt(struct nvmm_machine *mach, struct nvmm_cpu *vcpu,
    struct nvmm_exit *exit)
{
	struct svm_cpudata *cpudata = vcpu->cpudata;
	struct vmcb *vmcb = cpudata->vmcb;

	if (cpudata->int_window_exit && (vmcb->state.rflags & PSL_I)) {
		svm_event_waitexit_disable(vcpu, false);
	}

	svm_inkernel_advance(cpudata->vmcb);
	exit->reason = NVMM_EXIT_HALTED;
}

#define SVM_EXIT_IO_PORT	__BITS(31,16)
#define SVM_EXIT_IO_SEG		__BITS(12,10)
#define SVM_EXIT_IO_A64		__BIT(9)
#define SVM_EXIT_IO_A32		__BIT(8)
#define SVM_EXIT_IO_A16		__BIT(7)
#define SVM_EXIT_IO_SZ32	__BIT(6)
#define SVM_EXIT_IO_SZ16	__BIT(5)
#define SVM_EXIT_IO_SZ8		__BIT(4)
#define SVM_EXIT_IO_REP		__BIT(3)
#define SVM_EXIT_IO_STR		__BIT(2)
#define SVM_EXIT_IO_IN		__BIT(0)

static void
svm_exit_io(struct nvmm_machine *mach, struct nvmm_cpu *vcpu,
    struct nvmm_exit *exit)
{
	struct svm_cpudata *cpudata = vcpu->cpudata;
	uint64_t info = cpudata->vmcb->ctrl.exitinfo1;
	uint64_t nextpc = cpudata->vmcb->ctrl.exitinfo2;

	exit->reason = NVMM_EXIT_IO;

	if (info & SVM_EXIT_IO_IN) {
		exit->u.io.type = NVMM_EXIT_IO_IN;
	} else {
		exit->u.io.type = NVMM_EXIT_IO_OUT;
	}

	exit->u.io.port = __SHIFTOUT(info, SVM_EXIT_IO_PORT);

	if (svm_decode_assist) {
		KASSERT(__SHIFTOUT(info, SVM_EXIT_IO_SEG) < 6);
		exit->u.io.seg = __SHIFTOUT(info, SVM_EXIT_IO_SEG);
	} else {
		exit->u.io.seg = -1;
	}

	if (info & SVM_EXIT_IO_A64) {
		exit->u.io.address_size = 8;
	} else if (info & SVM_EXIT_IO_A32) {
		exit->u.io.address_size = 4;
	} else if (info & SVM_EXIT_IO_A16) {
		exit->u.io.address_size = 2;
	}

	if (info & SVM_EXIT_IO_SZ32) {
		exit->u.io.operand_size = 4;
	} else if (info & SVM_EXIT_IO_SZ16) {
		exit->u.io.operand_size = 2;
	} else if (info & SVM_EXIT_IO_SZ8) {
		exit->u.io.operand_size = 1;
	}

	exit->u.io.rep = (info & SVM_EXIT_IO_REP) != 0;
	exit->u.io.str = (info & SVM_EXIT_IO_STR) != 0;
	exit->u.io.npc = nextpc;
}

static const uint64_t msr_ignore_list[] = {
	0xc0010055, /* MSR_CMPHALT */
	MSR_DE_CFG,
	MSR_IC_CFG,
	MSR_UCODE_AMD_PATCHLEVEL
};

static bool
svm_inkernel_handle_msr(struct nvmm_machine *mach, struct nvmm_cpu *vcpu,
    struct nvmm_exit *exit)
{
	struct svm_cpudata *cpudata = vcpu->cpudata;
	struct vmcb *vmcb = cpudata->vmcb;
	uint64_t val;
	size_t i;

	switch (exit->u.msr.type) {
	case NVMM_EXIT_MSR_RDMSR:
		if (exit->u.msr.msr == MSR_NB_CFG) {
			val = NB_CFG_INITAPICCPUIDLO;
			vmcb->state.rax = (val & 0xFFFFFFFF);
			cpudata->gprs[NVMM_X64_GPR_RDX] = (val >> 32);
			goto handled;
		}
		for (i = 0; i < __arraycount(msr_ignore_list); i++) {
			if (msr_ignore_list[i] != exit->u.msr.msr)
				continue;
			val = 0;
			vmcb->state.rax = (val & 0xFFFFFFFF);
			cpudata->gprs[NVMM_X64_GPR_RDX] = (val >> 32);
			goto handled;
		}
		break;
	case NVMM_EXIT_MSR_WRMSR:
		if (exit->u.msr.msr == MSR_EFER) {
			if (__predict_false(exit->u.msr.val & ~EFER_VALID)) {
				goto error;
			}
			if ((vmcb->state.efer ^ exit->u.msr.val) &
			    EFER_TLB_FLUSH) {
				cpudata->gtlb_want_flush = true;
			}
			vmcb->state.efer = exit->u.msr.val | EFER_SVME;
			svm_vmcb_cache_flush(vmcb, VMCB_CTRL_VMCB_CLEAN_CR);
			goto handled;
		}
		if (exit->u.msr.msr == MSR_TSC) {
			cpudata->gtsc = exit->u.msr.val;
			cpudata->gtsc_want_update = true;
			goto handled;
		}
		for (i = 0; i < __arraycount(msr_ignore_list); i++) {
			if (msr_ignore_list[i] != exit->u.msr.msr)
				continue;
			goto handled;
		}
		break;
	}

	return false;

handled:
	svm_inkernel_advance(cpudata->vmcb);
	return true;

error:
	svm_inject_gp(mach, vcpu);
	return true;
}

static void
svm_exit_msr(struct nvmm_machine *mach, struct nvmm_cpu *vcpu,
    struct nvmm_exit *exit)
{
	struct svm_cpudata *cpudata = vcpu->cpudata;
	uint64_t info = cpudata->vmcb->ctrl.exitinfo1;

	if (info == 0) {
		exit->u.msr.type = NVMM_EXIT_MSR_RDMSR;
	} else {
		exit->u.msr.type = NVMM_EXIT_MSR_WRMSR;
	}

	exit->u.msr.msr = (cpudata->gprs[NVMM_X64_GPR_RCX] & 0xFFFFFFFF);

	if (info == 1) {
		uint64_t rdx, rax;
		rdx = cpudata->gprs[NVMM_X64_GPR_RDX];
		rax = cpudata->vmcb->state.rax;
		exit->u.msr.val = (rdx << 32) | (rax & 0xFFFFFFFF);
	} else {
		exit->u.msr.val = 0;
	}

	if (svm_inkernel_handle_msr(mach, vcpu, exit)) {
		exit->reason = NVMM_EXIT_NONE;
		return;
	}

	exit->reason = NVMM_EXIT_MSR;
	exit->u.msr.npc = cpudata->vmcb->ctrl.nrip;
}

static void
svm_exit_npf(struct nvmm_machine *mach, struct nvmm_cpu *vcpu,
    struct nvmm_exit *exit)
{
	struct svm_cpudata *cpudata = vcpu->cpudata;
	gpaddr_t gpa = cpudata->vmcb->ctrl.exitinfo2;

	exit->reason = NVMM_EXIT_MEMORY;
	if (cpudata->vmcb->ctrl.exitinfo1 & PGEX_W)
		exit->u.mem.prot = PROT_WRITE;
	else if (cpudata->vmcb->ctrl.exitinfo1 & PGEX_X)
		exit->u.mem.prot = PROT_EXEC;
	else
		exit->u.mem.prot = PROT_READ;
	exit->u.mem.gpa = gpa;
	exit->u.mem.inst_len = cpudata->vmcb->ctrl.inst_len;
	memcpy(exit->u.mem.inst_bytes, cpudata->vmcb->ctrl.inst_bytes,
	    sizeof(exit->u.mem.inst_bytes));
}

static void
svm_exit_insn(struct vmcb *vmcb, struct nvmm_exit *exit, uint64_t reason)
{
	exit->u.insn.npc = vmcb->ctrl.nrip;
	exit->reason = reason;
}

static void
svm_exit_xsetbv(struct nvmm_machine *mach, struct nvmm_cpu *vcpu,
    struct nvmm_exit *exit)
{
	struct svm_cpudata *cpudata = vcpu->cpudata;
	struct vmcb *vmcb = cpudata->vmcb;
	uint64_t val;

	exit->reason = NVMM_EXIT_NONE;

	val = (cpudata->gprs[NVMM_X64_GPR_RDX] << 32) |
	    (vmcb->state.rax & 0xFFFFFFFF);

	if (__predict_false(cpudata->gprs[NVMM_X64_GPR_RCX] != 0)) {
		goto error;
	} else if (__predict_false(vmcb->state.cpl != 0)) {
		goto error;
	} else if (__predict_false((val & ~svm_xcr0_mask) != 0)) {
		goto error;
	} else if (__predict_false((val & XCR0_X87) == 0)) {
		goto error;
	}

	cpudata->gxcr0 = val;

	svm_inkernel_advance(cpudata->vmcb);
	return;

error:
	svm_inject_gp(mach, vcpu);
}

/* -------------------------------------------------------------------------- */

static void
svm_vcpu_guest_fpu_enter(struct nvmm_cpu *vcpu)
{
	struct svm_cpudata *cpudata = vcpu->cpudata;

	cpudata->ts_set = (rcr0() & CR0_TS) != 0;

	fpu_area_save(&cpudata->hfpu, svm_xcr0_mask);
	fpu_area_restore(&cpudata->gfpu, svm_xcr0_mask);

	if (svm_xcr0_mask != 0) {
		cpudata->hxcr0 = rdxcr(0);
		wrxcr(0, cpudata->gxcr0);
	}
}

static void
svm_vcpu_guest_fpu_leave(struct nvmm_cpu *vcpu)
{
	struct svm_cpudata *cpudata = vcpu->cpudata;

	if (svm_xcr0_mask != 0) {
		cpudata->gxcr0 = rdxcr(0);
		wrxcr(0, cpudata->hxcr0);
	}

	fpu_area_save(&cpudata->gfpu, svm_xcr0_mask);
	fpu_area_restore(&cpudata->hfpu, svm_xcr0_mask);

	if (cpudata->ts_set) {
		stts();
	}
}

static void
svm_vcpu_guest_dbregs_enter(struct nvmm_cpu *vcpu)
{
	struct svm_cpudata *cpudata = vcpu->cpudata;

	x86_dbregs_save(curlwp);

	ldr7(0);

	ldr0(cpudata->drs[NVMM_X64_DR_DR0]);
	ldr1(cpudata->drs[NVMM_X64_DR_DR1]);
	ldr2(cpudata->drs[NVMM_X64_DR_DR2]);
	ldr3(cpudata->drs[NVMM_X64_DR_DR3]);
}

static void
svm_vcpu_guest_dbregs_leave(struct nvmm_cpu *vcpu)
{
	struct svm_cpudata *cpudata = vcpu->cpudata;

	cpudata->drs[NVMM_X64_DR_DR0] = rdr0();
	cpudata->drs[NVMM_X64_DR_DR1] = rdr1();
	cpudata->drs[NVMM_X64_DR_DR2] = rdr2();
	cpudata->drs[NVMM_X64_DR_DR3] = rdr3();

	x86_dbregs_restore(curlwp);
}

static void
svm_vcpu_guest_misc_enter(struct nvmm_cpu *vcpu)
{
	struct svm_cpudata *cpudata = vcpu->cpudata;

	cpudata->fsbase = rdmsr(MSR_FSBASE);
	cpudata->kernelgsbase = rdmsr(MSR_KERNELGSBASE);
}

static void
svm_vcpu_guest_misc_leave(struct nvmm_cpu *vcpu)
{
	struct svm_cpudata *cpudata = vcpu->cpudata;

	wrmsr(MSR_STAR, cpudata->star);
	wrmsr(MSR_LSTAR, cpudata->lstar);
	wrmsr(MSR_CSTAR, cpudata->cstar);
	wrmsr(MSR_SFMASK, cpudata->sfmask);
	wrmsr(MSR_FSBASE, cpudata->fsbase);
	wrmsr(MSR_KERNELGSBASE, cpudata->kernelgsbase);
}

/* -------------------------------------------------------------------------- */

static inline void
svm_gtlb_catchup(struct nvmm_cpu *vcpu, int hcpu)
{
	struct svm_cpudata *cpudata = vcpu->cpudata;

	if (vcpu->hcpu_last != hcpu || cpudata->shared_asid) {
		cpudata->gtlb_want_flush = true;
	}
}

static inline void
svm_htlb_catchup(struct nvmm_cpu *vcpu, int hcpu)
{
	/*
	 * Nothing to do. If an hTLB flush was needed, either the VCPU was
	 * executing on this hCPU and the hTLB already got flushed, or it
	 * was executing on another hCPU in which case the catchup is done
	 * in svm_gtlb_catchup().
	 */
}

static inline uint64_t
svm_htlb_flush(struct svm_machdata *machdata, struct svm_cpudata *cpudata)
{
	struct vmcb *vmcb = cpudata->vmcb;
	uint64_t machgen;

	machgen = machdata->mach_htlb_gen;
	if (__predict_true(machgen == cpudata->vcpu_htlb_gen)) {
		return machgen;
	}

	vmcb->ctrl.tlb_ctrl = svm_ctrl_tlb_flush;
	return machgen;
}

static inline void
svm_htlb_flush_ack(struct svm_cpudata *cpudata, uint64_t machgen)
{
	struct vmcb *vmcb = cpudata->vmcb;

	if (__predict_true(vmcb->ctrl.exitcode != VMCB_EXITCODE_INVALID)) {
		cpudata->vcpu_htlb_gen = machgen;
	}
}
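
/*
 * Illustrative note (not part of the original file): the host-TLB
 * synchronization above is a generation-number protocol. svm_tlb_flush()
 * (further down) bumps mach_htlb_gen each time the host pmap invalidates
 * guest-visible mappings; before each VMRUN, svm_htlb_flush() compares
 * the machine generation against the VCPU's last-acked one and requests
 * a TLB flush on mismatch. The ack is deferred to svm_htlb_flush_ack()
 * because a VMRUN that fails with VMCB_EXITCODE_INVALID has not actually
 * performed the requested flush.
 */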

static int
svm_vcpu_run(struct nvmm_machine *mach, struct nvmm_cpu *vcpu,
    struct nvmm_exit *exit)
{
	struct svm_machdata *machdata = mach->machdata;
	struct svm_cpudata *cpudata = vcpu->cpudata;
	struct vmcb *vmcb = cpudata->vmcb;
	uint64_t machgen;
	int hcpu, s;

	kpreempt_disable();
	hcpu = cpu_number();

	svm_gtlb_catchup(vcpu, hcpu);
	svm_htlb_catchup(vcpu, hcpu);

	if (vcpu->hcpu_last != hcpu) {
		svm_vmcb_cache_flush_all(vmcb);
		cpudata->gtsc_want_update = true;
	}

	svm_vcpu_guest_dbregs_enter(vcpu);
	svm_vcpu_guest_misc_enter(vcpu);

	while (1) {
		if (cpudata->gtlb_want_flush) {
			vmcb->ctrl.tlb_ctrl = svm_ctrl_tlb_flush;
		} else {
			vmcb->ctrl.tlb_ctrl = 0;
		}

		if (__predict_false(cpudata->gtsc_want_update)) {
			vmcb->ctrl.tsc_offset = cpudata->gtsc - rdtsc();
			svm_vmcb_cache_flush(vmcb, VMCB_CTRL_VMCB_CLEAN_I);
		}

		s = splhigh();
		machgen = svm_htlb_flush(machdata, cpudata);
		svm_vcpu_guest_fpu_enter(vcpu);
		svm_vmrun(cpudata->vmcb_pa, cpudata->gprs);
		svm_vcpu_guest_fpu_leave(vcpu);
		svm_htlb_flush_ack(cpudata, machgen);
		splx(s);

		svm_vmcb_cache_default(vmcb);

		if (vmcb->ctrl.exitcode != VMCB_EXITCODE_INVALID) {
			cpudata->gtlb_want_flush = false;
			cpudata->gtsc_want_update = false;
			vcpu->hcpu_last = hcpu;
		}
		cpudata->evt_pending = false;

		switch (vmcb->ctrl.exitcode) {
		case VMCB_EXITCODE_INTR:
		case VMCB_EXITCODE_NMI:
			exit->reason = NVMM_EXIT_NONE;
			break;
		case VMCB_EXITCODE_VINTR:
			svm_event_waitexit_disable(vcpu, false);
			exit->reason = NVMM_EXIT_INT_READY;
			break;
		case VMCB_EXITCODE_IRET:
			svm_event_waitexit_disable(vcpu, true);
			exit->reason = NVMM_EXIT_NMI_READY;
			break;
		case VMCB_EXITCODE_CPUID:
			svm_exit_cpuid(mach, vcpu, exit);
			break;
		case VMCB_EXITCODE_HLT:
			svm_exit_hlt(mach, vcpu, exit);
			break;
		case VMCB_EXITCODE_IOIO:
			svm_exit_io(mach, vcpu, exit);
			break;
		case VMCB_EXITCODE_MSR:
			svm_exit_msr(mach, vcpu, exit);
			break;
		case VMCB_EXITCODE_SHUTDOWN:
			exit->reason = NVMM_EXIT_SHUTDOWN;
			break;
		case VMCB_EXITCODE_RDPMC:
		case VMCB_EXITCODE_RSM:
		case VMCB_EXITCODE_INVLPGA:
		case VMCB_EXITCODE_VMRUN:
		case VMCB_EXITCODE_VMMCALL:
		case VMCB_EXITCODE_VMLOAD:
		case VMCB_EXITCODE_VMSAVE:
		case VMCB_EXITCODE_STGI:
		case VMCB_EXITCODE_CLGI:
		case VMCB_EXITCODE_SKINIT:
		case VMCB_EXITCODE_RDTSCP:
			svm_inject_ud(mach, vcpu);
			exit->reason = NVMM_EXIT_NONE;
			break;
		case VMCB_EXITCODE_MONITOR:
			svm_exit_insn(vmcb, exit, NVMM_EXIT_MONITOR);
			break;
		case VMCB_EXITCODE_MWAIT:
			svm_exit_insn(vmcb, exit, NVMM_EXIT_MWAIT);
			break;
		case VMCB_EXITCODE_MWAIT_CONDITIONAL:
			svm_exit_insn(vmcb, exit, NVMM_EXIT_MWAIT_COND);
			break;
		case VMCB_EXITCODE_XSETBV:
			svm_exit_xsetbv(mach, vcpu, exit);
			break;
		case VMCB_EXITCODE_NPF:
			svm_exit_npf(mach, vcpu, exit);
			break;
		case VMCB_EXITCODE_FERR_FREEZE: /* ? */
		default:
			exit->reason = NVMM_EXIT_INVALID;
			break;
		}

		/* If no reason to return to userland, keep rolling. */
		if (curcpu()->ci_schedstate.spc_flags & SPCF_SHOULDYIELD) {
			break;
		}
		if (curcpu()->ci_data.cpu_softints != 0) {
			break;
		}
		if (curlwp->l_flag & LW_USERRET) {
			break;
		}
		if (exit->reason != NVMM_EXIT_NONE) {
			break;
		}
	}

	cpudata->gtsc = rdtsc() + vmcb->ctrl.tsc_offset;

	svm_vcpu_guest_misc_leave(vcpu);
	svm_vcpu_guest_dbregs_leave(vcpu);

	kpreempt_enable();

	exit->exitstate[NVMM_X64_EXITSTATE_CR8] = __SHIFTOUT(vmcb->ctrl.v,
	    VMCB_CTRL_V_TPR);
	exit->exitstate[NVMM_X64_EXITSTATE_RFLAGS] = vmcb->state.rflags;

	exit->exitstate[NVMM_X64_EXITSTATE_INT_SHADOW] =
	    ((vmcb->ctrl.intr & VMCB_CTRL_INTR_SHADOW) != 0);
	exit->exitstate[NVMM_X64_EXITSTATE_INT_WINDOW_EXIT] =
	    cpudata->int_window_exit;
	exit->exitstate[NVMM_X64_EXITSTATE_NMI_WINDOW_EXIT] =
	    cpudata->nmi_window_exit;
	exit->exitstate[NVMM_X64_EXITSTATE_EVT_PENDING] =
	    cpudata->evt_pending;

	return 0;
}

/* -------------------------------------------------------------------------- */

static int
svm_memalloc(paddr_t *pa, vaddr_t *va, size_t npages)
{
	struct pglist pglist;
	paddr_t _pa;
	vaddr_t _va;
	size_t i;
	int ret;

	ret = uvm_pglistalloc(npages * PAGE_SIZE, 0, ~0UL, PAGE_SIZE, 0,
	    &pglist, 1, 0);
	if (ret != 0)
		return ENOMEM;
	_pa = TAILQ_FIRST(&pglist)->phys_addr;
	_va = uvm_km_alloc(kernel_map, npages * PAGE_SIZE, 0,
	    UVM_KMF_VAONLY | UVM_KMF_NOWAIT);
	if (_va == 0)
		goto error;

	for (i = 0; i < npages; i++) {
		pmap_kenter_pa(_va + i * PAGE_SIZE, _pa + i * PAGE_SIZE,
		    VM_PROT_READ | VM_PROT_WRITE, PMAP_WRITE_BACK);
	}
	pmap_update(pmap_kernel());

	memset((void *)_va, 0, npages * PAGE_SIZE);

	*pa = _pa;
	*va = _va;
	return 0;

error:
	for (i = 0; i < npages; i++) {
		uvm_pagefree(PHYS_TO_VM_PAGE(_pa + i * PAGE_SIZE));
	}
	return ENOMEM;
}

static void
svm_memfree(paddr_t pa, vaddr_t va, size_t npages)
{
	size_t i;

	pmap_kremove(va, npages * PAGE_SIZE);
	pmap_update(pmap_kernel());
	uvm_km_free(kernel_map, va, npages * PAGE_SIZE, UVM_KMF_VAONLY);
	for (i = 0; i < npages; i++) {
		uvm_pagefree(PHYS_TO_VM_PAGE(pa + i * PAGE_SIZE));
	}
}

/* -------------------------------------------------------------------------- */

#define SVM_MSRBM_READ	__BIT(0)
#define SVM_MSRBM_WRITE	__BIT(1)

static void
svm_vcpu_msr_allow(uint8_t *bitmap, uint64_t msr, bool read, bool write)
{
	uint64_t byte;
	uint8_t bitoff;

	if (msr < 0x00002000) {
		/* Range 1 */
		byte = ((msr - 0x00000000) >> 2UL) + 0x0000;
	} else if (msr >= 0xC0000000 && msr < 0xC0002000) {
		/* Range 2 */
		byte = ((msr - 0xC0000000) >> 2UL) + 0x0800;
	} else if (msr >= 0xC0010000 && msr < 0xC0012000) {
		/* Range 3 */
		byte = ((msr - 0xC0010000) >> 2UL) + 0x1000;
	} else {
		panic("%s: wrong range", __func__);
	}

	bitoff = (msr & 0x3) << 1;

	if (read) {
		bitmap[byte] &= ~(SVM_MSRBM_READ << bitoff);
	}
	if (write) {
		bitmap[byte] &= ~(SVM_MSRBM_WRITE << bitoff);
	}
}
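
/*
 * Illustrative worked example (not part of the original file): for
 * MSR_LSTAR (0xC0000082), which falls in Range 2 above:
 *
 *	byte   = ((0xC0000082 - 0xC0000000) >> 2) + 0x0800 = 0x0820
 *	bitoff = (0x82 & 0x3) << 1 = 4
 *
 * so svm_vcpu_msr_allow(bitmap, MSR_LSTAR, true, true) clears bit 4
 * (the read intercept) and bit 5 (the write intercept) of bitmap[0x820].
 */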

#define SVM_SEG_ATTRIB_TYPE	__BITS(3,0)
#define SVM_SEG_ATTRIB_S	__BIT(4)
#define SVM_SEG_ATTRIB_DPL	__BITS(6,5)
#define SVM_SEG_ATTRIB_P	__BIT(7)
#define SVM_SEG_ATTRIB_AVL	__BIT(8)
#define SVM_SEG_ATTRIB_L	__BIT(9)
#define SVM_SEG_ATTRIB_DEF	__BIT(10)
#define SVM_SEG_ATTRIB_G	__BIT(11)

static void
svm_vcpu_setstate_seg(const struct nvmm_x64_state_seg *seg,
    struct vmcb_segment *vseg)
{
	vseg->selector = seg->selector;
	vseg->attrib =
	    __SHIFTIN(seg->attrib.type, SVM_SEG_ATTRIB_TYPE) |
	    __SHIFTIN(seg->attrib.s, SVM_SEG_ATTRIB_S) |
	    __SHIFTIN(seg->attrib.dpl, SVM_SEG_ATTRIB_DPL) |
	    __SHIFTIN(seg->attrib.p, SVM_SEG_ATTRIB_P) |
	    __SHIFTIN(seg->attrib.avl, SVM_SEG_ATTRIB_AVL) |
	    __SHIFTIN(seg->attrib.l, SVM_SEG_ATTRIB_L) |
	    __SHIFTIN(seg->attrib.def, SVM_SEG_ATTRIB_DEF) |
	    __SHIFTIN(seg->attrib.g, SVM_SEG_ATTRIB_G);
	vseg->limit = seg->limit;
	vseg->base = seg->base;
}

static void
svm_vcpu_getstate_seg(struct nvmm_x64_state_seg *seg, struct vmcb_segment *vseg)
{
	seg->selector = vseg->selector;
	seg->attrib.type = __SHIFTOUT(vseg->attrib, SVM_SEG_ATTRIB_TYPE);
	seg->attrib.s = __SHIFTOUT(vseg->attrib, SVM_SEG_ATTRIB_S);
	seg->attrib.dpl = __SHIFTOUT(vseg->attrib, SVM_SEG_ATTRIB_DPL);
	seg->attrib.p = __SHIFTOUT(vseg->attrib, SVM_SEG_ATTRIB_P);
	seg->attrib.avl = __SHIFTOUT(vseg->attrib, SVM_SEG_ATTRIB_AVL);
	seg->attrib.l = __SHIFTOUT(vseg->attrib, SVM_SEG_ATTRIB_L);
	seg->attrib.def = __SHIFTOUT(vseg->attrib, SVM_SEG_ATTRIB_DEF);
	seg->attrib.g = __SHIFTOUT(vseg->attrib, SVM_SEG_ATTRIB_G);
	seg->limit = vseg->limit;
	seg->base = vseg->base;
}

static inline bool
svm_state_tlb_flush(const struct vmcb *vmcb, const struct nvmm_x64_state *state,
    uint64_t flags)
{
	if (flags & NVMM_X64_STATE_CRS) {
		if ((vmcb->state.cr0 ^
		     state->crs[NVMM_X64_CR_CR0]) & CR0_TLB_FLUSH) {
			return true;
		}
		if (vmcb->state.cr3 != state->crs[NVMM_X64_CR_CR3]) {
			return true;
		}
		if ((vmcb->state.cr4 ^
		     state->crs[NVMM_X64_CR_CR4]) & CR4_TLB_FLUSH) {
			return true;
		}
	}

	if (flags & NVMM_X64_STATE_MSRS) {
		if ((vmcb->state.efer ^
		     state->msrs[NVMM_X64_MSR_EFER]) & EFER_TLB_FLUSH) {
			return true;
		}
	}

	return false;
}

static void
svm_vcpu_setstate(struct nvmm_cpu *vcpu, const void *data, uint64_t flags)
{
	const struct nvmm_x64_state *state = data;
	struct svm_cpudata *cpudata = vcpu->cpudata;
	struct vmcb *vmcb = cpudata->vmcb;
	struct fxsave *fpustate;

	if (svm_state_tlb_flush(vmcb, state, flags)) {
		cpudata->gtlb_want_flush = true;
	}

	if (flags & NVMM_X64_STATE_SEGS) {
		svm_vcpu_setstate_seg(&state->segs[NVMM_X64_SEG_CS],
		    &vmcb->state.cs);
		svm_vcpu_setstate_seg(&state->segs[NVMM_X64_SEG_DS],
		    &vmcb->state.ds);
		svm_vcpu_setstate_seg(&state->segs[NVMM_X64_SEG_ES],
		    &vmcb->state.es);
		svm_vcpu_setstate_seg(&state->segs[NVMM_X64_SEG_FS],
		    &vmcb->state.fs);
		svm_vcpu_setstate_seg(&state->segs[NVMM_X64_SEG_GS],
		    &vmcb->state.gs);
		svm_vcpu_setstate_seg(&state->segs[NVMM_X64_SEG_SS],
		    &vmcb->state.ss);
		svm_vcpu_setstate_seg(&state->segs[NVMM_X64_SEG_GDT],
		    &vmcb->state.gdt);
		svm_vcpu_setstate_seg(&state->segs[NVMM_X64_SEG_IDT],
		    &vmcb->state.idt);
		svm_vcpu_setstate_seg(&state->segs[NVMM_X64_SEG_LDT],
		    &vmcb->state.ldt);
		svm_vcpu_setstate_seg(&state->segs[NVMM_X64_SEG_TR],
		    &vmcb->state.tr);

		vmcb->state.cpl = state->segs[NVMM_X64_SEG_SS].attrib.dpl;
	}

	CTASSERT(sizeof(cpudata->gprs) == sizeof(state->gprs));
	if (flags & NVMM_X64_STATE_GPRS) {
		memcpy(cpudata->gprs, state->gprs, sizeof(state->gprs));

		vmcb->state.rip = state->gprs[NVMM_X64_GPR_RIP];
		vmcb->state.rsp = state->gprs[NVMM_X64_GPR_RSP];
		vmcb->state.rax = state->gprs[NVMM_X64_GPR_RAX];
		vmcb->state.rflags = state->gprs[NVMM_X64_GPR_RFLAGS];
	}

	if (flags & NVMM_X64_STATE_CRS) {
		vmcb->state.cr0 = state->crs[NVMM_X64_CR_CR0];
		vmcb->state.cr2 = state->crs[NVMM_X64_CR_CR2];
		vmcb->state.cr3 = state->crs[NVMM_X64_CR_CR3];
		vmcb->state.cr4 = state->crs[NVMM_X64_CR_CR4];

		vmcb->ctrl.v &= ~VMCB_CTRL_V_TPR;
		vmcb->ctrl.v |= __SHIFTIN(state->crs[NVMM_X64_CR_CR8],
		    VMCB_CTRL_V_TPR);

		if (svm_xcr0_mask != 0) {
			/* Clear illegal XCR0 bits, set mandatory X87 bit. */
			cpudata->gxcr0 = state->crs[NVMM_X64_CR_XCR0];
			cpudata->gxcr0 &= svm_xcr0_mask;
			cpudata->gxcr0 |= XCR0_X87;
		}
	}

	CTASSERT(sizeof(cpudata->drs) == sizeof(state->drs));
	if (flags & NVMM_X64_STATE_DRS) {
		memcpy(cpudata->drs, state->drs, sizeof(state->drs));

		vmcb->state.dr6 = state->drs[NVMM_X64_DR_DR6];
		vmcb->state.dr7 = state->drs[NVMM_X64_DR_DR7];
	}

	if (flags & NVMM_X64_STATE_MSRS) {
		/*
		 * EFER_SVME is mandatory.
		 */
		vmcb->state.efer = state->msrs[NVMM_X64_MSR_EFER] | EFER_SVME;
		vmcb->state.star = state->msrs[NVMM_X64_MSR_STAR];
		vmcb->state.lstar = state->msrs[NVMM_X64_MSR_LSTAR];
		vmcb->state.cstar = state->msrs[NVMM_X64_MSR_CSTAR];
		vmcb->state.sfmask = state->msrs[NVMM_X64_MSR_SFMASK];
		vmcb->state.kernelgsbase =
		    state->msrs[NVMM_X64_MSR_KERNELGSBASE];
		vmcb->state.sysenter_cs =
		    state->msrs[NVMM_X64_MSR_SYSENTER_CS];
		vmcb->state.sysenter_esp =
		    state->msrs[NVMM_X64_MSR_SYSENTER_ESP];
		vmcb->state.sysenter_eip =
		    state->msrs[NVMM_X64_MSR_SYSENTER_EIP];
		vmcb->state.g_pat = state->msrs[NVMM_X64_MSR_PAT];

		cpudata->gtsc = state->msrs[NVMM_X64_MSR_TSC];
		cpudata->gtsc_want_update = true;
	}

	if (flags & NVMM_X64_STATE_INTR) {
		if (state->intr.int_shadow) {
			vmcb->ctrl.intr |= VMCB_CTRL_INTR_SHADOW;
		} else {
			vmcb->ctrl.intr &= ~VMCB_CTRL_INTR_SHADOW;
		}

		if (state->intr.int_window_exiting) {
			svm_event_waitexit_enable(vcpu, false);
		} else {
			svm_event_waitexit_disable(vcpu, false);
		}

		if (state->intr.nmi_window_exiting) {
			svm_event_waitexit_enable(vcpu, true);
		} else {
			svm_event_waitexit_disable(vcpu, true);
		}
	}

	CTASSERT(sizeof(cpudata->gfpu.xsh_fxsave) == sizeof(state->fpu));
	if (flags & NVMM_X64_STATE_FPU) {
		memcpy(cpudata->gfpu.xsh_fxsave, &state->fpu,
		    sizeof(state->fpu));

		fpustate = (struct fxsave *)cpudata->gfpu.xsh_fxsave;
		fpustate->fx_mxcsr_mask &= x86_fpu_mxcsr_mask;
		fpustate->fx_mxcsr &= fpustate->fx_mxcsr_mask;

		if (svm_xcr0_mask != 0) {
			/* Reset XSTATE_BV, to force a reload. */
			cpudata->gfpu.xsh_xstate_bv = svm_xcr0_mask;
		}
	}

	svm_vmcb_cache_update(vmcb, flags);
}

static void
svm_vcpu_getstate(struct nvmm_cpu *vcpu, void *data, uint64_t flags)
{
	struct nvmm_x64_state *state = (struct nvmm_x64_state *)data;
	struct svm_cpudata *cpudata = vcpu->cpudata;
	struct vmcb *vmcb = cpudata->vmcb;

	if (flags & NVMM_X64_STATE_SEGS) {
		svm_vcpu_getstate_seg(&state->segs[NVMM_X64_SEG_CS],
		    &vmcb->state.cs);
		svm_vcpu_getstate_seg(&state->segs[NVMM_X64_SEG_DS],
		    &vmcb->state.ds);
		svm_vcpu_getstate_seg(&state->segs[NVMM_X64_SEG_ES],
		    &vmcb->state.es);
		svm_vcpu_getstate_seg(&state->segs[NVMM_X64_SEG_FS],
		    &vmcb->state.fs);
		svm_vcpu_getstate_seg(&state->segs[NVMM_X64_SEG_GS],
		    &vmcb->state.gs);
		svm_vcpu_getstate_seg(&state->segs[NVMM_X64_SEG_SS],
		    &vmcb->state.ss);
		svm_vcpu_getstate_seg(&state->segs[NVMM_X64_SEG_GDT],
		    &vmcb->state.gdt);
		svm_vcpu_getstate_seg(&state->segs[NVMM_X64_SEG_IDT],
		    &vmcb->state.idt);
		svm_vcpu_getstate_seg(&state->segs[NVMM_X64_SEG_LDT],
		    &vmcb->state.ldt);
		svm_vcpu_getstate_seg(&state->segs[NVMM_X64_SEG_TR],
		    &vmcb->state.tr);

		state->segs[NVMM_X64_SEG_SS].attrib.dpl = vmcb->state.cpl;
	}

	CTASSERT(sizeof(cpudata->gprs) == sizeof(state->gprs));
	if (flags & NVMM_X64_STATE_GPRS) {
		memcpy(state->gprs, cpudata->gprs, sizeof(state->gprs));

		state->gprs[NVMM_X64_GPR_RIP] = vmcb->state.rip;
		state->gprs[NVMM_X64_GPR_RSP] = vmcb->state.rsp;
		state->gprs[NVMM_X64_GPR_RAX] = vmcb->state.rax;
		state->gprs[NVMM_X64_GPR_RFLAGS] = vmcb->state.rflags;
	}

	if (flags & NVMM_X64_STATE_CRS) {
		state->crs[NVMM_X64_CR_CR0] = vmcb->state.cr0;
		state->crs[NVMM_X64_CR_CR2] = vmcb->state.cr2;
		state->crs[NVMM_X64_CR_CR3] = vmcb->state.cr3;
		state->crs[NVMM_X64_CR_CR4] = vmcb->state.cr4;
		state->crs[NVMM_X64_CR_CR8] = __SHIFTOUT(vmcb->ctrl.v,
		    VMCB_CTRL_V_TPR);
		state->crs[NVMM_X64_CR_XCR0] = cpudata->gxcr0;
	}

	CTASSERT(sizeof(cpudata->drs) == sizeof(state->drs));
	if (flags & NVMM_X64_STATE_DRS) {
		memcpy(state->drs, cpudata->drs, sizeof(state->drs));

		state->drs[NVMM_X64_DR_DR6] = vmcb->state.dr6;
		state->drs[NVMM_X64_DR_DR7] = vmcb->state.dr7;
	}

	if (flags & NVMM_X64_STATE_MSRS) {
		state->msrs[NVMM_X64_MSR_EFER] = vmcb->state.efer;
		state->msrs[NVMM_X64_MSR_STAR] = vmcb->state.star;
		state->msrs[NVMM_X64_MSR_LSTAR] = vmcb->state.lstar;
		state->msrs[NVMM_X64_MSR_CSTAR] = vmcb->state.cstar;
		state->msrs[NVMM_X64_MSR_SFMASK] = vmcb->state.sfmask;
		state->msrs[NVMM_X64_MSR_KERNELGSBASE] =
		    vmcb->state.kernelgsbase;
		state->msrs[NVMM_X64_MSR_SYSENTER_CS] =
		    vmcb->state.sysenter_cs;
		state->msrs[NVMM_X64_MSR_SYSENTER_ESP] =
		    vmcb->state.sysenter_esp;
		state->msrs[NVMM_X64_MSR_SYSENTER_EIP] =
		    vmcb->state.sysenter_eip;
		state->msrs[NVMM_X64_MSR_PAT] = vmcb->state.g_pat;
		state->msrs[NVMM_X64_MSR_TSC] = cpudata->gtsc;

		/* Hide SVME. */
		state->msrs[NVMM_X64_MSR_EFER] &= ~EFER_SVME;
	}

	if (flags & NVMM_X64_STATE_INTR) {
		state->intr.int_shadow =
		    (vmcb->ctrl.intr & VMCB_CTRL_INTR_SHADOW) != 0;
		state->intr.int_window_exiting = cpudata->int_window_exit;
		state->intr.nmi_window_exiting = cpudata->nmi_window_exit;
		state->intr.evt_pending = cpudata->evt_pending;
	}

	CTASSERT(sizeof(cpudata->gfpu.xsh_fxsave) == sizeof(state->fpu));
	if (flags & NVMM_X64_STATE_FPU) {
		memcpy(&state->fpu, cpudata->gfpu.xsh_fxsave,
		    sizeof(state->fpu));
	}
}
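
/*
 * Illustrative userland sketch (not part of the original file),
 * assuming the libnvmm nvmm_vcpu_getstate() wrapper that lands here:
 * reading the guest RIP after an exit:
 *
 *	struct nvmm_x64_state state;
 *	if (nvmm_vcpu_getstate(&mach, cpuid, &state,
 *	    NVMM_X64_STATE_GPRS) == 0)
 *		printf("guest rip = %" PRIx64 "\n",
 *		    state.gprs[NVMM_X64_GPR_RIP]);
 */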

/* -------------------------------------------------------------------------- */

static void
svm_asid_alloc(struct nvmm_cpu *vcpu)
{
	struct svm_cpudata *cpudata = vcpu->cpudata;
	struct vmcb *vmcb = cpudata->vmcb;
	size_t i, oct, bit;

	mutex_enter(&svm_asidlock);

	for (i = 0; i < svm_maxasid; i++) {
		oct = i / 8;
		bit = i % 8;

		if (svm_asidmap[oct] & __BIT(bit)) {
			continue;
		}

		svm_asidmap[oct] |= __BIT(bit);
		vmcb->ctrl.guest_asid = i;
		mutex_exit(&svm_asidlock);
		return;
	}

	/*
	 * No free ASID. Use the last one, which is shared and requires
	 * special TLB handling.
	 */
	cpudata->shared_asid = true;
	vmcb->ctrl.guest_asid = svm_maxasid - 1;
	mutex_exit(&svm_asidlock);
}

static void
svm_asid_free(struct nvmm_cpu *vcpu)
{
	struct svm_cpudata *cpudata = vcpu->cpudata;
	struct vmcb *vmcb = cpudata->vmcb;
	size_t oct, bit;

	if (cpudata->shared_asid) {
		return;
	}

	oct = vmcb->ctrl.guest_asid / 8;
	bit = vmcb->ctrl.guest_asid % 8;

	mutex_enter(&svm_asidlock);
	svm_asidmap[oct] &= ~__BIT(bit);
	mutex_exit(&svm_asidlock);
}
	/* Allow reads/writes of Control Registers. */
	vmcb->ctrl.intercept_cr = 0;

	/* Allow reads/writes of Debug Registers. */
	vmcb->ctrl.intercept_dr = 0;

	/* Allow exceptions 0 to 31. */
	vmcb->ctrl.intercept_vec = 0;

	/*
	 * Allow:
	 *  - SMI [SMM interrupts]
	 *  - VINTR [virtual interrupts]
	 *  - CR0_SPEC [CR0 writes changing fields other than CR0.TS or CR0.MP]
	 *  - RIDTR [reads of IDTR]
	 *  - RGDTR [reads of GDTR]
	 *  - RLDTR [reads of LDTR]
	 *  - RTR [reads of TR]
	 *  - WIDTR [writes of IDTR]
	 *  - WGDTR [writes of GDTR]
	 *  - WLDTR [writes of LDTR]
	 *  - WTR [writes of TR]
	 *  - RDTSC [rdtsc instruction]
	 *  - PUSHF [pushf instruction]
	 *  - POPF [popf instruction]
	 *  - IRET [iret instruction]
	 *  - INTN [int $n instructions]
	 *  - INVD [invd instruction]
	 *  - PAUSE [pause instruction]
	 *  - INVLPG [invlpg instruction]
	 *  - TASKSW [task switches]
	 *
	 * Intercept the rest below.
	 */
	vmcb->ctrl.intercept_misc1 =
	    VMCB_CTRL_INTERCEPT_INTR |
	    VMCB_CTRL_INTERCEPT_NMI |
	    VMCB_CTRL_INTERCEPT_INIT |
	    VMCB_CTRL_INTERCEPT_RDPMC |
	    VMCB_CTRL_INTERCEPT_CPUID |
	    VMCB_CTRL_INTERCEPT_RSM |
	    VMCB_CTRL_INTERCEPT_HLT |
	    VMCB_CTRL_INTERCEPT_INVLPGA |
	    VMCB_CTRL_INTERCEPT_IOIO_PROT |
	    VMCB_CTRL_INTERCEPT_MSR_PROT |
	    VMCB_CTRL_INTERCEPT_FERR_FREEZE |
	    VMCB_CTRL_INTERCEPT_SHUTDOWN;

	/*
	 * Allow:
	 *  - ICEBP [icebp instruction]
	 *  - WBINVD [wbinvd instruction]
	 *  - WCR_SPEC(0..15) [writes of CR0-15, received after instruction]
	 *
	 * Intercept the rest below.
	 */
	vmcb->ctrl.intercept_misc2 =
	    VMCB_CTRL_INTERCEPT_VMRUN |
	    VMCB_CTRL_INTERCEPT_VMMCALL |
	    VMCB_CTRL_INTERCEPT_VMLOAD |
	    VMCB_CTRL_INTERCEPT_VMSAVE |
	    VMCB_CTRL_INTERCEPT_STGI |
	    VMCB_CTRL_INTERCEPT_CLGI |
	    VMCB_CTRL_INTERCEPT_SKINIT |
	    VMCB_CTRL_INTERCEPT_RDTSCP |
	    VMCB_CTRL_INTERCEPT_MONITOR |
	    VMCB_CTRL_INTERCEPT_MWAIT |
	    VMCB_CTRL_INTERCEPT_XSETBV;

	/* Intercept all I/O accesses. */
	memset(cpudata->iobm, 0xFF, IOBM_SIZE);
	vmcb->ctrl.iopm_base_pa = cpudata->iobm_pa;

	/* Allow direct access to certain MSRs. */
	memset(cpudata->msrbm, 0xFF, MSRBM_SIZE);
	svm_vcpu_msr_allow(cpudata->msrbm, MSR_EFER, true, false);
	svm_vcpu_msr_allow(cpudata->msrbm, MSR_STAR, true, true);
	svm_vcpu_msr_allow(cpudata->msrbm, MSR_LSTAR, true, true);
	svm_vcpu_msr_allow(cpudata->msrbm, MSR_CSTAR, true, true);
	svm_vcpu_msr_allow(cpudata->msrbm, MSR_SFMASK, true, true);
	svm_vcpu_msr_allow(cpudata->msrbm, MSR_KERNELGSBASE, true, true);
	svm_vcpu_msr_allow(cpudata->msrbm, MSR_SYSENTER_CS, true, true);
	svm_vcpu_msr_allow(cpudata->msrbm, MSR_SYSENTER_ESP, true, true);
	svm_vcpu_msr_allow(cpudata->msrbm, MSR_SYSENTER_EIP, true, true);
	svm_vcpu_msr_allow(cpudata->msrbm, MSR_FSBASE, true, true);
	svm_vcpu_msr_allow(cpudata->msrbm, MSR_GSBASE, true, true);
	svm_vcpu_msr_allow(cpudata->msrbm, MSR_CR_PAT, true, true);
	svm_vcpu_msr_allow(cpudata->msrbm, MSR_TSC, true, false);
	vmcb->ctrl.msrpm_base_pa = cpudata->msrbm_pa;

	/* Generate ASID. */
	svm_asid_alloc(vcpu);

	/* Virtual TPR. */
	vmcb->ctrl.v = VMCB_CTRL_V_INTR_MASKING;

	/* Enable Nested Paging. */
	vmcb->ctrl.enable1 = VMCB_CTRL_ENABLE_NP;
	vmcb->ctrl.n_cr3 = mach->vm->vm_map.pmap->pm_pdirpa[0];

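	/*
	 * The guest FPU state lives in an XSAVE area. Setting xstate_bv
	 * to the full svm_xcr0_mask marks every enabled component as
	 * present, so the first XRSTOR reloads them all; xcomp_bv = 0
	 * selects the standard (non-compacted) XSAVE format.
	 */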
	/* Init XSAVE header. */
	cpudata->gfpu.xsh_xstate_bv = svm_xcr0_mask;
	cpudata->gfpu.xsh_xcomp_bv = 0;

	/* These MSRs are static. */
	cpudata->star = rdmsr(MSR_STAR);
	cpudata->lstar = rdmsr(MSR_LSTAR);
	cpudata->cstar = rdmsr(MSR_CSTAR);
	cpudata->sfmask = rdmsr(MSR_SFMASK);

	/* Install the RESET state. */
	svm_vcpu_setstate(vcpu, &nvmm_x86_reset_state, NVMM_X64_STATE_ALL);
}

static int
svm_vcpu_create(struct nvmm_machine *mach, struct nvmm_cpu *vcpu)
{
	struct svm_cpudata *cpudata;
	int error;

	/* Allocate the SVM cpudata. */
	cpudata = (struct svm_cpudata *)uvm_km_alloc(kernel_map,
	    roundup(sizeof(*cpudata), PAGE_SIZE), 0,
	    UVM_KMF_WIRED|UVM_KMF_ZERO);
	vcpu->cpudata = cpudata;

	/* VMCB */
	error = svm_memalloc(&cpudata->vmcb_pa, (vaddr_t *)&cpudata->vmcb,
	    VMCB_NPAGES);
	if (error)
		goto error;

	/* I/O Bitmap */
	error = svm_memalloc(&cpudata->iobm_pa, (vaddr_t *)&cpudata->iobm,
	    IOBM_NPAGES);
	if (error)
		goto error;

	/* MSR Bitmap */
	error = svm_memalloc(&cpudata->msrbm_pa, (vaddr_t *)&cpudata->msrbm,
	    MSRBM_NPAGES);
	if (error)
		goto error;

	/* Init the VCPU info. */
	svm_vcpu_init(mach, vcpu);

	return 0;

error:
	if (cpudata->vmcb_pa) {
		svm_memfree(cpudata->vmcb_pa, (vaddr_t)cpudata->vmcb,
		    VMCB_NPAGES);
	}
	if (cpudata->iobm_pa) {
		svm_memfree(cpudata->iobm_pa, (vaddr_t)cpudata->iobm,
		    IOBM_NPAGES);
	}
	if (cpudata->msrbm_pa) {
		svm_memfree(cpudata->msrbm_pa, (vaddr_t)cpudata->msrbm,
		    MSRBM_NPAGES);
	}
	uvm_km_free(kernel_map, (vaddr_t)cpudata,
	    roundup(sizeof(*cpudata), PAGE_SIZE), UVM_KMF_WIRED);
	return error;
}

static void
svm_vcpu_destroy(struct nvmm_machine *mach, struct nvmm_cpu *vcpu)
{
	struct svm_cpudata *cpudata = vcpu->cpudata;

	svm_asid_free(vcpu);

	svm_memfree(cpudata->vmcb_pa, (vaddr_t)cpudata->vmcb, VMCB_NPAGES);
	svm_memfree(cpudata->iobm_pa, (vaddr_t)cpudata->iobm, IOBM_NPAGES);
	svm_memfree(cpudata->msrbm_pa, (vaddr_t)cpudata->msrbm, MSRBM_NPAGES);

	uvm_km_free(kernel_map, (vaddr_t)cpudata,
	    roundup(sizeof(*cpudata), PAGE_SIZE), UVM_KMF_WIRED);
}

/* -------------------------------------------------------------------------- */

static void
svm_tlb_flush(struct pmap *pm)
{
	struct nvmm_machine *mach = pm->pm_data;
	struct svm_machdata *machdata = mach->machdata;

	atomic_inc_64(&machdata->mach_htlb_gen);

	/* Generates IPIs, which cause #VMEXITs. */
	pmap_tlb_shootdown(pmap_kernel(), -1, PG_G, TLBSHOOT_UPDATE);
}

static void
svm_machine_create(struct nvmm_machine *mach)
{
	struct svm_machdata *machdata;

	/* Fill in pmap info. */
	mach->vm->vm_map.pmap->pm_data = (void *)mach;
	mach->vm->vm_map.pmap->pm_tlb_flush = svm_tlb_flush;

	machdata = kmem_zalloc(sizeof(struct svm_machdata), KM_SLEEP);
	mach->machdata = machdata;

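	/*
	 * mach_htlb_gen is the host-TLB generation count, bumped by
	 * svm_tlb_flush above whenever the host mappings of the guest
	 * change. Starting at 1, while freshly created VCPUs record
	 * generation 0 (their cpudata is zeroed), forces a guest TLB
	 * flush on first entry.
	 */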
	/* Start with an hTLB flush everywhere. */
	machdata->mach_htlb_gen = 1;
}

static void
svm_machine_destroy(struct nvmm_machine *mach)
{
	kmem_free(mach->machdata, sizeof(struct svm_machdata));
}

static int
svm_machine_configure(struct nvmm_machine *mach, uint64_t op, void *data)
{
	struct nvmm_x86_conf_cpuid *cpuid = data;
	struct svm_machdata *machdata = (struct svm_machdata *)mach->machdata;
	size_t i;

	if (__predict_false(op != NVMM_X86_CONF_CPUID)) {
		return EINVAL;
	}

	if (__predict_false((cpuid->set.eax & cpuid->del.eax) ||
	    (cpuid->set.ebx & cpuid->del.ebx) ||
	    (cpuid->set.ecx & cpuid->del.ecx) ||
	    (cpuid->set.edx & cpuid->del.edx))) {
		return EINVAL;
	}

	/* If already here, replace. */
	for (i = 0; i < SVM_NCPUIDS; i++) {
		if (!machdata->cpuidpresent[i]) {
			continue;
		}
		if (machdata->cpuid[i].leaf == cpuid->leaf) {
			memcpy(&machdata->cpuid[i], cpuid,
			    sizeof(struct nvmm_x86_conf_cpuid));
			return 0;
		}
	}

	/* Not here, insert. */
	for (i = 0; i < SVM_NCPUIDS; i++) {
		if (!machdata->cpuidpresent[i]) {
			machdata->cpuidpresent[i] = true;
			memcpy(&machdata->cpuid[i], cpuid,
			    sizeof(struct nvmm_x86_conf_cpuid));
			return 0;
		}
	}

	return ENOBUFS;
}

/* -------------------------------------------------------------------------- */

static bool
svm_ident(void)
{
	u_int descs[4];
	uint64_t msr;

	if (cpu_vendor != CPUVENDOR_AMD) {
		return false;
	}
	if (!(cpu_feature[3] & CPUID_SVM)) {
		return false;
	}

	if (curcpu()->ci_max_ext_cpuid < 0x8000000a) {
		return false;
	}
	x86_cpuid(0x8000000a, descs);

	/* Want Nested Paging. */
	if (!(descs[3] & CPUID_AMD_SVM_NP)) {
		return false;
	}

	/* Want nRIP. */
	if (!(descs[3] & CPUID_AMD_SVM_NRIPS)) {
		return false;
	}

	svm_decode_assist = (descs[3] & CPUID_AMD_SVM_DecodeAssist) != 0;

	msr = rdmsr(MSR_VMCR);
	if ((msr & VMCR_SVMED) && (msr & VMCR_LOCK)) {
		return false;
	}

	return true;
}

static void
svm_init_asid(uint32_t maxasid)
{
	size_t i, j, allocsz;

	mutex_init(&svm_asidlock, MUTEX_DEFAULT, IPL_NONE);

	/* Arbitrarily limit. */
	maxasid = uimin(maxasid, 8192);

	svm_maxasid = maxasid;
	allocsz = roundup(maxasid, 8) / 8;
	svm_asidmap = kmem_zalloc(allocsz, KM_SLEEP);

	/* ASID 0 is reserved for the host. */
	svm_asidmap[0] |= __BIT(0);

	/* ASID n-1 is special; we share it. */
	i = (maxasid - 1) / 8;
	j = (maxasid - 1) % 8;
	svm_asidmap[i] |= __BIT(j);
}

/*
 * Cross-call handler, executed on each CPU: toggle EFER.SVME, and set
 * up or clear the host save area that VMRUN uses.
 */
static void
svm_change_cpu(void *arg1, void *arg2)
{
	bool enable = (bool)arg1;
	uint64_t msr;

	msr = rdmsr(MSR_VMCR);
	if (msr & VMCR_SVMED) {
		wrmsr(MSR_VMCR, msr & ~VMCR_SVMED);
	}

	if (!enable) {
		wrmsr(MSR_VM_HSAVE_PA, 0);
	}

	msr = rdmsr(MSR_EFER);
	if (enable) {
		msr |= EFER_SVME;
	} else {
		msr &= ~EFER_SVME;
	}
	wrmsr(MSR_EFER, msr);

	if (enable) {
		wrmsr(MSR_VM_HSAVE_PA, hsave[cpu_index(curcpu())].pa);
	}
}

static void
svm_init(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	struct vm_page *pg;
	u_int descs[4];
	uint64_t xc;

	x86_cpuid(0x8000000a, descs);

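	/*
	 * CPUID leaf 0x8000000a: %ebx (descs[1]) gives the number of
	 * ASIDs supported, %edx (descs[3]) the SVM feature flags.
	 */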
	/* The guest TLB flush command. */
	if (descs[3] & CPUID_AMD_SVM_FlushByASID) {
		svm_ctrl_tlb_flush = VMCB_CTRL_TLB_CTRL_FLUSH_GUEST;
	} else {
		svm_ctrl_tlb_flush = VMCB_CTRL_TLB_CTRL_FLUSH_ALL;
	}

	/* Init the ASID. */
	svm_init_asid(descs[1]);

	/* Init the XCR0 mask. */
	svm_xcr0_mask = SVM_XCR0_MASK_DEFAULT & x86_xsave_features;

	memset(hsave, 0, sizeof(hsave));
	for (CPU_INFO_FOREACH(cii, ci)) {
		pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_ZERO);
		hsave[cpu_index(ci)].pa = VM_PAGE_TO_PHYS(pg);
	}

	xc = xc_broadcast(0, svm_change_cpu, (void *)true, NULL);
	xc_wait(xc);
}

static void
svm_fini_asid(void)
{
	size_t allocsz;

	allocsz = roundup(svm_maxasid, 8) / 8;
	kmem_free(svm_asidmap, allocsz);

	mutex_destroy(&svm_asidlock);
}

static void
svm_fini(void)
{
	uint64_t xc;
	size_t i;

	xc = xc_broadcast(0, svm_change_cpu, (void *)false, NULL);
	xc_wait(xc);

	for (i = 0; i < MAXCPUS; i++) {
		if (hsave[i].pa != 0)
			uvm_pagefree(PHYS_TO_VM_PAGE(hsave[i].pa));
	}

	svm_fini_asid();
}

static void
svm_capability(struct nvmm_capability *cap)
{
	cap->u.x86.xcr0_mask = svm_xcr0_mask;
	cap->u.x86.mxcsr_mask = x86_fpu_mxcsr_mask;
	cap->u.x86.conf_cpuid_maxops = SVM_NCPUIDS;
}

const struct nvmm_impl nvmm_x86_svm = {
	.ident = svm_ident,
	.init = svm_init,
	.fini = svm_fini,
	.capability = svm_capability,
	.conf_max = NVMM_X86_NCONF,
	.conf_sizes = svm_conf_sizes,
	.state_size = sizeof(struct nvmm_x64_state),
	.machine_create = svm_machine_create,
	.machine_destroy = svm_machine_destroy,
	.machine_configure = svm_machine_configure,
	.vcpu_create = svm_vcpu_create,
	.vcpu_destroy = svm_vcpu_destroy,
	.vcpu_setstate = svm_vcpu_setstate,
	.vcpu_getstate = svm_vcpu_getstate,
	.vcpu_inject = svm_vcpu_inject,
	.vcpu_run = svm_vcpu_run
};