/*	$NetBSD: nvmm_x86_svm.c,v 1.44 2019/04/29 18:54:25 maxv Exp $	*/

/*
 * Copyright (c) 2018 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Maxime Villard.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: nvmm_x86_svm.c,v 1.44 2019/04/29 18:54:25 maxv Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/kmem.h>
#include <sys/cpu.h>
#include <sys/xcall.h>
#include <sys/mman.h>

#include <uvm/uvm.h>
#include <uvm/uvm_page.h>

#include <x86/cputypes.h>
#include <x86/specialreg.h>
#include <x86/pmap.h>
#include <x86/dbregs.h>
#include <x86/cpu_counter.h>
#include <machine/cpuvar.h>

#include <dev/nvmm/nvmm.h>
#include <dev/nvmm/nvmm_internal.h>
#include <dev/nvmm/x86/nvmm_x86.h>

int svm_vmrun(paddr_t, uint64_t *);

#define MSR_VM_HSAVE_PA	0xC0010117

/* -------------------------------------------------------------------------- */
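/*
 * VMCB exit codes. These values are defined by the hardware: they are the
 * VMEXIT codes documented in the AMD programmer's manual (APM Vol. 2).
 */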
#define VMCB_EXITCODE_CR0_READ		0x0000
#define VMCB_EXITCODE_CR1_READ		0x0001
#define VMCB_EXITCODE_CR2_READ		0x0002
#define VMCB_EXITCODE_CR3_READ		0x0003
#define VMCB_EXITCODE_CR4_READ		0x0004
#define VMCB_EXITCODE_CR5_READ		0x0005
#define VMCB_EXITCODE_CR6_READ		0x0006
#define VMCB_EXITCODE_CR7_READ		0x0007
#define VMCB_EXITCODE_CR8_READ		0x0008
#define VMCB_EXITCODE_CR9_READ		0x0009
#define VMCB_EXITCODE_CR10_READ		0x000A
#define VMCB_EXITCODE_CR11_READ		0x000B
#define VMCB_EXITCODE_CR12_READ		0x000C
#define VMCB_EXITCODE_CR13_READ		0x000D
#define VMCB_EXITCODE_CR14_READ		0x000E
#define VMCB_EXITCODE_CR15_READ		0x000F
#define VMCB_EXITCODE_CR0_WRITE		0x0010
#define VMCB_EXITCODE_CR1_WRITE		0x0011
#define VMCB_EXITCODE_CR2_WRITE		0x0012
#define VMCB_EXITCODE_CR3_WRITE		0x0013
#define VMCB_EXITCODE_CR4_WRITE		0x0014
#define VMCB_EXITCODE_CR5_WRITE		0x0015
#define VMCB_EXITCODE_CR6_WRITE		0x0016
#define VMCB_EXITCODE_CR7_WRITE		0x0017
#define VMCB_EXITCODE_CR8_WRITE		0x0018
#define VMCB_EXITCODE_CR9_WRITE		0x0019
#define VMCB_EXITCODE_CR10_WRITE	0x001A
#define VMCB_EXITCODE_CR11_WRITE	0x001B
#define VMCB_EXITCODE_CR12_WRITE	0x001C
#define VMCB_EXITCODE_CR13_WRITE	0x001D
#define VMCB_EXITCODE_CR14_WRITE	0x001E
#define VMCB_EXITCODE_CR15_WRITE	0x001F
#define VMCB_EXITCODE_DR0_READ		0x0020
#define VMCB_EXITCODE_DR1_READ		0x0021
#define VMCB_EXITCODE_DR2_READ		0x0022
#define VMCB_EXITCODE_DR3_READ		0x0023
#define VMCB_EXITCODE_DR4_READ		0x0024
#define VMCB_EXITCODE_DR5_READ		0x0025
#define VMCB_EXITCODE_DR6_READ		0x0026
#define VMCB_EXITCODE_DR7_READ		0x0027
#define VMCB_EXITCODE_DR8_READ		0x0028
#define VMCB_EXITCODE_DR9_READ		0x0029
#define VMCB_EXITCODE_DR10_READ		0x002A
#define VMCB_EXITCODE_DR11_READ		0x002B
#define VMCB_EXITCODE_DR12_READ		0x002C
#define VMCB_EXITCODE_DR13_READ		0x002D
#define VMCB_EXITCODE_DR14_READ		0x002E
#define VMCB_EXITCODE_DR15_READ		0x002F
#define VMCB_EXITCODE_DR0_WRITE		0x0030
#define VMCB_EXITCODE_DR1_WRITE		0x0031
#define VMCB_EXITCODE_DR2_WRITE		0x0032
#define VMCB_EXITCODE_DR3_WRITE		0x0033
#define VMCB_EXITCODE_DR4_WRITE		0x0034
#define VMCB_EXITCODE_DR5_WRITE		0x0035
#define VMCB_EXITCODE_DR6_WRITE		0x0036
#define VMCB_EXITCODE_DR7_WRITE		0x0037
#define VMCB_EXITCODE_DR8_WRITE		0x0038
#define VMCB_EXITCODE_DR9_WRITE		0x0039
#define VMCB_EXITCODE_DR10_WRITE	0x003A
#define VMCB_EXITCODE_DR11_WRITE	0x003B
#define VMCB_EXITCODE_DR12_WRITE	0x003C
#define VMCB_EXITCODE_DR13_WRITE	0x003D
#define VMCB_EXITCODE_DR14_WRITE	0x003E
#define VMCB_EXITCODE_DR15_WRITE	0x003F
#define VMCB_EXITCODE_EXCP0		0x0040
#define VMCB_EXITCODE_EXCP1		0x0041
#define VMCB_EXITCODE_EXCP2		0x0042
#define VMCB_EXITCODE_EXCP3		0x0043
#define VMCB_EXITCODE_EXCP4		0x0044
#define VMCB_EXITCODE_EXCP5		0x0045
#define VMCB_EXITCODE_EXCP6		0x0046
#define VMCB_EXITCODE_EXCP7		0x0047
#define VMCB_EXITCODE_EXCP8		0x0048
#define VMCB_EXITCODE_EXCP9		0x0049
#define VMCB_EXITCODE_EXCP10		0x004A
#define VMCB_EXITCODE_EXCP11		0x004B
#define VMCB_EXITCODE_EXCP12		0x004C
#define VMCB_EXITCODE_EXCP13		0x004D
#define VMCB_EXITCODE_EXCP14		0x004E
#define VMCB_EXITCODE_EXCP15		0x004F
#define VMCB_EXITCODE_EXCP16		0x0050
#define VMCB_EXITCODE_EXCP17		0x0051
#define VMCB_EXITCODE_EXCP18		0x0052
#define VMCB_EXITCODE_EXCP19		0x0053
#define VMCB_EXITCODE_EXCP20		0x0054
#define VMCB_EXITCODE_EXCP21		0x0055
#define VMCB_EXITCODE_EXCP22		0x0056
#define VMCB_EXITCODE_EXCP23		0x0057
#define VMCB_EXITCODE_EXCP24		0x0058
#define VMCB_EXITCODE_EXCP25		0x0059
#define VMCB_EXITCODE_EXCP26		0x005A
#define VMCB_EXITCODE_EXCP27		0x005B
#define VMCB_EXITCODE_EXCP28		0x005C
#define VMCB_EXITCODE_EXCP29		0x005D
#define VMCB_EXITCODE_EXCP30		0x005E
#define VMCB_EXITCODE_EXCP31		0x005F
#define VMCB_EXITCODE_INTR		0x0060
#define VMCB_EXITCODE_NMI		0x0061
#define VMCB_EXITCODE_SMI		0x0062
#define VMCB_EXITCODE_INIT		0x0063
#define VMCB_EXITCODE_VINTR		0x0064
#define VMCB_EXITCODE_CR0_SEL_WRITE	0x0065
#define VMCB_EXITCODE_IDTR_READ		0x0066
#define VMCB_EXITCODE_GDTR_READ		0x0067
#define VMCB_EXITCODE_LDTR_READ		0x0068
#define VMCB_EXITCODE_TR_READ		0x0069
#define VMCB_EXITCODE_IDTR_WRITE	0x006A
#define VMCB_EXITCODE_GDTR_WRITE	0x006B
#define VMCB_EXITCODE_LDTR_WRITE	0x006C
#define VMCB_EXITCODE_TR_WRITE		0x006D
#define VMCB_EXITCODE_RDTSC		0x006E
#define VMCB_EXITCODE_RDPMC		0x006F
#define VMCB_EXITCODE_PUSHF		0x0070
#define VMCB_EXITCODE_POPF		0x0071
#define VMCB_EXITCODE_CPUID		0x0072
#define VMCB_EXITCODE_RSM		0x0073
#define VMCB_EXITCODE_IRET		0x0074
#define VMCB_EXITCODE_SWINT		0x0075
#define VMCB_EXITCODE_INVD		0x0076
#define VMCB_EXITCODE_PAUSE		0x0077
#define VMCB_EXITCODE_HLT		0x0078
#define VMCB_EXITCODE_INVLPG		0x0079
#define VMCB_EXITCODE_INVLPGA		0x007A
#define VMCB_EXITCODE_IOIO		0x007B
#define VMCB_EXITCODE_MSR		0x007C
#define VMCB_EXITCODE_TASK_SWITCH	0x007D
#define VMCB_EXITCODE_FERR_FREEZE	0x007E
#define VMCB_EXITCODE_SHUTDOWN		0x007F
#define VMCB_EXITCODE_VMRUN		0x0080
#define VMCB_EXITCODE_VMMCALL		0x0081
#define VMCB_EXITCODE_VMLOAD		0x0082
#define VMCB_EXITCODE_VMSAVE		0x0083
#define VMCB_EXITCODE_STGI		0x0084
#define VMCB_EXITCODE_CLGI		0x0085
#define VMCB_EXITCODE_SKINIT		0x0086
#define VMCB_EXITCODE_RDTSCP		0x0087
#define VMCB_EXITCODE_ICEBP		0x0088
#define VMCB_EXITCODE_WBINVD		0x0089
#define VMCB_EXITCODE_MONITOR		0x008A
#define VMCB_EXITCODE_MWAIT		0x008B
#define VMCB_EXITCODE_MWAIT_CONDITIONAL	0x008C
#define VMCB_EXITCODE_XSETBV		0x008D
#define VMCB_EXITCODE_EFER_WRITE_TRAP	0x008F
#define VMCB_EXITCODE_CR0_WRITE_TRAP	0x0090
#define VMCB_EXITCODE_CR1_WRITE_TRAP	0x0091
#define VMCB_EXITCODE_CR2_WRITE_TRAP	0x0092
#define VMCB_EXITCODE_CR3_WRITE_TRAP	0x0093
#define VMCB_EXITCODE_CR4_WRITE_TRAP	0x0094
#define VMCB_EXITCODE_CR5_WRITE_TRAP	0x0095
#define VMCB_EXITCODE_CR6_WRITE_TRAP	0x0096
#define VMCB_EXITCODE_CR7_WRITE_TRAP	0x0097
#define VMCB_EXITCODE_CR8_WRITE_TRAP	0x0098
#define VMCB_EXITCODE_CR9_WRITE_TRAP	0x0099
#define VMCB_EXITCODE_CR10_WRITE_TRAP	0x009A
#define VMCB_EXITCODE_CR11_WRITE_TRAP	0x009B
#define VMCB_EXITCODE_CR12_WRITE_TRAP	0x009C
#define VMCB_EXITCODE_CR13_WRITE_TRAP	0x009D
#define VMCB_EXITCODE_CR14_WRITE_TRAP	0x009E
#define VMCB_EXITCODE_CR15_WRITE_TRAP	0x009F
#define VMCB_EXITCODE_NPF		0x0400
#define VMCB_EXITCODE_AVIC_INCOMP_IPI	0x0401
#define VMCB_EXITCODE_AVIC_NOACCEL	0x0402
#define VMCB_EXITCODE_VMGEXIT		0x0403
#define VMCB_EXITCODE_INVALID		-1

/* -------------------------------------------------------------------------- */
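/*
 * Layout of the VMCB, as enforced by the CTASSERTs further down: one 4KB
 * page, with the control area at offset 0x000 and the state-save area at
 * offset 0x400.
 */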
struct vmcb_ctrl {
	uint32_t intercept_cr;
#define VMCB_CTRL_INTERCEPT_RCR(x)	__BIT( 0 + x)
#define VMCB_CTRL_INTERCEPT_WCR(x)	__BIT(16 + x)

	uint32_t intercept_dr;
#define VMCB_CTRL_INTERCEPT_RDR(x)	__BIT( 0 + x)
#define VMCB_CTRL_INTERCEPT_WDR(x)	__BIT(16 + x)

	uint32_t intercept_vec;
#define VMCB_CTRL_INTERCEPT_VEC(x)	__BIT(x)

	uint32_t intercept_misc1;
#define VMCB_CTRL_INTERCEPT_INTR	__BIT(0)
#define VMCB_CTRL_INTERCEPT_NMI		__BIT(1)
#define VMCB_CTRL_INTERCEPT_SMI		__BIT(2)
#define VMCB_CTRL_INTERCEPT_INIT	__BIT(3)
#define VMCB_CTRL_INTERCEPT_VINTR	__BIT(4)
#define VMCB_CTRL_INTERCEPT_CR0_SPEC	__BIT(5)
#define VMCB_CTRL_INTERCEPT_RIDTR	__BIT(6)
#define VMCB_CTRL_INTERCEPT_RGDTR	__BIT(7)
#define VMCB_CTRL_INTERCEPT_RLDTR	__BIT(8)
#define VMCB_CTRL_INTERCEPT_RTR		__BIT(9)
#define VMCB_CTRL_INTERCEPT_WIDTR	__BIT(10)
#define VMCB_CTRL_INTERCEPT_WGDTR	__BIT(11)
#define VMCB_CTRL_INTERCEPT_WLDTR	__BIT(12)
#define VMCB_CTRL_INTERCEPT_WTR		__BIT(13)
#define VMCB_CTRL_INTERCEPT_RDTSC	__BIT(14)
#define VMCB_CTRL_INTERCEPT_RDPMC	__BIT(15)
#define VMCB_CTRL_INTERCEPT_PUSHF	__BIT(16)
#define VMCB_CTRL_INTERCEPT_POPF	__BIT(17)
#define VMCB_CTRL_INTERCEPT_CPUID	__BIT(18)
#define VMCB_CTRL_INTERCEPT_RSM		__BIT(19)
#define VMCB_CTRL_INTERCEPT_IRET	__BIT(20)
#define VMCB_CTRL_INTERCEPT_INTN	__BIT(21)
#define VMCB_CTRL_INTERCEPT_INVD	__BIT(22)
#define VMCB_CTRL_INTERCEPT_PAUSE	__BIT(23)
#define VMCB_CTRL_INTERCEPT_HLT		__BIT(24)
#define VMCB_CTRL_INTERCEPT_INVLPG	__BIT(25)
#define VMCB_CTRL_INTERCEPT_INVLPGA	__BIT(26)
#define VMCB_CTRL_INTERCEPT_IOIO_PROT	__BIT(27)
#define VMCB_CTRL_INTERCEPT_MSR_PROT	__BIT(28)
#define VMCB_CTRL_INTERCEPT_TASKSW	__BIT(29)
#define VMCB_CTRL_INTERCEPT_FERR_FREEZE	__BIT(30)
#define VMCB_CTRL_INTERCEPT_SHUTDOWN	__BIT(31)

	uint32_t intercept_misc2;
#define VMCB_CTRL_INTERCEPT_VMRUN	__BIT(0)
#define VMCB_CTRL_INTERCEPT_VMMCALL	__BIT(1)
#define VMCB_CTRL_INTERCEPT_VMLOAD	__BIT(2)
#define VMCB_CTRL_INTERCEPT_VMSAVE	__BIT(3)
#define VMCB_CTRL_INTERCEPT_STGI	__BIT(4)
#define VMCB_CTRL_INTERCEPT_CLGI	__BIT(5)
#define VMCB_CTRL_INTERCEPT_SKINIT	__BIT(6)
#define VMCB_CTRL_INTERCEPT_RDTSCP	__BIT(7)
#define VMCB_CTRL_INTERCEPT_ICEBP	__BIT(8)
#define VMCB_CTRL_INTERCEPT_WBINVD	__BIT(9)
#define VMCB_CTRL_INTERCEPT_MONITOR	__BIT(10)
#define VMCB_CTRL_INTERCEPT_MWAIT	__BIT(12)
#define VMCB_CTRL_INTERCEPT_XSETBV	__BIT(13)
#define VMCB_CTRL_INTERCEPT_EFER_SPEC	__BIT(15)
#define VMCB_CTRL_INTERCEPT_WCR_SPEC(x)	__BIT(16 + x)

	uint8_t rsvd1[40];
	uint16_t pause_filt_thresh;
	uint16_t pause_filt_cnt;
	uint64_t iopm_base_pa;
	uint64_t msrpm_base_pa;
	uint64_t tsc_offset;
	uint32_t guest_asid;

	uint32_t tlb_ctrl;
#define VMCB_CTRL_TLB_CTRL_FLUSH_ALL			0x01
#define VMCB_CTRL_TLB_CTRL_FLUSH_GUEST			0x03
#define VMCB_CTRL_TLB_CTRL_FLUSH_GUEST_NONGLOBAL	0x07

	uint64_t v;
#define VMCB_CTRL_V_TPR			__BITS(3,0)
#define VMCB_CTRL_V_IRQ			__BIT(8)
#define VMCB_CTRL_V_VGIF		__BIT(9)
#define VMCB_CTRL_V_INTR_PRIO		__BITS(19,16)
#define VMCB_CTRL_V_IGN_TPR		__BIT(20)
#define VMCB_CTRL_V_INTR_MASKING	__BIT(24)
#define VMCB_CTRL_V_GUEST_VGIF		__BIT(25)
#define VMCB_CTRL_V_AVIC_EN		__BIT(31)
#define VMCB_CTRL_V_INTR_VECTOR		__BITS(39,32)

	uint64_t intr;
#define VMCB_CTRL_INTR_SHADOW		__BIT(0)

	uint64_t exitcode;
	uint64_t exitinfo1;
	uint64_t exitinfo2;

	uint64_t exitintinfo;
#define VMCB_CTRL_EXITINTINFO_VECTOR	__BITS(7,0)
#define VMCB_CTRL_EXITINTINFO_TYPE	__BITS(10,8)
#define VMCB_CTRL_EXITINTINFO_EV	__BIT(11)
#define VMCB_CTRL_EXITINTINFO_V		__BIT(31)
#define VMCB_CTRL_EXITINTINFO_ERRORCODE	__BITS(63,32)

	uint64_t enable1;
#define VMCB_CTRL_ENABLE_NP		__BIT(0)
#define VMCB_CTRL_ENABLE_SEV		__BIT(1)
#define VMCB_CTRL_ENABLE_ES_SEV		__BIT(2)

	uint64_t avic;
#define VMCB_CTRL_AVIC_APIC_BAR		__BITS(51,0)

	uint64_t ghcb;

	uint64_t eventinj;
#define VMCB_CTRL_EVENTINJ_VECTOR	__BITS(7,0)
#define VMCB_CTRL_EVENTINJ_TYPE		__BITS(10,8)
#define VMCB_CTRL_EVENTINJ_EV		__BIT(11)
#define VMCB_CTRL_EVENTINJ_V		__BIT(31)
#define VMCB_CTRL_EVENTINJ_ERRORCODE	__BITS(63,32)

	uint64_t n_cr3;

	uint64_t enable2;
#define VMCB_CTRL_ENABLE_LBR		__BIT(0)
#define VMCB_CTRL_ENABLE_VVMSAVE	__BIT(1)

	uint32_t vmcb_clean;
#define VMCB_CTRL_VMCB_CLEAN_I		__BIT(0)
#define VMCB_CTRL_VMCB_CLEAN_IOPM	__BIT(1)
#define VMCB_CTRL_VMCB_CLEAN_ASID	__BIT(2)
#define VMCB_CTRL_VMCB_CLEAN_TPR	__BIT(3)
#define VMCB_CTRL_VMCB_CLEAN_NP		__BIT(4)
#define VMCB_CTRL_VMCB_CLEAN_CR		__BIT(5)
#define VMCB_CTRL_VMCB_CLEAN_DR		__BIT(6)
#define VMCB_CTRL_VMCB_CLEAN_DT		__BIT(7)
#define VMCB_CTRL_VMCB_CLEAN_SEG	__BIT(8)
#define VMCB_CTRL_VMCB_CLEAN_CR2	__BIT(9)
#define VMCB_CTRL_VMCB_CLEAN_LBR	__BIT(10)
#define VMCB_CTRL_VMCB_CLEAN_AVIC	__BIT(11)

	uint32_t rsvd2;
	uint64_t nrip;
	uint8_t	inst_len;
	uint8_t	inst_bytes[15];
	uint64_t avic_abpp;
	uint64_t rsvd3;
	uint64_t avic_ltp;

	uint64_t avic_phys;
#define VMCB_CTRL_AVIC_PHYS_TABLE_PTR	__BITS(51,12)
#define VMCB_CTRL_AVIC_PHYS_MAX_INDEX	__BITS(7,0)

	uint64_t rsvd4;
	uint64_t vmcb_ptr;

	uint8_t	pad[752];
} __packed;

CTASSERT(sizeof(struct vmcb_ctrl) == 1024);
struct vmcb_segment {
	uint16_t selector;
	uint16_t attrib;	/* hidden */
	uint32_t limit;		/* hidden */
	uint64_t base;		/* hidden */
} __packed;

CTASSERT(sizeof(struct vmcb_segment) == 16);

struct vmcb_state {
	struct vmcb_segment es;
	struct vmcb_segment cs;
	struct vmcb_segment ss;
	struct vmcb_segment ds;
	struct vmcb_segment fs;
	struct vmcb_segment gs;
	struct vmcb_segment gdt;
	struct vmcb_segment ldt;
	struct vmcb_segment idt;
	struct vmcb_segment tr;
	uint8_t	rsvd1[43];
	uint8_t	cpl;
	uint8_t	rsvd2[4];
	uint64_t efer;
	uint8_t	rsvd3[112];
	uint64_t cr4;
	uint64_t cr3;
	uint64_t cr0;
	uint64_t dr7;
	uint64_t dr6;
	uint64_t rflags;
	uint64_t rip;
	uint8_t	rsvd4[88];
	uint64_t rsp;
	uint8_t	rsvd5[24];
	uint64_t rax;
	uint64_t star;
	uint64_t lstar;
	uint64_t cstar;
	uint64_t sfmask;
	uint64_t kernelgsbase;
	uint64_t sysenter_cs;
	uint64_t sysenter_esp;
	uint64_t sysenter_eip;
	uint64_t cr2;
	uint8_t	rsvd6[32];
	uint64_t g_pat;
	uint64_t dbgctl;
	uint64_t br_from;
	uint64_t br_to;
	uint64_t int_from;
	uint64_t int_to;
	uint8_t	pad[2408];
} __packed;

CTASSERT(sizeof(struct vmcb_state) == 0xC00);

struct vmcb {
	struct vmcb_ctrl ctrl;
	struct vmcb_state state;
} __packed;

CTASSERT(sizeof(struct vmcb) == PAGE_SIZE);
CTASSERT(offsetof(struct vmcb, state) == 0x400);

/* -------------------------------------------------------------------------- */

static void svm_vcpu_state_provide(struct nvmm_cpu *, uint64_t);
static void svm_vcpu_state_commit(struct nvmm_cpu *);

struct svm_hsave {
	paddr_t pa;
};

static struct svm_hsave hsave[MAXCPUS];

static uint8_t *svm_asidmap __read_mostly;
static uint32_t svm_maxasid __read_mostly;
static kmutex_t svm_asidlock __cacheline_aligned;

static bool svm_decode_assist __read_mostly;
static uint32_t svm_ctrl_tlb_flush __read_mostly;

#define SVM_XCR0_MASK_DEFAULT	(XCR0_X87|XCR0_SSE)
static uint64_t svm_xcr0_mask __read_mostly;

#define SVM_NCPUIDS	32

#define VMCB_NPAGES	1

#define MSRBM_NPAGES	2
#define MSRBM_SIZE	(MSRBM_NPAGES * PAGE_SIZE)

#define IOBM_NPAGES	3
#define IOBM_SIZE	(IOBM_NPAGES * PAGE_SIZE)
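/*
 * A note on the bitmap sizes above: the IOPM covers the 64K I/O ports at
 * one bit per port (8KB), plus an extra page required by the hardware,
 * hence 3 pages. The MSRPM uses two bits (read/write) per MSR over three
 * 8K-MSR ranges of 2KB of bitmap each, which fits in 2 pages; see
 * svm_vcpu_msr_allow() below for the exact layout.
 */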
/* Does not include EFER_LMSLE. */
#define EFER_VALID \
	(EFER_SCE|EFER_LME|EFER_LMA|EFER_NXE|EFER_SVME|EFER_FFXSR|EFER_TCE)

#define EFER_TLB_FLUSH \
	(EFER_NXE|EFER_LMA|EFER_LME)
#define CR0_TLB_FLUSH \
	(CR0_PG|CR0_WP|CR0_CD|CR0_NW)
#define CR4_TLB_FLUSH \
	(CR4_PGE|CR4_PAE|CR4_PSE)

/* -------------------------------------------------------------------------- */

struct svm_machdata {
	bool cpuidpresent[SVM_NCPUIDS];
	struct nvmm_x86_conf_cpuid cpuid[SVM_NCPUIDS];
	volatile uint64_t mach_htlb_gen;
};

static const size_t svm_conf_sizes[NVMM_X86_NCONF] = {
	[NVMM_X86_CONF_CPUID] = sizeof(struct nvmm_x86_conf_cpuid)
};

struct svm_cpudata {
	/* General */
	bool shared_asid;
	bool gtlb_want_flush;
	bool gtsc_want_update;
	uint64_t vcpu_htlb_gen;

	/* VMCB */
	struct vmcb *vmcb;
	paddr_t vmcb_pa;

	/* I/O bitmap */
	uint8_t *iobm;
	paddr_t iobm_pa;

	/* MSR bitmap */
	uint8_t *msrbm;
	paddr_t msrbm_pa;

	/* Host state */
	uint64_t hxcr0;
	uint64_t star;
	uint64_t lstar;
	uint64_t cstar;
	uint64_t sfmask;
	uint64_t fsbase;
	uint64_t kernelgsbase;
	bool ts_set;
	struct xsave_header hfpu __aligned(64);

	/* Intr state */
	bool int_window_exit;
	bool nmi_window_exit;
	bool evt_pending;

	/* Guest state */
	uint64_t gxcr0;
	uint64_t gprs[NVMM_X64_NGPR];
	uint64_t drs[NVMM_X64_NDR];
	uint64_t gtsc;
	struct xsave_header gfpu __aligned(64);
};
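/*
 * The vmcb_clean field is a caching hint for the CPU: a bit set to 1 tells
 * it that the corresponding VMCB area has not been modified since the last
 * VMRUN on this CPU, and may be reused from its internal cache. Whenever
 * we touch a piece of guest state we must therefore clear the matching
 * clean bit, which is what svm_vmcb_cache_update() does.
 */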
static void
svm_vmcb_cache_default(struct vmcb *vmcb)
{
	vmcb->ctrl.vmcb_clean =
	    VMCB_CTRL_VMCB_CLEAN_I |
	    VMCB_CTRL_VMCB_CLEAN_IOPM |
	    VMCB_CTRL_VMCB_CLEAN_ASID |
	    VMCB_CTRL_VMCB_CLEAN_TPR |
	    VMCB_CTRL_VMCB_CLEAN_NP |
	    VMCB_CTRL_VMCB_CLEAN_CR |
	    VMCB_CTRL_VMCB_CLEAN_DR |
	    VMCB_CTRL_VMCB_CLEAN_DT |
	    VMCB_CTRL_VMCB_CLEAN_SEG |
	    VMCB_CTRL_VMCB_CLEAN_CR2 |
	    VMCB_CTRL_VMCB_CLEAN_LBR |
	    VMCB_CTRL_VMCB_CLEAN_AVIC;
}

static void
svm_vmcb_cache_update(struct vmcb *vmcb, uint64_t flags)
{
	if (flags & NVMM_X64_STATE_SEGS) {
		vmcb->ctrl.vmcb_clean &=
		    ~(VMCB_CTRL_VMCB_CLEAN_SEG | VMCB_CTRL_VMCB_CLEAN_DT);
	}
	if (flags & NVMM_X64_STATE_CRS) {
		vmcb->ctrl.vmcb_clean &=
		    ~(VMCB_CTRL_VMCB_CLEAN_CR | VMCB_CTRL_VMCB_CLEAN_CR2 |
		      VMCB_CTRL_VMCB_CLEAN_TPR);
	}
	if (flags & NVMM_X64_STATE_DRS) {
		vmcb->ctrl.vmcb_clean &= ~VMCB_CTRL_VMCB_CLEAN_DR;
	}
	if (flags & NVMM_X64_STATE_MSRS) {
		/* CR for EFER, NP for PAT. */
		vmcb->ctrl.vmcb_clean &=
		    ~(VMCB_CTRL_VMCB_CLEAN_CR | VMCB_CTRL_VMCB_CLEAN_NP);
	}
}

static inline void
svm_vmcb_cache_flush(struct vmcb *vmcb, uint64_t flags)
{
	vmcb->ctrl.vmcb_clean &= ~flags;
}

static inline void
svm_vmcb_cache_flush_all(struct vmcb *vmcb)
{
	vmcb->ctrl.vmcb_clean = 0;
}

#define SVM_EVENT_TYPE_HW_INT	0
#define SVM_EVENT_TYPE_NMI	2
#define SVM_EVENT_TYPE_EXC	3
#define SVM_EVENT_TYPE_SW_INT	4
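/*
 * Interrupt and NMI window exiting. To know when the guest becomes able to
 * take a pending interrupt, we inject a fake virtual interrupt (V_IRQ,
 * with the TPR ignored) and intercept VINTR: the VM-exit fires as soon as
 * the guest opens its interrupt window. There is no equivalent for NMIs,
 * so we intercept IRET instead, which marks the end of the guest's NMI
 * handler.
 */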
static void
svm_event_waitexit_enable(struct nvmm_cpu *vcpu, bool nmi)
{
	struct svm_cpudata *cpudata = vcpu->cpudata;
	struct vmcb *vmcb = cpudata->vmcb;

	if (nmi) {
		vmcb->ctrl.intercept_misc1 |= VMCB_CTRL_INTERCEPT_IRET;
		cpudata->nmi_window_exit = true;
	} else {
		vmcb->ctrl.intercept_misc1 |= VMCB_CTRL_INTERCEPT_VINTR;
		vmcb->ctrl.v |= (VMCB_CTRL_V_IRQ | VMCB_CTRL_V_IGN_TPR);
		svm_vmcb_cache_flush(vmcb, VMCB_CTRL_VMCB_CLEAN_TPR);
		cpudata->int_window_exit = true;
	}

	svm_vmcb_cache_flush(vmcb, VMCB_CTRL_VMCB_CLEAN_I);
}

static void
svm_event_waitexit_disable(struct nvmm_cpu *vcpu, bool nmi)
{
	struct svm_cpudata *cpudata = vcpu->cpudata;
	struct vmcb *vmcb = cpudata->vmcb;

	if (nmi) {
		vmcb->ctrl.intercept_misc1 &= ~VMCB_CTRL_INTERCEPT_IRET;
		cpudata->nmi_window_exit = false;
	} else {
		vmcb->ctrl.intercept_misc1 &= ~VMCB_CTRL_INTERCEPT_VINTR;
		vmcb->ctrl.v &= ~(VMCB_CTRL_V_IRQ | VMCB_CTRL_V_IGN_TPR);
		svm_vmcb_cache_flush(vmcb, VMCB_CTRL_VMCB_CLEAN_TPR);
		cpudata->int_window_exit = false;
	}

	svm_vmcb_cache_flush(vmcb, VMCB_CTRL_VMCB_CLEAN_I);
}

static inline int
svm_event_has_error(uint64_t vector)
{
	switch (vector) {
	case 8:		/* #DF */
	case 10:	/* #TS */
	case 11:	/* #NP */
	case 12:	/* #SS */
	case 13:	/* #GP */
	case 14:	/* #PF */
	case 17:	/* #AC */
	case 30:	/* #SX */
		return 1;
	default:
		return 0;
	}
}

static int
svm_vcpu_inject(struct nvmm_machine *mach, struct nvmm_cpu *vcpu,
    struct nvmm_event *event)
{
	struct svm_cpudata *cpudata = vcpu->cpudata;
	struct vmcb *vmcb = cpudata->vmcb;
	int type = 0, err = 0;

	if (event->vector >= 256) {
		return EINVAL;
	}

	switch (event->type) {
	case NVMM_EVENT_INTERRUPT_HW:
		type = SVM_EVENT_TYPE_HW_INT;
		if (event->vector == 2) {
			type = SVM_EVENT_TYPE_NMI;
			svm_event_waitexit_enable(vcpu, true);
		}
		err = 0;
		break;
	case NVMM_EVENT_INTERRUPT_SW:
		return EINVAL;
	case NVMM_EVENT_EXCEPTION:
		type = SVM_EVENT_TYPE_EXC;
		if (event->vector == 2 || event->vector >= 32)
			return EINVAL;
		if (event->vector == 3 || event->vector == 0)
			return EINVAL;
		err = svm_event_has_error(event->vector);
		break;
	default:
		return EINVAL;
	}

	vmcb->ctrl.eventinj =
	    __SHIFTIN(event->vector, VMCB_CTRL_EVENTINJ_VECTOR) |
	    __SHIFTIN(type, VMCB_CTRL_EVENTINJ_TYPE) |
	    __SHIFTIN(err, VMCB_CTRL_EVENTINJ_EV) |
	    __SHIFTIN(1, VMCB_CTRL_EVENTINJ_V) |
	    __SHIFTIN(event->u.error, VMCB_CTRL_EVENTINJ_ERRORCODE);

	cpudata->evt_pending = true;

	return 0;
}

static void
svm_inject_ud(struct nvmm_machine *mach, struct nvmm_cpu *vcpu)
{
	struct nvmm_event event;
	int ret __diagused;

	event.type = NVMM_EVENT_EXCEPTION;
	event.vector = 6;
	event.u.error = 0;

	ret = svm_vcpu_inject(mach, vcpu, &event);
	KASSERT(ret == 0);
}

static void
svm_inject_gp(struct nvmm_machine *mach, struct nvmm_cpu *vcpu)
{
	struct nvmm_event event;
	int ret __diagused;

	event.type = NVMM_EVENT_EXCEPTION;
	event.vector = 13;
	event.u.error = 0;

	ret = svm_vcpu_inject(mach, vcpu, &event);
	KASSERT(ret == 0);
}

static inline void
svm_inkernel_advance(struct vmcb *vmcb)
{
	/*
	 * Maybe we should also apply single-stepping and debug exceptions.
	 * Matters for guest-ring3, because it can execute 'cpuid' under a
	 * debugger.
	 */
	vmcb->state.rip = vmcb->ctrl.nrip;
	vmcb->ctrl.intr &= ~VMCB_CTRL_INTR_SHADOW;
}
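/*
 * In-kernel CPUID filtering. svm_exit_cpuid() first runs the host CPUID
 * instruction, then this function masks the result against the
 * nvmm_cpuid_* whitelists and overrides a few leaves; finally the
 * user-configurable set/del masks are applied. Leaf 0x40000000 advertises
 * the hypervisor signature, "___ NVMM ___" spread over EBX:ECX:EDX.
 */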
static void
svm_inkernel_handle_cpuid(struct nvmm_cpu *vcpu, uint64_t eax, uint64_t ecx)
{
	struct svm_cpudata *cpudata = vcpu->cpudata;
	uint64_t cr4;

	switch (eax) {
	case 0x00000001:
		cpudata->vmcb->state.rax &= nvmm_cpuid_00000001.eax;

		cpudata->gprs[NVMM_X64_GPR_RBX] &= ~CPUID_LOCAL_APIC_ID;
		cpudata->gprs[NVMM_X64_GPR_RBX] |= __SHIFTIN(vcpu->cpuid,
		    CPUID_LOCAL_APIC_ID);

		cpudata->gprs[NVMM_X64_GPR_RCX] &= nvmm_cpuid_00000001.ecx;
		cpudata->gprs[NVMM_X64_GPR_RCX] |= CPUID2_RAZ;

		cpudata->gprs[NVMM_X64_GPR_RDX] &= nvmm_cpuid_00000001.edx;

		/* CPUID2_OSXSAVE depends on CR4. */
		cr4 = cpudata->vmcb->state.cr4;
		if (!(cr4 & CR4_OSXSAVE)) {
			cpudata->gprs[NVMM_X64_GPR_RCX] &= ~CPUID2_OSXSAVE;
		}
		break;
	case 0x00000005:
	case 0x00000006:
		cpudata->vmcb->state.rax = 0;
		cpudata->gprs[NVMM_X64_GPR_RBX] = 0;
		cpudata->gprs[NVMM_X64_GPR_RCX] = 0;
		cpudata->gprs[NVMM_X64_GPR_RDX] = 0;
		break;
	case 0x00000007:
		cpudata->vmcb->state.rax &= nvmm_cpuid_00000007.eax;
		cpudata->gprs[NVMM_X64_GPR_RBX] &= nvmm_cpuid_00000007.ebx;
		cpudata->gprs[NVMM_X64_GPR_RCX] &= nvmm_cpuid_00000007.ecx;
		cpudata->gprs[NVMM_X64_GPR_RDX] &= nvmm_cpuid_00000007.edx;
		break;
	case 0x0000000D:
		if (svm_xcr0_mask == 0) {
			break;
		}
		switch (ecx) {
		case 0:
			cpudata->vmcb->state.rax = svm_xcr0_mask & 0xFFFFFFFF;
			if (cpudata->gxcr0 & XCR0_SSE) {
				cpudata->gprs[NVMM_X64_GPR_RBX] = sizeof(struct fxsave);
			} else {
				cpudata->gprs[NVMM_X64_GPR_RBX] = sizeof(struct save87);
			}
			cpudata->gprs[NVMM_X64_GPR_RBX] += 64; /* XSAVE header */
			cpudata->gprs[NVMM_X64_GPR_RCX] = sizeof(struct fxsave) + 64;
			cpudata->gprs[NVMM_X64_GPR_RDX] = svm_xcr0_mask >> 32;
			break;
		case 1:
			cpudata->vmcb->state.rax &= ~CPUID_PES1_XSAVES;
			break;
		}
		break;
	case 0x40000000:
		cpudata->gprs[NVMM_X64_GPR_RBX] = 0;
		cpudata->gprs[NVMM_X64_GPR_RCX] = 0;
		cpudata->gprs[NVMM_X64_GPR_RDX] = 0;
		memcpy(&cpudata->gprs[NVMM_X64_GPR_RBX], "___ ", 4);
		memcpy(&cpudata->gprs[NVMM_X64_GPR_RCX], "NVMM", 4);
		memcpy(&cpudata->gprs[NVMM_X64_GPR_RDX], " ___", 4);
		break;
	case 0x80000001:
		cpudata->vmcb->state.rax &= nvmm_cpuid_80000001.eax;
		cpudata->gprs[NVMM_X64_GPR_RBX] &= nvmm_cpuid_80000001.ebx;
		cpudata->gprs[NVMM_X64_GPR_RCX] &= nvmm_cpuid_80000001.ecx;
		cpudata->gprs[NVMM_X64_GPR_RDX] &= nvmm_cpuid_80000001.edx;
		break;
	default:
		break;
	}
}

static void
svm_exit_cpuid(struct nvmm_machine *mach, struct nvmm_cpu *vcpu,
    struct nvmm_exit *exit)
{
	struct svm_machdata *machdata = mach->machdata;
	struct svm_cpudata *cpudata = vcpu->cpudata;
	struct nvmm_x86_conf_cpuid *cpuid;
	uint64_t eax, ecx;
	u_int descs[4];
	size_t i;

	eax = cpudata->vmcb->state.rax;
	ecx = cpudata->gprs[NVMM_X64_GPR_RCX];
	x86_cpuid2(eax, ecx, descs);

	cpudata->vmcb->state.rax = descs[0];
	cpudata->gprs[NVMM_X64_GPR_RBX] = descs[1];
	cpudata->gprs[NVMM_X64_GPR_RCX] = descs[2];
	cpudata->gprs[NVMM_X64_GPR_RDX] = descs[3];

	svm_inkernel_handle_cpuid(vcpu, eax, ecx);

	for (i = 0; i < SVM_NCPUIDS; i++) {
		cpuid = &machdata->cpuid[i];
		if (!machdata->cpuidpresent[i]) {
			continue;
		}
		if (cpuid->leaf != eax) {
			continue;
		}

		/* del */
		cpudata->vmcb->state.rax &= ~cpuid->del.eax;
		cpudata->gprs[NVMM_X64_GPR_RBX] &= ~cpuid->del.ebx;
		cpudata->gprs[NVMM_X64_GPR_RCX] &= ~cpuid->del.ecx;
		cpudata->gprs[NVMM_X64_GPR_RDX] &= ~cpuid->del.edx;

		/* set */
		cpudata->vmcb->state.rax |= cpuid->set.eax;
		cpudata->gprs[NVMM_X64_GPR_RBX] |= cpuid->set.ebx;
		cpudata->gprs[NVMM_X64_GPR_RCX] |= cpuid->set.ecx;
		cpudata->gprs[NVMM_X64_GPR_RDX] |= cpuid->set.edx;

		break;
	}

	svm_inkernel_advance(cpudata->vmcb);
	exit->reason = NVMM_EXIT_NONE;
}

static void
svm_exit_hlt(struct nvmm_machine *mach, struct nvmm_cpu *vcpu,
    struct nvmm_exit *exit)
{
	struct svm_cpudata *cpudata = vcpu->cpudata;
	struct vmcb *vmcb = cpudata->vmcb;

	if (cpudata->int_window_exit && (vmcb->state.rflags & PSL_I)) {
		svm_event_waitexit_disable(vcpu, false);
	}

	svm_inkernel_advance(cpudata->vmcb);
	exit->reason = NVMM_EXIT_HALTED;
}

#define SVM_EXIT_IO_PORT	__BITS(31,16)
#define SVM_EXIT_IO_SEG		__BITS(12,10)
#define SVM_EXIT_IO_A64		__BIT(9)
#define SVM_EXIT_IO_A32		__BIT(8)
#define SVM_EXIT_IO_A16		__BIT(7)
#define SVM_EXIT_IO_SZ32	__BIT(6)
#define SVM_EXIT_IO_SZ16	__BIT(5)
#define SVM_EXIT_IO_SZ8		__BIT(4)
#define SVM_EXIT_IO_REP		__BIT(3)
#define SVM_EXIT_IO_STR		__BIT(2)
#define SVM_EXIT_IO_IN		__BIT(0)

static void
svm_exit_io(struct nvmm_machine *mach, struct nvmm_cpu *vcpu,
    struct nvmm_exit *exit)
{
	struct svm_cpudata *cpudata = vcpu->cpudata;
	uint64_t info = cpudata->vmcb->ctrl.exitinfo1;
	uint64_t nextpc = cpudata->vmcb->ctrl.exitinfo2;

	exit->reason = NVMM_EXIT_IO;

	if (info & SVM_EXIT_IO_IN) {
		exit->u.io.type = NVMM_EXIT_IO_IN;
	} else {
		exit->u.io.type = NVMM_EXIT_IO_OUT;
	}

	exit->u.io.port = __SHIFTOUT(info, SVM_EXIT_IO_PORT);

	if (svm_decode_assist) {
		KASSERT(__SHIFTOUT(info, SVM_EXIT_IO_SEG) < 6);
		exit->u.io.seg = __SHIFTOUT(info, SVM_EXIT_IO_SEG);
	} else {
		exit->u.io.seg = -1;
	}

	if (info & SVM_EXIT_IO_A64) {
		exit->u.io.address_size = 8;
	} else if (info & SVM_EXIT_IO_A32) {
		exit->u.io.address_size = 4;
	} else if (info & SVM_EXIT_IO_A16) {
		exit->u.io.address_size = 2;
	}

	if (info & SVM_EXIT_IO_SZ32) {
		exit->u.io.operand_size = 4;
	} else if (info & SVM_EXIT_IO_SZ16) {
		exit->u.io.operand_size = 2;
	} else if (info & SVM_EXIT_IO_SZ8) {
		exit->u.io.operand_size = 1;
	}

	exit->u.io.rep = (info & SVM_EXIT_IO_REP) != 0;
	exit->u.io.str = (info & SVM_EXIT_IO_STR) != 0;
	exit->u.io.npc = nextpc;

	svm_vcpu_state_provide(vcpu,
	    NVMM_X64_STATE_GPRS | NVMM_X64_STATE_SEGS |
	    NVMM_X64_STATE_CRS | NVMM_X64_STATE_MSRS);
}
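/*
 * MSRs we emulate in the kernel as no-ops: reads return zero, writes are
 * silently discarded (see svm_inkernel_handle_msr() below), so we never
 * have to go back to userland for them.
 */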
static const uint64_t msr_ignore_list[] = {
	0xc0010055, /* MSR_CMPHALT */
	MSR_DE_CFG,
	MSR_IC_CFG,
	MSR_UCODE_AMD_PATCHLEVEL
};

static bool
svm_inkernel_handle_msr(struct nvmm_machine *mach, struct nvmm_cpu *vcpu,
    struct nvmm_exit *exit)
{
	struct svm_cpudata *cpudata = vcpu->cpudata;
	struct vmcb *vmcb = cpudata->vmcb;
	uint64_t val;
	size_t i;

	switch (exit->u.msr.type) {
	case NVMM_EXIT_MSR_RDMSR:
		if (exit->u.msr.msr == MSR_NB_CFG) {
			val = NB_CFG_INITAPICCPUIDLO;
			vmcb->state.rax = (val & 0xFFFFFFFF);
			cpudata->gprs[NVMM_X64_GPR_RDX] = (val >> 32);
			goto handled;
		}
		for (i = 0; i < __arraycount(msr_ignore_list); i++) {
			if (msr_ignore_list[i] != exit->u.msr.msr)
				continue;
			val = 0;
			vmcb->state.rax = (val & 0xFFFFFFFF);
			cpudata->gprs[NVMM_X64_GPR_RDX] = (val >> 32);
			goto handled;
		}
		break;
	case NVMM_EXIT_MSR_WRMSR:
		if (exit->u.msr.msr == MSR_EFER) {
			if (__predict_false(exit->u.msr.val & ~EFER_VALID)) {
				goto error;
			}
			if ((vmcb->state.efer ^ exit->u.msr.val) &
			    EFER_TLB_FLUSH) {
				cpudata->gtlb_want_flush = true;
			}
			vmcb->state.efer = exit->u.msr.val | EFER_SVME;
			svm_vmcb_cache_flush(vmcb, VMCB_CTRL_VMCB_CLEAN_CR);
			goto handled;
		}
		if (exit->u.msr.msr == MSR_TSC) {
			cpudata->gtsc = exit->u.msr.val;
			cpudata->gtsc_want_update = true;
			goto handled;
		}
		for (i = 0; i < __arraycount(msr_ignore_list); i++) {
			if (msr_ignore_list[i] != exit->u.msr.msr)
				continue;
			goto handled;
		}
		break;
	}

	return false;

handled:
	svm_inkernel_advance(cpudata->vmcb);
	return true;

error:
	svm_inject_gp(mach, vcpu);
	return true;
}

static void
svm_exit_msr(struct nvmm_machine *mach, struct nvmm_cpu *vcpu,
    struct nvmm_exit *exit)
{
	struct svm_cpudata *cpudata = vcpu->cpudata;
	uint64_t info = cpudata->vmcb->ctrl.exitinfo1;

	if (info == 0) {
		exit->u.msr.type = NVMM_EXIT_MSR_RDMSR;
	} else {
		exit->u.msr.type = NVMM_EXIT_MSR_WRMSR;
	}

	exit->u.msr.msr = (cpudata->gprs[NVMM_X64_GPR_RCX] & 0xFFFFFFFF);

	if (info == 1) {
		uint64_t rdx, rax;
		rdx = cpudata->gprs[NVMM_X64_GPR_RDX];
		rax = cpudata->vmcb->state.rax;
		exit->u.msr.val = (rdx << 32) | (rax & 0xFFFFFFFF);
	} else {
		exit->u.msr.val = 0;
	}

	if (svm_inkernel_handle_msr(mach, vcpu, exit)) {
		exit->reason = NVMM_EXIT_NONE;
		return;
	}

	exit->reason = NVMM_EXIT_MSR;
	exit->u.msr.npc = cpudata->vmcb->ctrl.nrip;

	svm_vcpu_state_provide(vcpu, NVMM_X64_STATE_GPRS);
}

static void
svm_exit_npf(struct nvmm_machine *mach, struct nvmm_cpu *vcpu,
    struct nvmm_exit *exit)
{
	struct svm_cpudata *cpudata = vcpu->cpudata;
	gpaddr_t gpa = cpudata->vmcb->ctrl.exitinfo2;

	exit->reason = NVMM_EXIT_MEMORY;
	if (cpudata->vmcb->ctrl.exitinfo1 & PGEX_W)
		exit->u.mem.prot = PROT_WRITE;
	else if (cpudata->vmcb->ctrl.exitinfo1 & PGEX_X)
		exit->u.mem.prot = PROT_EXEC;
	else
		exit->u.mem.prot = PROT_READ;
	exit->u.mem.gpa = gpa;
	exit->u.mem.inst_len = cpudata->vmcb->ctrl.inst_len;
	memcpy(exit->u.mem.inst_bytes, cpudata->vmcb->ctrl.inst_bytes,
	    sizeof(exit->u.mem.inst_bytes));

	svm_vcpu_state_provide(vcpu,
	    NVMM_X64_STATE_GPRS | NVMM_X64_STATE_SEGS |
	    NVMM_X64_STATE_CRS | NVMM_X64_STATE_MSRS);
}

static void
svm_exit_insn(struct vmcb *vmcb, struct nvmm_exit *exit, uint64_t reason)
{
	exit->u.insn.npc = vmcb->ctrl.nrip;
	exit->reason = reason;
}
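/*
 * XSETBV is always intercepted: we check that the guest is at CPL0 with
 * ECX=0 and a value that has the mandatory X87 bit set and no bits outside
 * svm_xcr0_mask, and record the new XCR0; anything else gets a #GP. The
 * real register is loaded with the guest value in
 * svm_vcpu_guest_fpu_enter() below.
 */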
static void
svm_exit_xsetbv(struct nvmm_machine *mach, struct nvmm_cpu *vcpu,
    struct nvmm_exit *exit)
{
	struct svm_cpudata *cpudata = vcpu->cpudata;
	struct vmcb *vmcb = cpudata->vmcb;
	uint64_t val;

	exit->reason = NVMM_EXIT_NONE;

	val = (cpudata->gprs[NVMM_X64_GPR_RDX] << 32) |
	    (vmcb->state.rax & 0xFFFFFFFF);

	if (__predict_false(cpudata->gprs[NVMM_X64_GPR_RCX] != 0)) {
		goto error;
	} else if (__predict_false(vmcb->state.cpl != 0)) {
		goto error;
	} else if (__predict_false((val & ~svm_xcr0_mask) != 0)) {
		goto error;
	} else if (__predict_false((val & XCR0_X87) == 0)) {
		goto error;
	}

	cpudata->gxcr0 = val;

	svm_inkernel_advance(cpudata->vmcb);
	return;

error:
	svm_inject_gp(mach, vcpu);
}

static void
svm_exit_invalid(struct nvmm_exit *exit, uint64_t code)
{
	exit->u.inv.hwcode = code;
	exit->reason = NVMM_EXIT_INVALID;
}

/* -------------------------------------------------------------------------- */

static void
svm_vcpu_guest_fpu_enter(struct nvmm_cpu *vcpu)
{
	struct svm_cpudata *cpudata = vcpu->cpudata;

	cpudata->ts_set = (rcr0() & CR0_TS) != 0;

	fpu_area_save(&cpudata->hfpu, svm_xcr0_mask);
	fpu_area_restore(&cpudata->gfpu, svm_xcr0_mask);

	if (svm_xcr0_mask != 0) {
		cpudata->hxcr0 = rdxcr(0);
		wrxcr(0, cpudata->gxcr0);
	}
}

static void
svm_vcpu_guest_fpu_leave(struct nvmm_cpu *vcpu)
{
	struct svm_cpudata *cpudata = vcpu->cpudata;

	if (svm_xcr0_mask != 0) {
		cpudata->gxcr0 = rdxcr(0);
		wrxcr(0, cpudata->hxcr0);
	}

	fpu_area_save(&cpudata->gfpu, svm_xcr0_mask);
	fpu_area_restore(&cpudata->hfpu, svm_xcr0_mask);

	if (cpudata->ts_set) {
		stts();
	}
}

static void
svm_vcpu_guest_dbregs_enter(struct nvmm_cpu *vcpu)
{
	struct svm_cpudata *cpudata = vcpu->cpudata;

	x86_dbregs_save(curlwp);

	ldr7(0);

	ldr0(cpudata->drs[NVMM_X64_DR_DR0]);
	ldr1(cpudata->drs[NVMM_X64_DR_DR1]);
	ldr2(cpudata->drs[NVMM_X64_DR_DR2]);
	ldr3(cpudata->drs[NVMM_X64_DR_DR3]);
}

static void
svm_vcpu_guest_dbregs_leave(struct nvmm_cpu *vcpu)
{
	struct svm_cpudata *cpudata = vcpu->cpudata;

	cpudata->drs[NVMM_X64_DR_DR0] = rdr0();
	cpudata->drs[NVMM_X64_DR_DR1] = rdr1();
	cpudata->drs[NVMM_X64_DR_DR2] = rdr2();
	cpudata->drs[NVMM_X64_DR_DR3] = rdr3();

	x86_dbregs_restore(curlwp);
}

static void
svm_vcpu_guest_misc_enter(struct nvmm_cpu *vcpu)
{
	struct svm_cpudata *cpudata = vcpu->cpudata;

	cpudata->fsbase = rdmsr(MSR_FSBASE);
	cpudata->kernelgsbase = rdmsr(MSR_KERNELGSBASE);
}

static void
svm_vcpu_guest_misc_leave(struct nvmm_cpu *vcpu)
{
	struct svm_cpudata *cpudata = vcpu->cpudata;

	wrmsr(MSR_STAR, cpudata->star);
	wrmsr(MSR_LSTAR, cpudata->lstar);
	wrmsr(MSR_CSTAR, cpudata->cstar);
	wrmsr(MSR_SFMASK, cpudata->sfmask);
	wrmsr(MSR_FSBASE, cpudata->fsbase);
	wrmsr(MSR_KERNELGSBASE, cpudata->kernelgsbase);
}

/* -------------------------------------------------------------------------- */

static inline void
svm_gtlb_catchup(struct nvmm_cpu *vcpu, int hcpu)
{
	struct svm_cpudata *cpudata = vcpu->cpudata;

	if (vcpu->hcpu_last != hcpu || cpudata->shared_asid) {
		cpudata->gtlb_want_flush = true;
	}
}
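/*
 * Host TLB synchronization works with a generation number. Each time the
 * host modifies the nested page tables (in code outside this excerpt) it
 * bumps machdata->mach_htlb_gen; each VCPU records in vcpu_htlb_gen the
 * generation it last ran with. If the two differ at entry time, the guest
 * TLB entries are flushed, and the new generation is acknowledged only if
 * VMRUN actually succeeded.
 */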
static inline void
svm_htlb_catchup(struct nvmm_cpu *vcpu, int hcpu)
{
	/*
	 * Nothing to do. If an hTLB flush was needed, either the VCPU was
	 * executing on this hCPU and the hTLB already got flushed, or it
	 * was executing on another hCPU in which case the catchup is done
	 * in svm_gtlb_catchup().
	 */
}

static inline uint64_t
svm_htlb_flush(struct svm_machdata *machdata, struct svm_cpudata *cpudata)
{
	struct vmcb *vmcb = cpudata->vmcb;
	uint64_t machgen;

	machgen = machdata->mach_htlb_gen;
	if (__predict_true(machgen == cpudata->vcpu_htlb_gen)) {
		return machgen;
	}

	vmcb->ctrl.tlb_ctrl = svm_ctrl_tlb_flush;
	return machgen;
}

static inline void
svm_htlb_flush_ack(struct svm_cpudata *cpudata, uint64_t machgen)
{
	struct vmcb *vmcb = cpudata->vmcb;

	if (__predict_true(vmcb->ctrl.exitcode != VMCB_EXITCODE_INVALID)) {
		cpudata->vcpu_htlb_gen = machgen;
	}
}

static inline void
svm_exit_evt(struct svm_cpudata *cpudata, struct vmcb *vmcb)
{
	cpudata->evt_pending = false;

	if (__predict_false(vmcb->ctrl.exitintinfo & VMCB_CTRL_EXITINTINFO_V)) {
		vmcb->ctrl.eventinj = vmcb->ctrl.exitintinfo;
		cpudata->evt_pending = true;
	}
}
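/*
 * VCPU run loop. The dirty guest state is committed, preemption is
 * disabled, and the guest is then entered repeatedly: as long as the exit
 * can be handled in the kernel and nothing requires a return to userland
 * (no pending softints, no preemption request, no exit reason), we go
 * straight back into VMRUN.
 */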
static int
svm_vcpu_run(struct nvmm_machine *mach, struct nvmm_cpu *vcpu,
    struct nvmm_exit *exit)
{
	struct nvmm_comm_page *comm = vcpu->comm;
	struct svm_machdata *machdata = mach->machdata;
	struct svm_cpudata *cpudata = vcpu->cpudata;
	struct vmcb *vmcb = cpudata->vmcb;
	uint64_t machgen;
	int hcpu, s;

	svm_vcpu_state_commit(vcpu);
	comm->state_cached = 0;

	kpreempt_disable();
	hcpu = cpu_number();

	svm_gtlb_catchup(vcpu, hcpu);
	svm_htlb_catchup(vcpu, hcpu);

	if (vcpu->hcpu_last != hcpu) {
		svm_vmcb_cache_flush_all(vmcb);
		cpudata->gtsc_want_update = true;
	}

	svm_vcpu_guest_dbregs_enter(vcpu);
	svm_vcpu_guest_misc_enter(vcpu);

	while (1) {
		if (cpudata->gtlb_want_flush) {
			vmcb->ctrl.tlb_ctrl = svm_ctrl_tlb_flush;
		} else {
			vmcb->ctrl.tlb_ctrl = 0;
		}

		if (__predict_false(cpudata->gtsc_want_update)) {
			vmcb->ctrl.tsc_offset = cpudata->gtsc - rdtsc();
			svm_vmcb_cache_flush(vmcb, VMCB_CTRL_VMCB_CLEAN_I);
		}

		s = splhigh();
		machgen = svm_htlb_flush(machdata, cpudata);
		svm_vcpu_guest_fpu_enter(vcpu);
		svm_vmrun(cpudata->vmcb_pa, cpudata->gprs);
		svm_vcpu_guest_fpu_leave(vcpu);
		svm_htlb_flush_ack(cpudata, machgen);
		splx(s);

		svm_vmcb_cache_default(vmcb);

		if (vmcb->ctrl.exitcode != VMCB_EXITCODE_INVALID) {
			cpudata->gtlb_want_flush = false;
			cpudata->gtsc_want_update = false;
			vcpu->hcpu_last = hcpu;
		}
		svm_exit_evt(cpudata, vmcb);

		switch (vmcb->ctrl.exitcode) {
		case VMCB_EXITCODE_INTR:
		case VMCB_EXITCODE_NMI:
			exit->reason = NVMM_EXIT_NONE;
			break;
		case VMCB_EXITCODE_VINTR:
			svm_event_waitexit_disable(vcpu, false);
			exit->reason = NVMM_EXIT_INT_READY;
			break;
		case VMCB_EXITCODE_IRET:
			svm_event_waitexit_disable(vcpu, true);
			exit->reason = NVMM_EXIT_NMI_READY;
			break;
		case VMCB_EXITCODE_CPUID:
			svm_exit_cpuid(mach, vcpu, exit);
			break;
		case VMCB_EXITCODE_HLT:
			svm_exit_hlt(mach, vcpu, exit);
			break;
		case VMCB_EXITCODE_IOIO:
			svm_exit_io(mach, vcpu, exit);
			break;
		case VMCB_EXITCODE_MSR:
			svm_exit_msr(mach, vcpu, exit);
			break;
		case VMCB_EXITCODE_SHUTDOWN:
			exit->reason = NVMM_EXIT_SHUTDOWN;
			break;
		case VMCB_EXITCODE_RDPMC:
		case VMCB_EXITCODE_RSM:
		case VMCB_EXITCODE_INVLPGA:
		case VMCB_EXITCODE_VMRUN:
		case VMCB_EXITCODE_VMMCALL:
		case VMCB_EXITCODE_VMLOAD:
		case VMCB_EXITCODE_VMSAVE:
		case VMCB_EXITCODE_STGI:
		case VMCB_EXITCODE_CLGI:
		case VMCB_EXITCODE_SKINIT:
		case VMCB_EXITCODE_RDTSCP:
			svm_inject_ud(mach, vcpu);
			exit->reason = NVMM_EXIT_NONE;
			break;
		case VMCB_EXITCODE_MONITOR:
			svm_exit_insn(vmcb, exit, NVMM_EXIT_MONITOR);
			break;
		case VMCB_EXITCODE_MWAIT:
			svm_exit_insn(vmcb, exit, NVMM_EXIT_MWAIT);
			break;
		case VMCB_EXITCODE_MWAIT_CONDITIONAL:
			svm_exit_insn(vmcb, exit, NVMM_EXIT_MWAIT_COND);
			break;
		case VMCB_EXITCODE_XSETBV:
			svm_exit_xsetbv(mach, vcpu, exit);
			break;
		case VMCB_EXITCODE_NPF:
			svm_exit_npf(mach, vcpu, exit);
			break;
		case VMCB_EXITCODE_FERR_FREEZE: /* ? */
		default:
			svm_exit_invalid(exit, vmcb->ctrl.exitcode);
			break;
		}

		/* If no reason to return to userland, keep rolling. */
		if (curcpu()->ci_schedstate.spc_flags & SPCF_SHOULDYIELD) {
			break;
		}
		if (curcpu()->ci_data.cpu_softints != 0) {
			break;
		}
		if (curlwp->l_flag & LW_USERRET) {
			break;
		}
		if (exit->reason != NVMM_EXIT_NONE) {
			break;
		}
	}

	cpudata->gtsc = rdtsc() + vmcb->ctrl.tsc_offset;

	svm_vcpu_guest_misc_leave(vcpu);
	svm_vcpu_guest_dbregs_leave(vcpu);

	kpreempt_enable();

	exit->exitstate[NVMM_X64_EXITSTATE_CR8] = __SHIFTOUT(vmcb->ctrl.v,
	    VMCB_CTRL_V_TPR);
	exit->exitstate[NVMM_X64_EXITSTATE_RFLAGS] = vmcb->state.rflags;

	exit->exitstate[NVMM_X64_EXITSTATE_INT_SHADOW] =
	    ((vmcb->ctrl.intr & VMCB_CTRL_INTR_SHADOW) != 0);
	exit->exitstate[NVMM_X64_EXITSTATE_INT_WINDOW_EXIT] =
	    cpudata->int_window_exit;
	exit->exitstate[NVMM_X64_EXITSTATE_NMI_WINDOW_EXIT] =
	    cpudata->nmi_window_exit;
	exit->exitstate[NVMM_X64_EXITSTATE_EVT_PENDING] =
	    cpudata->evt_pending;

	return 0;
}

/* -------------------------------------------------------------------------- */
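/*
 * Allocator for the per-VCPU pages (VMCB, IOPM, MSRPM). The hardware takes
 * a single base physical address for each of these, so the pages must be
 * physically contiguous, hence uvm_pglistalloc() with a single segment;
 * they are then mapped into kernel VA and zeroed.
 */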
static int
svm_memalloc(paddr_t *pa, vaddr_t *va, size_t npages)
{
	struct pglist pglist;
	paddr_t _pa;
	vaddr_t _va;
	size_t i;
	int ret;

	ret = uvm_pglistalloc(npages * PAGE_SIZE, 0, ~0UL, PAGE_SIZE, 0,
	    &pglist, 1, 0);
	if (ret != 0)
		return ENOMEM;
	_pa = TAILQ_FIRST(&pglist)->phys_addr;
	_va = uvm_km_alloc(kernel_map, npages * PAGE_SIZE, 0,
	    UVM_KMF_VAONLY | UVM_KMF_NOWAIT);
	if (_va == 0)
		goto error;

	for (i = 0; i < npages; i++) {
		pmap_kenter_pa(_va + i * PAGE_SIZE, _pa + i * PAGE_SIZE,
		    VM_PROT_READ | VM_PROT_WRITE, PMAP_WRITE_BACK);
	}
	pmap_update(pmap_kernel());

	memset((void *)_va, 0, npages * PAGE_SIZE);

	*pa = _pa;
	*va = _va;
	return 0;

error:
	for (i = 0; i < npages; i++) {
		uvm_pagefree(PHYS_TO_VM_PAGE(_pa + i * PAGE_SIZE));
	}
	return ENOMEM;
}

static void
svm_memfree(paddr_t pa, vaddr_t va, size_t npages)
{
	size_t i;

	pmap_kremove(va, npages * PAGE_SIZE);
	pmap_update(pmap_kernel());
	uvm_km_free(kernel_map, va, npages * PAGE_SIZE, UVM_KMF_VAONLY);
	for (i = 0; i < npages; i++) {
		uvm_pagefree(PHYS_TO_VM_PAGE(pa + i * PAGE_SIZE));
	}
}

/* -------------------------------------------------------------------------- */

#define SVM_MSRBM_READ	__BIT(0)
#define SVM_MSRBM_WRITE	__BIT(1)

static void
svm_vcpu_msr_allow(uint8_t *bitmap, uint64_t msr, bool read, bool write)
{
	uint64_t byte;
	uint8_t bitoff;

	if (msr < 0x00002000) {
		/* Range 1 */
		byte = ((msr - 0x00000000) >> 2UL) + 0x0000;
	} else if (msr >= 0xC0000000 && msr < 0xC0002000) {
		/* Range 2 */
		byte = ((msr - 0xC0000000) >> 2UL) + 0x0800;
	} else if (msr >= 0xC0010000 && msr < 0xC0012000) {
		/* Range 3 */
		byte = ((msr - 0xC0010000) >> 2UL) + 0x1000;
	} else {
		panic("%s: wrong range", __func__);
	}

	bitoff = (msr & 0x3) << 1;

	if (read) {
		bitmap[byte] &= ~(SVM_MSRBM_READ << bitoff);
	}
	if (write) {
		bitmap[byte] &= ~(SVM_MSRBM_WRITE << bitoff);
	}
}
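/*
 * Worked example of the MSRPM layout above, for MSR_LSTAR (0xC0000082):
 * the MSR falls in range 2, so byte = ((0xC0000082 - 0xC0000000) >> 2) +
 * 0x800 = 0x820, and bitoff = (0x82 & 3) << 1 = 4. Allowing reads and
 * writes therefore clears bits 4 and 5 of msrbm[0x820].
 */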
#define SVM_SEG_ATTRIB_TYPE	__BITS(3,0)
#define SVM_SEG_ATTRIB_S	__BIT(4)
#define SVM_SEG_ATTRIB_DPL	__BITS(6,5)
#define SVM_SEG_ATTRIB_P	__BIT(7)
#define SVM_SEG_ATTRIB_AVL	__BIT(8)
#define SVM_SEG_ATTRIB_L	__BIT(9)
#define SVM_SEG_ATTRIB_DEF	__BIT(10)
#define SVM_SEG_ATTRIB_G	__BIT(11)

static void
svm_vcpu_setstate_seg(const struct nvmm_x64_state_seg *seg,
    struct vmcb_segment *vseg)
{
	vseg->selector = seg->selector;
	vseg->attrib =
	    __SHIFTIN(seg->attrib.type, SVM_SEG_ATTRIB_TYPE) |
	    __SHIFTIN(seg->attrib.s, SVM_SEG_ATTRIB_S) |
	    __SHIFTIN(seg->attrib.dpl, SVM_SEG_ATTRIB_DPL) |
	    __SHIFTIN(seg->attrib.p, SVM_SEG_ATTRIB_P) |
	    __SHIFTIN(seg->attrib.avl, SVM_SEG_ATTRIB_AVL) |
	    __SHIFTIN(seg->attrib.l, SVM_SEG_ATTRIB_L) |
	    __SHIFTIN(seg->attrib.def, SVM_SEG_ATTRIB_DEF) |
	    __SHIFTIN(seg->attrib.g, SVM_SEG_ATTRIB_G);
	vseg->limit = seg->limit;
	vseg->base = seg->base;
}

static void
svm_vcpu_getstate_seg(struct nvmm_x64_state_seg *seg, struct vmcb_segment *vseg)
{
	seg->selector = vseg->selector;
	seg->attrib.type = __SHIFTOUT(vseg->attrib, SVM_SEG_ATTRIB_TYPE);
	seg->attrib.s = __SHIFTOUT(vseg->attrib, SVM_SEG_ATTRIB_S);
	seg->attrib.dpl = __SHIFTOUT(vseg->attrib, SVM_SEG_ATTRIB_DPL);
	seg->attrib.p = __SHIFTOUT(vseg->attrib, SVM_SEG_ATTRIB_P);
	seg->attrib.avl = __SHIFTOUT(vseg->attrib, SVM_SEG_ATTRIB_AVL);
	seg->attrib.l = __SHIFTOUT(vseg->attrib, SVM_SEG_ATTRIB_L);
	seg->attrib.def = __SHIFTOUT(vseg->attrib, SVM_SEG_ATTRIB_DEF);
	seg->attrib.g = __SHIFTOUT(vseg->attrib, SVM_SEG_ATTRIB_G);
	seg->limit = vseg->limit;
	seg->base = vseg->base;
}

static inline bool
svm_state_tlb_flush(const struct vmcb *vmcb, const struct nvmm_x64_state *state,
    uint64_t flags)
{
	if (flags & NVMM_X64_STATE_CRS) {
		if ((vmcb->state.cr0 ^
		     state->crs[NVMM_X64_CR_CR0]) & CR0_TLB_FLUSH) {
			return true;
		}
		if (vmcb->state.cr3 != state->crs[NVMM_X64_CR_CR3]) {
			return true;
		}
		if ((vmcb->state.cr4 ^
		     state->crs[NVMM_X64_CR_CR4]) & CR4_TLB_FLUSH) {
			return true;
		}
	}

	if (flags & NVMM_X64_STATE_MSRS) {
		if ((vmcb->state.efer ^
		     state->msrs[NVMM_X64_MSR_EFER]) & EFER_TLB_FLUSH) {
			return true;
		}
	}

	return false;
}
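/*
 * State transfer with userland (libnvmm) goes through the comm page:
 * state_wanted selects which components to copy, state_cached tracks what
 * the comm page currently mirrors. svm_vcpu_setstate() pushes the selected
 * components into the VMCB, flushing the guest TLB when a CR/EFER change
 * requires it; svm_vcpu_getstate() pulls them out.
 */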
static void
svm_vcpu_setstate(struct nvmm_cpu *vcpu)
{
	struct nvmm_comm_page *comm = vcpu->comm;
	const struct nvmm_x64_state *state = &comm->state;
	struct svm_cpudata *cpudata = vcpu->cpudata;
	struct vmcb *vmcb = cpudata->vmcb;
	struct fxsave *fpustate;
	uint64_t flags;

	flags = comm->state_wanted;

	if (svm_state_tlb_flush(vmcb, state, flags)) {
		cpudata->gtlb_want_flush = true;
	}

	if (flags & NVMM_X64_STATE_SEGS) {
		svm_vcpu_setstate_seg(&state->segs[NVMM_X64_SEG_CS],
		    &vmcb->state.cs);
		svm_vcpu_setstate_seg(&state->segs[NVMM_X64_SEG_DS],
		    &vmcb->state.ds);
		svm_vcpu_setstate_seg(&state->segs[NVMM_X64_SEG_ES],
		    &vmcb->state.es);
		svm_vcpu_setstate_seg(&state->segs[NVMM_X64_SEG_FS],
		    &vmcb->state.fs);
		svm_vcpu_setstate_seg(&state->segs[NVMM_X64_SEG_GS],
		    &vmcb->state.gs);
		svm_vcpu_setstate_seg(&state->segs[NVMM_X64_SEG_SS],
		    &vmcb->state.ss);
		svm_vcpu_setstate_seg(&state->segs[NVMM_X64_SEG_GDT],
		    &vmcb->state.gdt);
		svm_vcpu_setstate_seg(&state->segs[NVMM_X64_SEG_IDT],
		    &vmcb->state.idt);
		svm_vcpu_setstate_seg(&state->segs[NVMM_X64_SEG_LDT],
		    &vmcb->state.ldt);
		svm_vcpu_setstate_seg(&state->segs[NVMM_X64_SEG_TR],
		    &vmcb->state.tr);

		vmcb->state.cpl = state->segs[NVMM_X64_SEG_SS].attrib.dpl;
	}

	CTASSERT(sizeof(cpudata->gprs) == sizeof(state->gprs));
	if (flags & NVMM_X64_STATE_GPRS) {
		memcpy(cpudata->gprs, state->gprs, sizeof(state->gprs));

		vmcb->state.rip = state->gprs[NVMM_X64_GPR_RIP];
		vmcb->state.rsp = state->gprs[NVMM_X64_GPR_RSP];
		vmcb->state.rax = state->gprs[NVMM_X64_GPR_RAX];
		vmcb->state.rflags = state->gprs[NVMM_X64_GPR_RFLAGS];
	}

	if (flags & NVMM_X64_STATE_CRS) {
		vmcb->state.cr0 = state->crs[NVMM_X64_CR_CR0];
		vmcb->state.cr2 = state->crs[NVMM_X64_CR_CR2];
		vmcb->state.cr3 = state->crs[NVMM_X64_CR_CR3];
		vmcb->state.cr4 = state->crs[NVMM_X64_CR_CR4];

		vmcb->ctrl.v &= ~VMCB_CTRL_V_TPR;
		vmcb->ctrl.v |= __SHIFTIN(state->crs[NVMM_X64_CR_CR8],
		    VMCB_CTRL_V_TPR);

		if (svm_xcr0_mask != 0) {
			/* Clear illegal XCR0 bits, set mandatory X87 bit. */
			cpudata->gxcr0 = state->crs[NVMM_X64_CR_XCR0];
			cpudata->gxcr0 &= svm_xcr0_mask;
			cpudata->gxcr0 |= XCR0_X87;
		}
	}

	CTASSERT(sizeof(cpudata->drs) == sizeof(state->drs));
	if (flags & NVMM_X64_STATE_DRS) {
		memcpy(cpudata->drs, state->drs, sizeof(state->drs));

		vmcb->state.dr6 = state->drs[NVMM_X64_DR_DR6];
		vmcb->state.dr7 = state->drs[NVMM_X64_DR_DR7];
	}

	if (flags & NVMM_X64_STATE_MSRS) {
		/*
		 * EFER_SVME is mandatory.
		 */
		vmcb->state.efer = state->msrs[NVMM_X64_MSR_EFER] | EFER_SVME;
		vmcb->state.star = state->msrs[NVMM_X64_MSR_STAR];
		vmcb->state.lstar = state->msrs[NVMM_X64_MSR_LSTAR];
		vmcb->state.cstar = state->msrs[NVMM_X64_MSR_CSTAR];
		vmcb->state.sfmask = state->msrs[NVMM_X64_MSR_SFMASK];
		vmcb->state.kernelgsbase =
		    state->msrs[NVMM_X64_MSR_KERNELGSBASE];
		vmcb->state.sysenter_cs =
		    state->msrs[NVMM_X64_MSR_SYSENTER_CS];
		vmcb->state.sysenter_esp =
		    state->msrs[NVMM_X64_MSR_SYSENTER_ESP];
		vmcb->state.sysenter_eip =
		    state->msrs[NVMM_X64_MSR_SYSENTER_EIP];
		vmcb->state.g_pat = state->msrs[NVMM_X64_MSR_PAT];

		cpudata->gtsc = state->msrs[NVMM_X64_MSR_TSC];
		cpudata->gtsc_want_update = true;
	}

	if (flags & NVMM_X64_STATE_INTR) {
		if (state->intr.int_shadow) {
			vmcb->ctrl.intr |= VMCB_CTRL_INTR_SHADOW;
		} else {
			vmcb->ctrl.intr &= ~VMCB_CTRL_INTR_SHADOW;
		}

		if (state->intr.int_window_exiting) {
			svm_event_waitexit_enable(vcpu, false);
		} else {
			svm_event_waitexit_disable(vcpu, false);
		}

		if (state->intr.nmi_window_exiting) {
			svm_event_waitexit_enable(vcpu, true);
		} else {
			svm_event_waitexit_disable(vcpu, true);
		}
	}

	CTASSERT(sizeof(cpudata->gfpu.xsh_fxsave) == sizeof(state->fpu));
	if (flags & NVMM_X64_STATE_FPU) {
		memcpy(cpudata->gfpu.xsh_fxsave, &state->fpu,
		    sizeof(state->fpu));

		fpustate = (struct fxsave *)cpudata->gfpu.xsh_fxsave;
		fpustate->fx_mxcsr_mask &= x86_fpu_mxcsr_mask;
		fpustate->fx_mxcsr &= fpustate->fx_mxcsr_mask;

		if (svm_xcr0_mask != 0) {
			/* Reset XSTATE_BV, to force a reload. */
			cpudata->gfpu.xsh_xstate_bv = svm_xcr0_mask;
		}
	}

	svm_vmcb_cache_update(vmcb, flags);

	comm->state_wanted = 0;
	comm->state_cached |= flags;
}
static void
svm_vcpu_getstate(struct nvmm_cpu *vcpu)
{
	struct nvmm_comm_page *comm = vcpu->comm;
	struct nvmm_x64_state *state = &comm->state;
	struct svm_cpudata *cpudata = vcpu->cpudata;
	struct vmcb *vmcb = cpudata->vmcb;
	uint64_t flags;

	flags = comm->state_wanted;

	if (flags & NVMM_X64_STATE_SEGS) {
		svm_vcpu_getstate_seg(&state->segs[NVMM_X64_SEG_CS],
		    &vmcb->state.cs);
		svm_vcpu_getstate_seg(&state->segs[NVMM_X64_SEG_DS],
		    &vmcb->state.ds);
		svm_vcpu_getstate_seg(&state->segs[NVMM_X64_SEG_ES],
		    &vmcb->state.es);
		svm_vcpu_getstate_seg(&state->segs[NVMM_X64_SEG_FS],
		    &vmcb->state.fs);
		svm_vcpu_getstate_seg(&state->segs[NVMM_X64_SEG_GS],
		    &vmcb->state.gs);
		svm_vcpu_getstate_seg(&state->segs[NVMM_X64_SEG_SS],
		    &vmcb->state.ss);
		svm_vcpu_getstate_seg(&state->segs[NVMM_X64_SEG_GDT],
		    &vmcb->state.gdt);
		svm_vcpu_getstate_seg(&state->segs[NVMM_X64_SEG_IDT],
		    &vmcb->state.idt);
		svm_vcpu_getstate_seg(&state->segs[NVMM_X64_SEG_LDT],
		    &vmcb->state.ldt);
		svm_vcpu_getstate_seg(&state->segs[NVMM_X64_SEG_TR],
		    &vmcb->state.tr);

		state->segs[NVMM_X64_SEG_SS].attrib.dpl = vmcb->state.cpl;
	}

	CTASSERT(sizeof(cpudata->gprs) == sizeof(state->gprs));
	if (flags & NVMM_X64_STATE_GPRS) {
		memcpy(state->gprs, cpudata->gprs, sizeof(state->gprs));

		state->gprs[NVMM_X64_GPR_RIP] = vmcb->state.rip;
		state->gprs[NVMM_X64_GPR_RSP] = vmcb->state.rsp;
		state->gprs[NVMM_X64_GPR_RAX] = vmcb->state.rax;
		state->gprs[NVMM_X64_GPR_RFLAGS] = vmcb->state.rflags;
	}

	if (flags & NVMM_X64_STATE_CRS) {
		state->crs[NVMM_X64_CR_CR0] = vmcb->state.cr0;
		state->crs[NVMM_X64_CR_CR2] = vmcb->state.cr2;
		state->crs[NVMM_X64_CR_CR3] = vmcb->state.cr3;
		state->crs[NVMM_X64_CR_CR4] = vmcb->state.cr4;
		state->crs[NVMM_X64_CR_CR8] = __SHIFTOUT(vmcb->ctrl.v,
		    VMCB_CTRL_V_TPR);
		state->crs[NVMM_X64_CR_XCR0] = cpudata->gxcr0;
	}

	CTASSERT(sizeof(cpudata->drs) == sizeof(state->drs));
	if (flags & NVMM_X64_STATE_DRS) {
		memcpy(state->drs, cpudata->drs, sizeof(state->drs));

		state->drs[NVMM_X64_DR_DR6] = vmcb->state.dr6;
		state->drs[NVMM_X64_DR_DR7] = vmcb->state.dr7;
	}

	if (flags & NVMM_X64_STATE_MSRS) {
		state->msrs[NVMM_X64_MSR_EFER] = vmcb->state.efer;
		state->msrs[NVMM_X64_MSR_STAR] = vmcb->state.star;
		state->msrs[NVMM_X64_MSR_LSTAR] = vmcb->state.lstar;
		state->msrs[NVMM_X64_MSR_CSTAR] = vmcb->state.cstar;
		state->msrs[NVMM_X64_MSR_SFMASK] = vmcb->state.sfmask;
		state->msrs[NVMM_X64_MSR_KERNELGSBASE] =
		    vmcb->state.kernelgsbase;
		state->msrs[NVMM_X64_MSR_SYSENTER_CS] =
		    vmcb->state.sysenter_cs;
		state->msrs[NVMM_X64_MSR_SYSENTER_ESP] =
		    vmcb->state.sysenter_esp;
		state->msrs[NVMM_X64_MSR_SYSENTER_EIP] =
		    vmcb->state.sysenter_eip;
		state->msrs[NVMM_X64_MSR_PAT] = vmcb->state.g_pat;
		state->msrs[NVMM_X64_MSR_TSC] = cpudata->gtsc;

		/* Hide SVME. */
		state->msrs[NVMM_X64_MSR_EFER] &= ~EFER_SVME;
	}

	if (flags & NVMM_X64_STATE_INTR) {
		state->intr.int_shadow =
		    (vmcb->ctrl.intr & VMCB_CTRL_INTR_SHADOW) != 0;
		state->intr.int_window_exiting = cpudata->int_window_exit;
		state->intr.nmi_window_exiting = cpudata->nmi_window_exit;
		state->intr.evt_pending = cpudata->evt_pending;
	}

	CTASSERT(sizeof(cpudata->gfpu.xsh_fxsave) == sizeof(state->fpu));
	if (flags & NVMM_X64_STATE_FPU) {
		memcpy(&state->fpu, cpudata->gfpu.xsh_fxsave,
		    sizeof(state->fpu));
	}

	comm->state_wanted = 0;
	comm->state_cached |= flags;
}

static void
svm_vcpu_state_provide(struct nvmm_cpu *vcpu, uint64_t flags)
{
	vcpu->comm->state_wanted = flags;
	svm_vcpu_getstate(vcpu);
}

static void
svm_vcpu_state_commit(struct nvmm_cpu *vcpu)
{
	vcpu->comm->state_wanted = vcpu->comm->state_commit;
	vcpu->comm->state_commit = 0;
	svm_vcpu_setstate(vcpu);
}

/* -------------------------------------------------------------------------- */
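/*
 * ASID management. Each VCPU is tagged with a unique ASID, so that its TLB
 * entries can coexist with those of the host and of other guests. When the
 * bitmap is full we fall back to the last ASID, which is shared: VCPUs
 * using it must flush the guest TLB on every run (see svm_gtlb_catchup()).
 */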
static void
svm_asid_alloc(struct nvmm_cpu *vcpu)
{
	struct svm_cpudata *cpudata = vcpu->cpudata;
	struct vmcb *vmcb = cpudata->vmcb;
	size_t i, oct, bit;

	mutex_enter(&svm_asidlock);

	for (i = 0; i < svm_maxasid; i++) {
		oct = i / 8;
		bit = i % 8;

		if (svm_asidmap[oct] & __BIT(bit)) {
			continue;
		}

		svm_asidmap[oct] |= __BIT(bit);
		vmcb->ctrl.guest_asid = i;
		mutex_exit(&svm_asidlock);
		return;
	}

	/*
	 * No free ASID. Use the last one, which is shared and requires
	 * special TLB handling.
	 */
	cpudata->shared_asid = true;
	vmcb->ctrl.guest_asid = svm_maxasid - 1;
	mutex_exit(&svm_asidlock);
}

static void
svm_asid_free(struct nvmm_cpu *vcpu)
{
	struct svm_cpudata *cpudata = vcpu->cpudata;
	struct vmcb *vmcb = cpudata->vmcb;
	size_t oct, bit;

	if (cpudata->shared_asid) {
		return;
	}

	oct = vmcb->ctrl.guest_asid / 8;
	bit = vmcb->ctrl.guest_asid % 8;

	mutex_enter(&svm_asidlock);
	svm_asidmap[oct] &= ~__BIT(bit);
	mutex_exit(&svm_asidlock);
}

static void
svm_vcpu_init(struct nvmm_machine *mach, struct nvmm_cpu *vcpu)
{
	struct svm_cpudata *cpudata = vcpu->cpudata;
	struct vmcb *vmcb = cpudata->vmcb;

	/* Allow reads/writes of Control Registers. */
	vmcb->ctrl.intercept_cr = 0;

	/* Allow reads/writes of Debug Registers. */
	vmcb->ctrl.intercept_dr = 0;

	/* Allow exceptions 0 to 31. */
	vmcb->ctrl.intercept_vec = 0;

	/*
	 * Allow:
	 * - SMI [SMM interrupts]
	 * - VINTR [virtual interrupts]
	 * - CR0_SPEC [CR0 writes changing fields other than CR0.TS or CR0.MP]
	 * - RIDTR [reads of IDTR]
	 * - RGDTR [reads of GDTR]
	 * - RLDTR [reads of LDTR]
	 * - RTR [reads of TR]
	 * - WIDTR [writes of IDTR]
	 * - WGDTR [writes of GDTR]
	 * - WLDTR [writes of LDTR]
	 * - WTR [writes of TR]
	 * - RDTSC [rdtsc instruction]
	 * - PUSHF [pushf instruction]
	 * - POPF [popf instruction]
	 * - IRET [iret instruction]
	 * - INTN [int $n instructions]
	 * - INVD [invd instruction]
	 * - PAUSE [pause instruction]
	 * - INVLPG [invlpg instruction]
	 * - TASKSW [task switches]
	 *
	 * Intercept the rest below.
	 */
	vmcb->ctrl.intercept_misc1 =
	    VMCB_CTRL_INTERCEPT_INTR |
	    VMCB_CTRL_INTERCEPT_NMI |
	    VMCB_CTRL_INTERCEPT_INIT |
	    VMCB_CTRL_INTERCEPT_RDPMC |
	    VMCB_CTRL_INTERCEPT_CPUID |
	    VMCB_CTRL_INTERCEPT_RSM |
	    VMCB_CTRL_INTERCEPT_HLT |
	    VMCB_CTRL_INTERCEPT_INVLPGA |
	    VMCB_CTRL_INTERCEPT_IOIO_PROT |
	    VMCB_CTRL_INTERCEPT_MSR_PROT |
	    VMCB_CTRL_INTERCEPT_FERR_FREEZE |
	    VMCB_CTRL_INTERCEPT_SHUTDOWN;

	/*
	 * Allow:
	 * - ICEBP [icebp instruction]
	 * - WBINVD [wbinvd instruction]
	 * - WCR_SPEC(0..15) [writes of CR0-15, received after instruction]
	 *
	 * Intercept the rest below.
	 */
	vmcb->ctrl.intercept_misc2 =
	    VMCB_CTRL_INTERCEPT_VMRUN |
	    VMCB_CTRL_INTERCEPT_VMMCALL |
	    VMCB_CTRL_INTERCEPT_VMLOAD |
	    VMCB_CTRL_INTERCEPT_VMSAVE |
	    VMCB_CTRL_INTERCEPT_STGI |
	    VMCB_CTRL_INTERCEPT_CLGI |
	    VMCB_CTRL_INTERCEPT_SKINIT |
	    VMCB_CTRL_INTERCEPT_RDTSCP |
	    VMCB_CTRL_INTERCEPT_MONITOR |
	    VMCB_CTRL_INTERCEPT_MWAIT |
	    VMCB_CTRL_INTERCEPT_XSETBV;

	/* Intercept all I/O accesses. */
	memset(cpudata->iobm, 0xFF, IOBM_SIZE);
	vmcb->ctrl.iopm_base_pa = cpudata->iobm_pa;
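
	/*
	 * The MSR permission map works the same way as the I/O bitmap: a
	 * set bit means "intercept". Each MSR has one read bit and one
	 * write bit, so starting from an all-ones map, svm_vcpu_msr_allow()
	 * punches holes for the MSRs the guest may access directly. EFER
	 * and TSC remain write-intercepted, so that writes to them can be
	 * filtered and emulated.
	 */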
	/* Allow direct access to certain MSRs. */
	memset(cpudata->msrbm, 0xFF, MSRBM_SIZE);
	svm_vcpu_msr_allow(cpudata->msrbm, MSR_EFER, true, false);
	svm_vcpu_msr_allow(cpudata->msrbm, MSR_STAR, true, true);
	svm_vcpu_msr_allow(cpudata->msrbm, MSR_LSTAR, true, true);
	svm_vcpu_msr_allow(cpudata->msrbm, MSR_CSTAR, true, true);
	svm_vcpu_msr_allow(cpudata->msrbm, MSR_SFMASK, true, true);
	svm_vcpu_msr_allow(cpudata->msrbm, MSR_KERNELGSBASE, true, true);
	svm_vcpu_msr_allow(cpudata->msrbm, MSR_SYSENTER_CS, true, true);
	svm_vcpu_msr_allow(cpudata->msrbm, MSR_SYSENTER_ESP, true, true);
	svm_vcpu_msr_allow(cpudata->msrbm, MSR_SYSENTER_EIP, true, true);
	svm_vcpu_msr_allow(cpudata->msrbm, MSR_FSBASE, true, true);
	svm_vcpu_msr_allow(cpudata->msrbm, MSR_GSBASE, true, true);
	svm_vcpu_msr_allow(cpudata->msrbm, MSR_CR_PAT, true, true);
	svm_vcpu_msr_allow(cpudata->msrbm, MSR_TSC, true, false);
	vmcb->ctrl.msrpm_base_pa = cpudata->msrbm_pa;

	/* Generate ASID. */
	svm_asid_alloc(vcpu);

	/* Virtual TPR. */
	vmcb->ctrl.v = VMCB_CTRL_V_INTR_MASKING;

	/* Enable Nested Paging. */
	vmcb->ctrl.enable1 = VMCB_CTRL_ENABLE_NP;
	vmcb->ctrl.n_cr3 = mach->vm->vm_map.pmap->pm_pdirpa[0];

	/* Init XSAVE header. */
	cpudata->gfpu.xsh_xstate_bv = svm_xcr0_mask;
	cpudata->gfpu.xsh_xcomp_bv = 0;
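
	/*
	 * The guest's copies of the syscall MSRs live in the VMCB and get
	 * switched by VMLOAD/VMSAVE, so the host's values only need to be
	 * read once here, to be restored when coming back from the guest.
	 */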
	/* These MSRs are static. */
	cpudata->star = rdmsr(MSR_STAR);
	cpudata->lstar = rdmsr(MSR_LSTAR);
	cpudata->cstar = rdmsr(MSR_CSTAR);
	cpudata->sfmask = rdmsr(MSR_SFMASK);

	/* Install the RESET state. */
	memcpy(&vcpu->comm->state, &nvmm_x86_reset_state,
	    sizeof(nvmm_x86_reset_state));
	vcpu->comm->state_wanted = NVMM_X64_STATE_ALL;
	vcpu->comm->state_cached = 0;
	svm_vcpu_setstate(vcpu);
}

static int
svm_vcpu_create(struct nvmm_machine *mach, struct nvmm_cpu *vcpu)
{
	struct svm_cpudata *cpudata;
	int error;

	/* Allocate the SVM cpudata. */
	cpudata = (struct svm_cpudata *)uvm_km_alloc(kernel_map,
	    roundup(sizeof(*cpudata), PAGE_SIZE), 0,
	    UVM_KMF_WIRED|UVM_KMF_ZERO);
	vcpu->cpudata = cpudata;

	/* VMCB */
	error = svm_memalloc(&cpudata->vmcb_pa, (vaddr_t *)&cpudata->vmcb,
	    VMCB_NPAGES);
	if (error)
		goto error;

	/* I/O Bitmap */
	error = svm_memalloc(&cpudata->iobm_pa, (vaddr_t *)&cpudata->iobm,
	    IOBM_NPAGES);
	if (error)
		goto error;

	/* MSR Bitmap */
	error = svm_memalloc(&cpudata->msrbm_pa, (vaddr_t *)&cpudata->msrbm,
	    MSRBM_NPAGES);
	if (error)
		goto error;

	/* Init the VCPU info. */
	svm_vcpu_init(mach, vcpu);

	return 0;

error:
	if (cpudata->vmcb_pa) {
		svm_memfree(cpudata->vmcb_pa, (vaddr_t)cpudata->vmcb,
		    VMCB_NPAGES);
	}
	if (cpudata->iobm_pa) {
		svm_memfree(cpudata->iobm_pa, (vaddr_t)cpudata->iobm,
		    IOBM_NPAGES);
	}
	if (cpudata->msrbm_pa) {
		svm_memfree(cpudata->msrbm_pa, (vaddr_t)cpudata->msrbm,
		    MSRBM_NPAGES);
	}
	uvm_km_free(kernel_map, (vaddr_t)cpudata,
	    roundup(sizeof(*cpudata), PAGE_SIZE), UVM_KMF_WIRED);
	return error;
}

static void
svm_vcpu_destroy(struct nvmm_machine *mach, struct nvmm_cpu *vcpu)
{
	struct svm_cpudata *cpudata = vcpu->cpudata;

	svm_asid_free(vcpu);

	svm_memfree(cpudata->vmcb_pa, (vaddr_t)cpudata->vmcb, VMCB_NPAGES);
	svm_memfree(cpudata->iobm_pa, (vaddr_t)cpudata->iobm, IOBM_NPAGES);
	svm_memfree(cpudata->msrbm_pa, (vaddr_t)cpudata->msrbm, MSRBM_NPAGES);

	uvm_km_free(kernel_map, (vaddr_t)cpudata,
	    roundup(sizeof(*cpudata), PAGE_SIZE), UVM_KMF_WIRED);
}

/* -------------------------------------------------------------------------- */
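
/*
 * When the host modifies guest-visible mappings, the pmap calls back into
 * us here. Bumping mach_htlb_gen marks the host TLB as stale, for each VCPU
 * to notice on its next guest entry; the shootdown broadcast kicks the
 * VCPUs currently in guest mode, since the interrupt intercept forces a
 * #VMEXIT.
 */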
static void
svm_tlb_flush(struct pmap *pm)
{
	struct nvmm_machine *mach = pm->pm_data;
	struct svm_machdata *machdata = mach->machdata;

	atomic_inc_64(&machdata->mach_htlb_gen);

	/* Generates IPIs, which cause #VMEXITs. */
	pmap_tlb_shootdown(pmap_kernel(), -1, PG_G, TLBSHOOT_UPDATE);
}

static void
svm_machine_create(struct nvmm_machine *mach)
{
	struct svm_machdata *machdata;

	/* Fill in pmap info. */
	mach->vm->vm_map.pmap->pm_data = (void *)mach;
	mach->vm->vm_map.pmap->pm_tlb_flush = svm_tlb_flush;

	machdata = kmem_zalloc(sizeof(struct svm_machdata), KM_SLEEP);
	mach->machdata = machdata;

	/* Start with an hTLB flush everywhere. */
	machdata->mach_htlb_gen = 1;
}

static void
svm_machine_destroy(struct nvmm_machine *mach)
{
	kmem_free(mach->machdata, sizeof(struct svm_machdata));
}

static int
svm_machine_configure(struct nvmm_machine *mach, uint64_t op, void *data)
{
	struct nvmm_x86_conf_cpuid *cpuid = data;
	struct svm_machdata *machdata = (struct svm_machdata *)mach->machdata;
	size_t i;

	if (__predict_false(op != NVMM_X86_CONF_CPUID)) {
		return EINVAL;
	}

	if (__predict_false((cpuid->set.eax & cpuid->del.eax) ||
	    (cpuid->set.ebx & cpuid->del.ebx) ||
	    (cpuid->set.ecx & cpuid->del.ecx) ||
	    (cpuid->set.edx & cpuid->del.edx))) {
		return EINVAL;
	}

	/* If already here, replace. */
	for (i = 0; i < SVM_NCPUIDS; i++) {
		if (!machdata->cpuidpresent[i]) {
			continue;
		}
		if (machdata->cpuid[i].leaf == cpuid->leaf) {
			memcpy(&machdata->cpuid[i], cpuid,
			    sizeof(struct nvmm_x86_conf_cpuid));
			return 0;
		}
	}

	/* Not here, insert. */
	for (i = 0; i < SVM_NCPUIDS; i++) {
		if (!machdata->cpuidpresent[i]) {
			machdata->cpuidpresent[i] = true;
			memcpy(&machdata->cpuid[i], cpuid,
			    sizeof(struct nvmm_x86_conf_cpuid));
			return 0;
		}
	}

	return ENOBUFS;
}

/* -------------------------------------------------------------------------- */

static bool
svm_ident(void)
{
	u_int descs[4];
	uint64_t msr;

	if (cpu_vendor != CPUVENDOR_AMD) {
		return false;
	}
	if (!(cpu_feature[3] & CPUID_SVM)) {
		return false;
	}

	if (curcpu()->ci_max_ext_cpuid < 0x8000000a) {
		return false;
	}
	x86_cpuid(0x8000000a, descs);

	/* Want Nested Paging. */
	if (!(descs[3] & CPUID_AMD_SVM_NP)) {
		return false;
	}

	/* Want nRIP. */
	if (!(descs[3] & CPUID_AMD_SVM_NRIPS)) {
		return false;
	}

	svm_decode_assist = (descs[3] & CPUID_AMD_SVM_DecodeAssist) != 0;

	msr = rdmsr(MSR_VMCR);
	if ((msr & VMCR_SVMED) && (msr & VMCR_LOCK)) {
		return false;
	}

	return true;
}

static void
svm_init_asid(uint32_t maxasid)
{
	size_t i, j, allocsz;

	mutex_init(&svm_asidlock, MUTEX_DEFAULT, IPL_NONE);

	/* Arbitrarily limit. */
	maxasid = uimin(maxasid, 8192);

	svm_maxasid = maxasid;
	allocsz = roundup(maxasid, 8) / 8;
	svm_asidmap = kmem_zalloc(allocsz, KM_SLEEP);

	/* ASID 0 is reserved for the host. */
	svm_asidmap[0] |= __BIT(0);

	/* ASID n-1 is special, we share it. */
	i = (maxasid - 1) / 8;
	j = (maxasid - 1) % 8;
	svm_asidmap[i] |= __BIT(j);
}
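
/*
 * Enabling SVM on a CPU is a three-step dance: clear VMCR_SVMED if the
 * firmware left it set (svm_ident() has already verified that it is not
 * locked), set EFER_SVME, and point MSR_VM_HSAVE_PA at a per-CPU page
 * where the processor saves part of the host state across VMRUN.
 * Disabling reverses the sequence.
 */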
static void
svm_change_cpu(void *arg1, void *arg2)
{
	bool enable = (bool)arg1;
	uint64_t msr;

	msr = rdmsr(MSR_VMCR);
	if (msr & VMCR_SVMED) {
		wrmsr(MSR_VMCR, msr & ~VMCR_SVMED);
	}

	if (!enable) {
		wrmsr(MSR_VM_HSAVE_PA, 0);
	}

	msr = rdmsr(MSR_EFER);
	if (enable) {
		msr |= EFER_SVME;
	} else {
		msr &= ~EFER_SVME;
	}
	wrmsr(MSR_EFER, msr);

	if (enable) {
		wrmsr(MSR_VM_HSAVE_PA, hsave[cpu_index(curcpu())].pa);
	}
}

static void
svm_init(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	struct vm_page *pg;
	u_int descs[4];
	uint64_t xc;

	x86_cpuid(0x8000000a, descs);

	/* The guest TLB flush command. */
	if (descs[3] & CPUID_AMD_SVM_FlushByASID) {
		svm_ctrl_tlb_flush = VMCB_CTRL_TLB_CTRL_FLUSH_GUEST;
	} else {
		svm_ctrl_tlb_flush = VMCB_CTRL_TLB_CTRL_FLUSH_ALL;
	}

	/* Init the ASID. */
	svm_init_asid(descs[1]);

	/* Init the XCR0 mask. */
	svm_xcr0_mask = SVM_XCR0_MASK_DEFAULT & x86_xsave_features;

	memset(hsave, 0, sizeof(hsave));
	for (CPU_INFO_FOREACH(cii, ci)) {
		pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_ZERO);
		hsave[cpu_index(ci)].pa = VM_PAGE_TO_PHYS(pg);
	}

	xc = xc_broadcast(0, svm_change_cpu, (void *)true, NULL);
	xc_wait(xc);
}

static void
svm_fini_asid(void)
{
	size_t allocsz;

	allocsz = roundup(svm_maxasid, 8) / 8;
	kmem_free(svm_asidmap, allocsz);

	mutex_destroy(&svm_asidlock);
}

static void
svm_fini(void)
{
	uint64_t xc;
	size_t i;

	xc = xc_broadcast(0, svm_change_cpu, (void *)false, NULL);
	xc_wait(xc);

	for (i = 0; i < MAXCPUS; i++) {
		if (hsave[i].pa != 0)
			uvm_pagefree(PHYS_TO_VM_PAGE(hsave[i].pa));
	}

	svm_fini_asid();
}

static void
svm_capability(struct nvmm_capability *cap)
{
	cap->arch.xcr0_mask = svm_xcr0_mask;
	cap->arch.mxcsr_mask = x86_fpu_mxcsr_mask;
	cap->arch.conf_cpuid_maxops = SVM_NCPUIDS;
}

const struct nvmm_impl nvmm_x86_svm = {
	.ident = svm_ident,
	.init = svm_init,
	.fini = svm_fini,
	.capability = svm_capability,
	.conf_max = NVMM_X86_NCONF,
	.conf_sizes = svm_conf_sizes,
	.state_size = sizeof(struct nvmm_x64_state),
	.machine_create = svm_machine_create,
	.machine_destroy = svm_machine_destroy,
	.machine_configure = svm_machine_configure,
	.vcpu_create = svm_vcpu_create,
	.vcpu_destroy = svm_vcpu_destroy,
	.vcpu_setstate = svm_vcpu_setstate,
	.vcpu_getstate = svm_vcpu_getstate,
	.vcpu_inject = svm_vcpu_inject,
	.vcpu_run = svm_vcpu_run
};