/*	$NetBSD: nvmm_x86_svm.c,v 1.52 2019/10/27 10:28:55 maxv Exp $	*/

/*
 * Copyright (c) 2018-2019 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Maxime Villard.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: nvmm_x86_svm.c,v 1.52 2019/10/27 10:28:55 maxv Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/kmem.h>
#include <sys/cpu.h>
#include <sys/xcall.h>
#include <sys/mman.h>

#include <uvm/uvm.h>
#include <uvm/uvm_page.h>

#include <x86/cputypes.h>
#include <x86/specialreg.h>
#include <x86/pmap.h>
#include <x86/dbregs.h>
#include <x86/cpu_counter.h>
#include <machine/cpuvar.h>

#include <dev/nvmm/nvmm.h>
#include <dev/nvmm/nvmm_internal.h>
#include <dev/nvmm/x86/nvmm_x86.h>

int svm_vmrun(paddr_t, uint64_t *);

#define MSR_VM_HSAVE_PA	0xC0010117

/* -------------------------------------------------------------------------- */

#define VMCB_EXITCODE_CR0_READ		0x0000
#define VMCB_EXITCODE_CR1_READ		0x0001
#define VMCB_EXITCODE_CR2_READ		0x0002
#define VMCB_EXITCODE_CR3_READ		0x0003
#define VMCB_EXITCODE_CR4_READ		0x0004
#define VMCB_EXITCODE_CR5_READ		0x0005
#define VMCB_EXITCODE_CR6_READ		0x0006
#define VMCB_EXITCODE_CR7_READ		0x0007
#define VMCB_EXITCODE_CR8_READ		0x0008
#define VMCB_EXITCODE_CR9_READ		0x0009
#define VMCB_EXITCODE_CR10_READ		0x000A
#define VMCB_EXITCODE_CR11_READ		0x000B
#define VMCB_EXITCODE_CR12_READ		0x000C
#define VMCB_EXITCODE_CR13_READ		0x000D
#define VMCB_EXITCODE_CR14_READ		0x000E
#define VMCB_EXITCODE_CR15_READ		0x000F
#define VMCB_EXITCODE_CR0_WRITE		0x0010
#define VMCB_EXITCODE_CR1_WRITE		0x0011
#define VMCB_EXITCODE_CR2_WRITE		0x0012
#define VMCB_EXITCODE_CR3_WRITE		0x0013
#define VMCB_EXITCODE_CR4_WRITE		0x0014
#define VMCB_EXITCODE_CR5_WRITE		0x0015
#define VMCB_EXITCODE_CR6_WRITE		0x0016
#define VMCB_EXITCODE_CR7_WRITE		0x0017
#define VMCB_EXITCODE_CR8_WRITE		0x0018
#define VMCB_EXITCODE_CR9_WRITE		0x0019
#define VMCB_EXITCODE_CR10_WRITE	0x001A
#define VMCB_EXITCODE_CR11_WRITE	0x001B
#define VMCB_EXITCODE_CR12_WRITE	0x001C
#define VMCB_EXITCODE_CR13_WRITE	0x001D
#define VMCB_EXITCODE_CR14_WRITE	0x001E
#define VMCB_EXITCODE_CR15_WRITE	0x001F
#define VMCB_EXITCODE_DR0_READ		0x0020
#define VMCB_EXITCODE_DR1_READ		0x0021
#define VMCB_EXITCODE_DR2_READ		0x0022
#define VMCB_EXITCODE_DR3_READ		0x0023
#define VMCB_EXITCODE_DR4_READ		0x0024
#define VMCB_EXITCODE_DR5_READ		0x0025
#define VMCB_EXITCODE_DR6_READ		0x0026
#define VMCB_EXITCODE_DR7_READ		0x0027
#define VMCB_EXITCODE_DR8_READ		0x0028
#define VMCB_EXITCODE_DR9_READ		0x0029
#define VMCB_EXITCODE_DR10_READ		0x002A
#define VMCB_EXITCODE_DR11_READ		0x002B
#define VMCB_EXITCODE_DR12_READ		0x002C
#define VMCB_EXITCODE_DR13_READ		0x002D
#define VMCB_EXITCODE_DR14_READ		0x002E
#define VMCB_EXITCODE_DR15_READ		0x002F
#define VMCB_EXITCODE_DR0_WRITE		0x0030
#define VMCB_EXITCODE_DR1_WRITE		0x0031
#define VMCB_EXITCODE_DR2_WRITE		0x0032
#define VMCB_EXITCODE_DR3_WRITE		0x0033
#define VMCB_EXITCODE_DR4_WRITE		0x0034
#define VMCB_EXITCODE_DR5_WRITE		0x0035
#define VMCB_EXITCODE_DR6_WRITE		0x0036
#define VMCB_EXITCODE_DR7_WRITE		0x0037
#define VMCB_EXITCODE_DR8_WRITE		0x0038
#define VMCB_EXITCODE_DR9_WRITE		0x0039
#define VMCB_EXITCODE_DR10_WRITE	0x003A
#define VMCB_EXITCODE_DR11_WRITE	0x003B
#define VMCB_EXITCODE_DR12_WRITE	0x003C
#define VMCB_EXITCODE_DR13_WRITE	0x003D
#define VMCB_EXITCODE_DR14_WRITE	0x003E
#define VMCB_EXITCODE_DR15_WRITE	0x003F
#define VMCB_EXITCODE_EXCP0		0x0040
#define VMCB_EXITCODE_EXCP1		0x0041
#define VMCB_EXITCODE_EXCP2		0x0042
#define VMCB_EXITCODE_EXCP3		0x0043
#define VMCB_EXITCODE_EXCP4		0x0044
#define VMCB_EXITCODE_EXCP5		0x0045
#define VMCB_EXITCODE_EXCP6		0x0046
#define VMCB_EXITCODE_EXCP7		0x0047
#define VMCB_EXITCODE_EXCP8		0x0048
#define VMCB_EXITCODE_EXCP9		0x0049
#define VMCB_EXITCODE_EXCP10		0x004A
#define VMCB_EXITCODE_EXCP11		0x004B
#define VMCB_EXITCODE_EXCP12		0x004C
#define VMCB_EXITCODE_EXCP13		0x004D
#define VMCB_EXITCODE_EXCP14		0x004E
#define VMCB_EXITCODE_EXCP15		0x004F
#define VMCB_EXITCODE_EXCP16		0x0050
#define VMCB_EXITCODE_EXCP17		0x0051
#define VMCB_EXITCODE_EXCP18		0x0052
#define VMCB_EXITCODE_EXCP19		0x0053
#define VMCB_EXITCODE_EXCP20		0x0054
#define VMCB_EXITCODE_EXCP21		0x0055
#define VMCB_EXITCODE_EXCP22		0x0056
#define VMCB_EXITCODE_EXCP23		0x0057
#define VMCB_EXITCODE_EXCP24		0x0058
#define VMCB_EXITCODE_EXCP25		0x0059
#define VMCB_EXITCODE_EXCP26		0x005A
#define VMCB_EXITCODE_EXCP27		0x005B
#define VMCB_EXITCODE_EXCP28		0x005C
#define VMCB_EXITCODE_EXCP29		0x005D
#define VMCB_EXITCODE_EXCP30		0x005E
#define VMCB_EXITCODE_EXCP31		0x005F
#define VMCB_EXITCODE_INTR		0x0060
#define VMCB_EXITCODE_NMI		0x0061
#define VMCB_EXITCODE_SMI		0x0062
#define VMCB_EXITCODE_INIT		0x0063
#define VMCB_EXITCODE_VINTR		0x0064
#define VMCB_EXITCODE_CR0_SEL_WRITE	0x0065
#define VMCB_EXITCODE_IDTR_READ		0x0066
#define VMCB_EXITCODE_GDTR_READ		0x0067
#define VMCB_EXITCODE_LDTR_READ		0x0068
#define VMCB_EXITCODE_TR_READ		0x0069
#define VMCB_EXITCODE_IDTR_WRITE	0x006A
#define VMCB_EXITCODE_GDTR_WRITE	0x006B
#define VMCB_EXITCODE_LDTR_WRITE	0x006C
#define VMCB_EXITCODE_TR_WRITE		0x006D
#define VMCB_EXITCODE_RDTSC		0x006E
#define VMCB_EXITCODE_RDPMC		0x006F
#define VMCB_EXITCODE_PUSHF		0x0070
#define VMCB_EXITCODE_POPF		0x0071
#define VMCB_EXITCODE_CPUID		0x0072
#define VMCB_EXITCODE_RSM		0x0073
#define VMCB_EXITCODE_IRET		0x0074
#define VMCB_EXITCODE_SWINT		0x0075
#define VMCB_EXITCODE_INVD		0x0076
#define VMCB_EXITCODE_PAUSE		0x0077
#define VMCB_EXITCODE_HLT		0x0078
#define VMCB_EXITCODE_INVLPG		0x0079
#define VMCB_EXITCODE_INVLPGA		0x007A
#define VMCB_EXITCODE_IOIO		0x007B
#define VMCB_EXITCODE_MSR		0x007C
#define VMCB_EXITCODE_TASK_SWITCH	0x007D
#define VMCB_EXITCODE_FERR_FREEZE	0x007E
#define VMCB_EXITCODE_SHUTDOWN		0x007F
#define VMCB_EXITCODE_VMRUN		0x0080
#define VMCB_EXITCODE_VMMCALL		0x0081
#define VMCB_EXITCODE_VMLOAD		0x0082
#define VMCB_EXITCODE_VMSAVE		0x0083
#define VMCB_EXITCODE_STGI		0x0084
#define VMCB_EXITCODE_CLGI		0x0085
#define VMCB_EXITCODE_SKINIT		0x0086
#define VMCB_EXITCODE_RDTSCP		0x0087
#define VMCB_EXITCODE_ICEBP		0x0088
#define VMCB_EXITCODE_WBINVD		0x0089
#define VMCB_EXITCODE_MONITOR		0x008A
#define VMCB_EXITCODE_MWAIT		0x008B
#define VMCB_EXITCODE_MWAIT_CONDITIONAL	0x008C
#define VMCB_EXITCODE_XSETBV		0x008D
#define VMCB_EXITCODE_RDPRU		0x008E
#define VMCB_EXITCODE_EFER_WRITE_TRAP	0x008F
#define VMCB_EXITCODE_CR0_WRITE_TRAP	0x0090
#define VMCB_EXITCODE_CR1_WRITE_TRAP	0x0091
#define VMCB_EXITCODE_CR2_WRITE_TRAP	0x0092
#define VMCB_EXITCODE_CR3_WRITE_TRAP	0x0093
#define VMCB_EXITCODE_CR4_WRITE_TRAP	0x0094
#define VMCB_EXITCODE_CR5_WRITE_TRAP	0x0095
#define VMCB_EXITCODE_CR6_WRITE_TRAP	0x0096
#define VMCB_EXITCODE_CR7_WRITE_TRAP	0x0097
#define VMCB_EXITCODE_CR8_WRITE_TRAP	0x0098
#define VMCB_EXITCODE_CR9_WRITE_TRAP	0x0099
#define VMCB_EXITCODE_CR10_WRITE_TRAP	0x009A
#define VMCB_EXITCODE_CR11_WRITE_TRAP	0x009B
#define VMCB_EXITCODE_CR12_WRITE_TRAP	0x009C
#define VMCB_EXITCODE_CR13_WRITE_TRAP	0x009D
#define VMCB_EXITCODE_CR14_WRITE_TRAP	0x009E
#define VMCB_EXITCODE_CR15_WRITE_TRAP	0x009F
#define VMCB_EXITCODE_MCOMMIT		0x00A3
#define VMCB_EXITCODE_NPF		0x0400
#define VMCB_EXITCODE_AVIC_INCOMP_IPI	0x0401
#define VMCB_EXITCODE_AVIC_NOACCEL	0x0402
#define VMCB_EXITCODE_VMGEXIT		0x0403
#define VMCB_EXITCODE_INVALID		-1

/* -------------------------------------------------------------------------- */

struct vmcb_ctrl {
	uint32_t intercept_cr;
#define VMCB_CTRL_INTERCEPT_RCR(x)	__BIT( 0 + x)
#define VMCB_CTRL_INTERCEPT_WCR(x)	__BIT(16 + x)

	uint32_t intercept_dr;
#define VMCB_CTRL_INTERCEPT_RDR(x)	__BIT( 0 + x)
#define VMCB_CTRL_INTERCEPT_WDR(x)	__BIT(16 + x)

	uint32_t intercept_vec;
#define VMCB_CTRL_INTERCEPT_VEC(x)	__BIT(x)

	uint32_t intercept_misc1;
#define VMCB_CTRL_INTERCEPT_INTR	__BIT(0)
#define VMCB_CTRL_INTERCEPT_NMI		__BIT(1)
#define VMCB_CTRL_INTERCEPT_SMI		__BIT(2)
#define VMCB_CTRL_INTERCEPT_INIT	__BIT(3)
#define VMCB_CTRL_INTERCEPT_VINTR	__BIT(4)
#define VMCB_CTRL_INTERCEPT_CR0_SPEC	__BIT(5)
#define VMCB_CTRL_INTERCEPT_RIDTR	__BIT(6)
#define VMCB_CTRL_INTERCEPT_RGDTR	__BIT(7)
#define VMCB_CTRL_INTERCEPT_RLDTR	__BIT(8)
#define VMCB_CTRL_INTERCEPT_RTR		__BIT(9)
#define VMCB_CTRL_INTERCEPT_WIDTR	__BIT(10)
#define VMCB_CTRL_INTERCEPT_WGDTR	__BIT(11)
#define VMCB_CTRL_INTERCEPT_WLDTR	__BIT(12)
#define VMCB_CTRL_INTERCEPT_WTR		__BIT(13)
#define VMCB_CTRL_INTERCEPT_RDTSC	__BIT(14)
#define VMCB_CTRL_INTERCEPT_RDPMC	__BIT(15)
#define VMCB_CTRL_INTERCEPT_PUSHF	__BIT(16)
#define VMCB_CTRL_INTERCEPT_POPF	__BIT(17)
#define VMCB_CTRL_INTERCEPT_CPUID	__BIT(18)
#define VMCB_CTRL_INTERCEPT_RSM		__BIT(19)
#define VMCB_CTRL_INTERCEPT_IRET	__BIT(20)
#define VMCB_CTRL_INTERCEPT_INTN	__BIT(21)
#define VMCB_CTRL_INTERCEPT_INVD	__BIT(22)
#define VMCB_CTRL_INTERCEPT_PAUSE	__BIT(23)
#define VMCB_CTRL_INTERCEPT_HLT		__BIT(24)
#define VMCB_CTRL_INTERCEPT_INVLPG	__BIT(25)
#define VMCB_CTRL_INTERCEPT_INVLPGA	__BIT(26)
#define VMCB_CTRL_INTERCEPT_IOIO_PROT	__BIT(27)
#define VMCB_CTRL_INTERCEPT_MSR_PROT	__BIT(28)
#define VMCB_CTRL_INTERCEPT_TASKSW	__BIT(29)
#define VMCB_CTRL_INTERCEPT_FERR_FREEZE	__BIT(30)
#define VMCB_CTRL_INTERCEPT_SHUTDOWN	__BIT(31)

	uint32_t intercept_misc2;
#define VMCB_CTRL_INTERCEPT_VMRUN	__BIT(0)
#define VMCB_CTRL_INTERCEPT_VMMCALL	__BIT(1)
#define VMCB_CTRL_INTERCEPT_VMLOAD	__BIT(2)
#define VMCB_CTRL_INTERCEPT_VMSAVE	__BIT(3)
#define VMCB_CTRL_INTERCEPT_STGI	__BIT(4)
#define VMCB_CTRL_INTERCEPT_CLGI	__BIT(5)
#define VMCB_CTRL_INTERCEPT_SKINIT	__BIT(6)
#define VMCB_CTRL_INTERCEPT_RDTSCP	__BIT(7)
#define VMCB_CTRL_INTERCEPT_ICEBP	__BIT(8)
#define VMCB_CTRL_INTERCEPT_WBINVD	__BIT(9)
#define VMCB_CTRL_INTERCEPT_MONITOR	__BIT(10)
#define VMCB_CTRL_INTERCEPT_MWAIT	__BIT(11)
#define VMCB_CTRL_INTERCEPT_MWAIT_ARMED	__BIT(12)
#define VMCB_CTRL_INTERCEPT_XSETBV	__BIT(13)
#define VMCB_CTRL_INTERCEPT_RDPRU	__BIT(14)
#define VMCB_CTRL_INTERCEPT_EFER_SPEC	__BIT(15)
#define VMCB_CTRL_INTERCEPT_WCR_SPEC(x)	__BIT(16 + x)

	uint32_t intercept_misc3;
#define VMCB_CTRL_INTERCEPT_MCOMMIT	__BIT(3)

	uint8_t rsvd1[36];
	uint16_t pause_filt_thresh;
	uint16_t pause_filt_cnt;
	uint64_t iopm_base_pa;
	uint64_t msrpm_base_pa;
	uint64_t tsc_offset;
	uint32_t guest_asid;

	uint32_t tlb_ctrl;
#define VMCB_CTRL_TLB_CTRL_FLUSH_ALL			0x01
#define VMCB_CTRL_TLB_CTRL_FLUSH_GUEST			0x03
#define VMCB_CTRL_TLB_CTRL_FLUSH_GUEST_NONGLOBAL	0x07

	uint64_t v;
#define VMCB_CTRL_V_TPR			__BITS(3,0)
#define VMCB_CTRL_V_IRQ			__BIT(8)
#define VMCB_CTRL_V_VGIF		__BIT(9)
#define VMCB_CTRL_V_INTR_PRIO		__BITS(19,16)
#define VMCB_CTRL_V_IGN_TPR		__BIT(20)
#define VMCB_CTRL_V_INTR_MASKING	__BIT(24)
#define VMCB_CTRL_V_GUEST_VGIF		__BIT(25)
#define VMCB_CTRL_V_AVIC_EN		__BIT(31)
#define VMCB_CTRL_V_INTR_VECTOR		__BITS(39,32)

	uint64_t intr;
#define VMCB_CTRL_INTR_SHADOW		__BIT(0)

	uint64_t exitcode;
	uint64_t exitinfo1;
	uint64_t exitinfo2;

	uint64_t exitintinfo;
#define VMCB_CTRL_EXITINTINFO_VECTOR	__BITS(7,0)
#define VMCB_CTRL_EXITINTINFO_TYPE	__BITS(10,8)
#define VMCB_CTRL_EXITINTINFO_EV	__BIT(11)
#define VMCB_CTRL_EXITINTINFO_V		__BIT(31)
#define VMCB_CTRL_EXITINTINFO_ERRORCODE	__BITS(63,32)

	uint64_t enable1;
#define VMCB_CTRL_ENABLE_NP		__BIT(0)
#define VMCB_CTRL_ENABLE_SEV		__BIT(1)
#define VMCB_CTRL_ENABLE_ES_SEV		__BIT(2)
#define VMCB_CTRL_ENABLE_GMET		__BIT(3)
#define VMCB_CTRL_ENABLE_VTE		__BIT(5)

	uint64_t avic;
#define VMCB_CTRL_AVIC_APIC_BAR		__BITS(51,0)

	uint64_t ghcb;

	uint64_t eventinj;
#define VMCB_CTRL_EVENTINJ_VECTOR	__BITS(7,0)
#define VMCB_CTRL_EVENTINJ_TYPE		__BITS(10,8)
#define VMCB_CTRL_EVENTINJ_EV		__BIT(11)
#define VMCB_CTRL_EVENTINJ_V		__BIT(31)
#define VMCB_CTRL_EVENTINJ_ERRORCODE	__BITS(63,32)

	uint64_t n_cr3;

	uint64_t enable2;
#define VMCB_CTRL_ENABLE_LBR		__BIT(0)
#define VMCB_CTRL_ENABLE_VVMSAVE	__BIT(1)

	uint32_t vmcb_clean;
#define VMCB_CTRL_VMCB_CLEAN_I		__BIT(0)
#define VMCB_CTRL_VMCB_CLEAN_IOPM	__BIT(1)
#define VMCB_CTRL_VMCB_CLEAN_ASID	__BIT(2)
#define VMCB_CTRL_VMCB_CLEAN_TPR	__BIT(3)
#define VMCB_CTRL_VMCB_CLEAN_NP		__BIT(4)
#define VMCB_CTRL_VMCB_CLEAN_CR		__BIT(5)
#define VMCB_CTRL_VMCB_CLEAN_DR		__BIT(6)
#define VMCB_CTRL_VMCB_CLEAN_DT		__BIT(7)
#define VMCB_CTRL_VMCB_CLEAN_SEG	__BIT(8)
#define VMCB_CTRL_VMCB_CLEAN_CR2	__BIT(9)
#define VMCB_CTRL_VMCB_CLEAN_LBR	__BIT(10)
#define VMCB_CTRL_VMCB_CLEAN_AVIC	__BIT(11)
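
/*
 * The vmcb_clean bits implement the "VMCB Clean" optimization: a bit set
 * to 1 tells the CPU that the corresponding VMCB area is unchanged since
 * the last VMRUN on this CPU, so a cached copy may be reused. Clearing a
 * bit forces a reload. For example, when an intercept is changed, the
 * code below does roughly:
 *
 *	vmcb->ctrl.intercept_misc1 |= VMCB_CTRL_INTERCEPT_VINTR;
 *	svm_vmcb_cache_flush(vmcb, VMCB_CTRL_VMCB_CLEAN_I);
 */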

	uint32_t rsvd2;
	uint64_t nrip;
	uint8_t	inst_len;
	uint8_t	inst_bytes[15];
	uint64_t avic_abpp;
	uint64_t rsvd3;
	uint64_t avic_ltp;

	uint64_t avic_phys;
#define VMCB_CTRL_AVIC_PHYS_TABLE_PTR	__BITS(51,12)
#define VMCB_CTRL_AVIC_PHYS_MAX_INDEX	__BITS(7,0)

	uint64_t rsvd4;
	uint64_t vmcb_ptr;

	uint8_t	pad[752];
} __packed;

CTASSERT(sizeof(struct vmcb_ctrl) == 1024);

struct vmcb_segment {
	uint16_t selector;
	uint16_t attrib;	/* hidden */
	uint32_t limit;		/* hidden */
	uint64_t base;		/* hidden */
} __packed;

CTASSERT(sizeof(struct vmcb_segment) == 16);

struct vmcb_state {
	struct vmcb_segment es;
	struct vmcb_segment cs;
	struct vmcb_segment ss;
	struct vmcb_segment ds;
	struct vmcb_segment fs;
	struct vmcb_segment gs;
	struct vmcb_segment gdt;
	struct vmcb_segment ldt;
	struct vmcb_segment idt;
	struct vmcb_segment tr;
	uint8_t	rsvd1[43];
	uint8_t	cpl;
	uint8_t	rsvd2[4];
	uint64_t efer;
	uint8_t	rsvd3[112];
	uint64_t cr4;
	uint64_t cr3;
	uint64_t cr0;
	uint64_t dr7;
	uint64_t dr6;
	uint64_t rflags;
	uint64_t rip;
	uint8_t	rsvd4[88];
	uint64_t rsp;
	uint8_t	rsvd5[24];
	uint64_t rax;
	uint64_t star;
	uint64_t lstar;
	uint64_t cstar;
	uint64_t sfmask;
	uint64_t kernelgsbase;
	uint64_t sysenter_cs;
	uint64_t sysenter_esp;
	uint64_t sysenter_eip;
	uint64_t cr2;
	uint8_t	rsvd6[32];
	uint64_t g_pat;
	uint64_t dbgctl;
	uint64_t br_from;
	uint64_t br_to;
	uint64_t int_from;
	uint64_t int_to;
	uint8_t	pad[2408];
} __packed;

CTASSERT(sizeof(struct vmcb_state) == 0xC00);

struct vmcb {
	struct vmcb_ctrl ctrl;
	struct vmcb_state state;
} __packed;

CTASSERT(sizeof(struct vmcb) == PAGE_SIZE);
CTASSERT(offsetof(struct vmcb, state) == 0x400);
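
/*
 * As the assertions above encode, the VMCB is a single page: the control
 * area occupies bytes 0x000-0x3FF and the state-save area starts at
 * 0x400. The physical address of this page is what gets passed to VMRUN;
 * the per-CPU host save area (MSR_VM_HSAVE_PA) is a separate page, where
 * the CPU spills and reloads host state across VMRUN/#VMEXIT.
 */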

/* -------------------------------------------------------------------------- */

static void svm_vcpu_state_provide(struct nvmm_cpu *, uint64_t);
static void svm_vcpu_state_commit(struct nvmm_cpu *);

struct svm_hsave {
	paddr_t pa;
};

static struct svm_hsave hsave[MAXCPUS];

static uint8_t *svm_asidmap __read_mostly;
static uint32_t svm_maxasid __read_mostly;
static kmutex_t svm_asidlock __cacheline_aligned;

static bool svm_decode_assist __read_mostly;
static uint32_t svm_ctrl_tlb_flush __read_mostly;

#define SVM_XCR0_MASK_DEFAULT	(XCR0_X87|XCR0_SSE)
static uint64_t svm_xcr0_mask __read_mostly;

#define SVM_NCPUIDS	32

#define VMCB_NPAGES	1

#define MSRBM_NPAGES	2
#define MSRBM_SIZE	(MSRBM_NPAGES * PAGE_SIZE)

#define IOBM_NPAGES	3
#define IOBM_SIZE	(IOBM_NPAGES * PAGE_SIZE)

/* Does not include EFER_LMSLE. */
#define EFER_VALID \
	(EFER_SCE|EFER_LME|EFER_LMA|EFER_NXE|EFER_SVME|EFER_FFXSR|EFER_TCE)

#define EFER_TLB_FLUSH \
	(EFER_NXE|EFER_LMA|EFER_LME)
#define CR0_TLB_FLUSH \
	(CR0_PG|CR0_WP|CR0_CD|CR0_NW)
#define CR4_TLB_FLUSH \
	(CR4_PGE|CR4_PAE|CR4_PSE)

/* -------------------------------------------------------------------------- */

struct svm_machdata {
	volatile uint64_t mach_htlb_gen;
};

static const size_t svm_vcpu_conf_sizes[NVMM_X86_VCPU_NCONF] = {
	[NVMM_VCPU_CONF_MD(NVMM_VCPU_CONF_CPUID)] =
	    sizeof(struct nvmm_vcpu_conf_cpuid),
	[NVMM_VCPU_CONF_MD(NVMM_VCPU_CONF_TPR)] =
	    sizeof(struct nvmm_vcpu_conf_tpr)
};

struct svm_cpudata {
	/* General */
	bool shared_asid;
	bool gtlb_want_flush;
	bool gtsc_want_update;
	uint64_t vcpu_htlb_gen;

	/* VMCB */
	struct vmcb *vmcb;
	paddr_t vmcb_pa;

	/* I/O bitmap */
	uint8_t *iobm;
	paddr_t iobm_pa;

	/* MSR bitmap */
	uint8_t *msrbm;
	paddr_t msrbm_pa;

	/* Host state */
	uint64_t hxcr0;
	uint64_t star;
	uint64_t lstar;
	uint64_t cstar;
	uint64_t sfmask;
	uint64_t fsbase;
	uint64_t kernelgsbase;

	/* Intr state */
	bool int_window_exit;
	bool nmi_window_exit;
	bool evt_pending;

	/* Guest state */
	uint64_t gxcr0;
	uint64_t gprs[NVMM_X64_NGPR];
	uint64_t drs[NVMM_X64_NDR];
	uint64_t gtsc;
	struct xsave_header gfpu __aligned(64);

	/* VCPU configuration. */
	bool cpuidpresent[SVM_NCPUIDS];
	struct nvmm_vcpu_conf_cpuid cpuid[SVM_NCPUIDS];
};

static void
svm_vmcb_cache_default(struct vmcb *vmcb)
{
	vmcb->ctrl.vmcb_clean =
	    VMCB_CTRL_VMCB_CLEAN_I |
	    VMCB_CTRL_VMCB_CLEAN_IOPM |
	    VMCB_CTRL_VMCB_CLEAN_ASID |
	    VMCB_CTRL_VMCB_CLEAN_TPR |
	    VMCB_CTRL_VMCB_CLEAN_NP |
	    VMCB_CTRL_VMCB_CLEAN_CR |
	    VMCB_CTRL_VMCB_CLEAN_DR |
	    VMCB_CTRL_VMCB_CLEAN_DT |
	    VMCB_CTRL_VMCB_CLEAN_SEG |
	    VMCB_CTRL_VMCB_CLEAN_CR2 |
	    VMCB_CTRL_VMCB_CLEAN_LBR |
	    VMCB_CTRL_VMCB_CLEAN_AVIC;
}

static void
svm_vmcb_cache_update(struct vmcb *vmcb, uint64_t flags)
{
	if (flags & NVMM_X64_STATE_SEGS) {
		vmcb->ctrl.vmcb_clean &=
		    ~(VMCB_CTRL_VMCB_CLEAN_SEG | VMCB_CTRL_VMCB_CLEAN_DT);
	}
	if (flags & NVMM_X64_STATE_CRS) {
		vmcb->ctrl.vmcb_clean &=
		    ~(VMCB_CTRL_VMCB_CLEAN_CR | VMCB_CTRL_VMCB_CLEAN_CR2 |
		      VMCB_CTRL_VMCB_CLEAN_TPR);
	}
	if (flags & NVMM_X64_STATE_DRS) {
		vmcb->ctrl.vmcb_clean &= ~VMCB_CTRL_VMCB_CLEAN_DR;
	}
	if (flags & NVMM_X64_STATE_MSRS) {
		/* CR for EFER, NP for PAT. */
		vmcb->ctrl.vmcb_clean &=
		    ~(VMCB_CTRL_VMCB_CLEAN_CR | VMCB_CTRL_VMCB_CLEAN_NP);
	}
}

static inline void
svm_vmcb_cache_flush(struct vmcb *vmcb, uint64_t flags)
{
	vmcb->ctrl.vmcb_clean &= ~flags;
}

static inline void
svm_vmcb_cache_flush_all(struct vmcb *vmcb)
{
	vmcb->ctrl.vmcb_clean = 0;
}

#define SVM_EVENT_TYPE_HW_INT	0
#define SVM_EVENT_TYPE_NMI	2
#define SVM_EVENT_TYPE_EXC	3
#define SVM_EVENT_TYPE_SW_INT	4

static void
svm_event_waitexit_enable(struct nvmm_cpu *vcpu, bool nmi)
{
	struct svm_cpudata *cpudata = vcpu->cpudata;
	struct vmcb *vmcb = cpudata->vmcb;

	if (nmi) {
		vmcb->ctrl.intercept_misc1 |= VMCB_CTRL_INTERCEPT_IRET;
		cpudata->nmi_window_exit = true;
	} else {
		vmcb->ctrl.intercept_misc1 |= VMCB_CTRL_INTERCEPT_VINTR;
		vmcb->ctrl.v |= (VMCB_CTRL_V_IRQ | VMCB_CTRL_V_IGN_TPR);
		svm_vmcb_cache_flush(vmcb, VMCB_CTRL_VMCB_CLEAN_TPR);
		cpudata->int_window_exit = true;
	}

	svm_vmcb_cache_flush(vmcb, VMCB_CTRL_VMCB_CLEAN_I);
}

static void
svm_event_waitexit_disable(struct nvmm_cpu *vcpu, bool nmi)
{
	struct svm_cpudata *cpudata = vcpu->cpudata;
	struct vmcb *vmcb = cpudata->vmcb;

	if (nmi) {
		vmcb->ctrl.intercept_misc1 &= ~VMCB_CTRL_INTERCEPT_IRET;
		cpudata->nmi_window_exit = false;
	} else {
		vmcb->ctrl.intercept_misc1 &= ~VMCB_CTRL_INTERCEPT_VINTR;
		vmcb->ctrl.v &= ~(VMCB_CTRL_V_IRQ | VMCB_CTRL_V_IGN_TPR);
		svm_vmcb_cache_flush(vmcb, VMCB_CTRL_VMCB_CLEAN_TPR);
		cpudata->int_window_exit = false;
	}

	svm_vmcb_cache_flush(vmcb, VMCB_CTRL_VMCB_CLEAN_I);
}

static inline int
svm_event_has_error(uint8_t vector)
{
	switch (vector) {
	case 8:		/* #DF */
	case 10:	/* #TS */
	case 11:	/* #NP */
	case 12:	/* #SS */
	case 13:	/* #GP */
	case 14:	/* #PF */
	case 17:	/* #AC */
	case 30:	/* #SX */
		return 1;
	default:
		return 0;
	}
}

static int
svm_vcpu_inject(struct nvmm_cpu *vcpu)
{
	struct nvmm_comm_page *comm = vcpu->comm;
	struct svm_cpudata *cpudata = vcpu->cpudata;
	struct vmcb *vmcb = cpudata->vmcb;
	u_int evtype;
	uint8_t vector;
	uint64_t error;
	int type = 0, err = 0;

	evtype = comm->event.type;
	vector = comm->event.vector;
	error = comm->event.u.excp.error;
	__insn_barrier();

	switch (evtype) {
	case NVMM_VCPU_EVENT_EXCP:
		type = SVM_EVENT_TYPE_EXC;
		if (vector == 2 || vector >= 32)
			return EINVAL;
		if (vector == 3 || vector == 0)
			return EINVAL;
		err = svm_event_has_error(vector);
		break;
	case NVMM_VCPU_EVENT_INTR:
		type = SVM_EVENT_TYPE_HW_INT;
		if (vector == 2) {
			type = SVM_EVENT_TYPE_NMI;
			svm_event_waitexit_enable(vcpu, true);
		}
		err = 0;
		break;
	default:
		return EINVAL;
	}

	vmcb->ctrl.eventinj =
	    __SHIFTIN((uint64_t)vector, VMCB_CTRL_EVENTINJ_VECTOR) |
	    __SHIFTIN((uint64_t)type, VMCB_CTRL_EVENTINJ_TYPE) |
	    __SHIFTIN((uint64_t)err, VMCB_CTRL_EVENTINJ_EV) |
	    __SHIFTIN((uint64_t)1, VMCB_CTRL_EVENTINJ_V) |
	    __SHIFTIN((uint64_t)error, VMCB_CTRL_EVENTINJ_ERRORCODE);

	cpudata->evt_pending = true;

	return 0;
}
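
/*
 * Worked example of the EVENTINJ encoding used above: injecting a #GP
 * (vector 13, exception type, error code 0) builds:
 *
 *	eventinj = 13			vector, bits 7:0
 *		 | 3 << 8		type = exception, bits 10:8
 *		 | 1 << 11		EV, error code valid
 *		 | 1 << 31		V, event valid
 *		 = 0x80000B0D
 */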

static void
svm_inject_ud(struct nvmm_cpu *vcpu)
{
	struct nvmm_comm_page *comm = vcpu->comm;
	int ret __diagused;

	comm->event.type = NVMM_VCPU_EVENT_EXCP;
	comm->event.vector = 6;
	comm->event.u.excp.error = 0;

	ret = svm_vcpu_inject(vcpu);
	KASSERT(ret == 0);
}

static void
svm_inject_gp(struct nvmm_cpu *vcpu)
{
	struct nvmm_comm_page *comm = vcpu->comm;
	int ret __diagused;

	comm->event.type = NVMM_VCPU_EVENT_EXCP;
	comm->event.vector = 13;
	comm->event.u.excp.error = 0;

	ret = svm_vcpu_inject(vcpu);
	KASSERT(ret == 0);
}

static inline int
svm_vcpu_event_commit(struct nvmm_cpu *vcpu)
{
	if (__predict_true(!vcpu->comm->event_commit)) {
		return 0;
	}
	vcpu->comm->event_commit = false;
	return svm_vcpu_inject(vcpu);
}

static inline void
svm_inkernel_advance(struct vmcb *vmcb)
{
	/*
	 * Maybe we should also apply single-stepping and debug exceptions.
	 * Matters for guest-ring3, because it can execute 'cpuid' under a
	 * debugger.
	 */
	vmcb->state.rip = vmcb->ctrl.nrip;
	vmcb->ctrl.intr &= ~VMCB_CTRL_INTR_SHADOW;
}

static void
svm_inkernel_handle_cpuid(struct nvmm_cpu *vcpu, uint64_t eax, uint64_t ecx)
{
	struct svm_cpudata *cpudata = vcpu->cpudata;
	uint64_t cr4;

	switch (eax) {
	case 0x00000001:
		cpudata->vmcb->state.rax &= nvmm_cpuid_00000001.eax;

		cpudata->gprs[NVMM_X64_GPR_RBX] &= ~CPUID_LOCAL_APIC_ID;
		cpudata->gprs[NVMM_X64_GPR_RBX] |= __SHIFTIN(vcpu->cpuid,
		    CPUID_LOCAL_APIC_ID);

		cpudata->gprs[NVMM_X64_GPR_RCX] &= nvmm_cpuid_00000001.ecx;
		cpudata->gprs[NVMM_X64_GPR_RCX] |= CPUID2_RAZ;

		cpudata->gprs[NVMM_X64_GPR_RDX] &= nvmm_cpuid_00000001.edx;

		/* CPUID2_OSXSAVE depends on CR4. */
		cr4 = cpudata->vmcb->state.cr4;
		if (!(cr4 & CR4_OSXSAVE)) {
			cpudata->gprs[NVMM_X64_GPR_RCX] &= ~CPUID2_OSXSAVE;
		}
		break;
	case 0x00000005:
	case 0x00000006:
		cpudata->vmcb->state.rax = 0;
		cpudata->gprs[NVMM_X64_GPR_RBX] = 0;
		cpudata->gprs[NVMM_X64_GPR_RCX] = 0;
		cpudata->gprs[NVMM_X64_GPR_RDX] = 0;
		break;
	case 0x00000007:
		cpudata->vmcb->state.rax &= nvmm_cpuid_00000007.eax;
		cpudata->gprs[NVMM_X64_GPR_RBX] &= nvmm_cpuid_00000007.ebx;
		cpudata->gprs[NVMM_X64_GPR_RCX] &= nvmm_cpuid_00000007.ecx;
		cpudata->gprs[NVMM_X64_GPR_RDX] &= nvmm_cpuid_00000007.edx;
		break;
	case 0x0000000D:
		if (svm_xcr0_mask == 0) {
			break;
		}
		switch (ecx) {
		case 0:
			cpudata->vmcb->state.rax = svm_xcr0_mask & 0xFFFFFFFF;
			if (cpudata->gxcr0 & XCR0_SSE) {
				cpudata->gprs[NVMM_X64_GPR_RBX] = sizeof(struct fxsave);
			} else {
				cpudata->gprs[NVMM_X64_GPR_RBX] = sizeof(struct save87);
			}
			cpudata->gprs[NVMM_X64_GPR_RBX] += 64; /* XSAVE header */
			cpudata->gprs[NVMM_X64_GPR_RCX] = sizeof(struct fxsave) + 64;
			cpudata->gprs[NVMM_X64_GPR_RDX] = svm_xcr0_mask >> 32;
			break;
		case 1:
			cpudata->vmcb->state.rax &= ~CPUID_PES1_XSAVES;
			break;
		}
		break;
	case 0x40000000:
		cpudata->gprs[NVMM_X64_GPR_RBX] = 0;
		cpudata->gprs[NVMM_X64_GPR_RCX] = 0;
		cpudata->gprs[NVMM_X64_GPR_RDX] = 0;
		memcpy(&cpudata->gprs[NVMM_X64_GPR_RBX], "___ ", 4);
		memcpy(&cpudata->gprs[NVMM_X64_GPR_RCX], "NVMM", 4);
		memcpy(&cpudata->gprs[NVMM_X64_GPR_RDX], " ___", 4);
		break;
	case 0x80000001:
		cpudata->vmcb->state.rax &= nvmm_cpuid_80000001.eax;
		cpudata->gprs[NVMM_X64_GPR_RBX] &= nvmm_cpuid_80000001.ebx;
		cpudata->gprs[NVMM_X64_GPR_RCX] &= nvmm_cpuid_80000001.ecx;
		cpudata->gprs[NVMM_X64_GPR_RDX] &= nvmm_cpuid_80000001.edx;
		break;
	default:
		break;
	}
}
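
/*
 * Leaf 0x40000000 above is the conventional hypervisor identification
 * leaf: EBX/ECX/EDX carry the 12-byte signature "___ NVMM ___", four
 * bytes per register, with EAX left untouched. A guest could detect
 * NVMM roughly as follows (sketch, guest-side code, not part of this
 * driver):
 *
 *	x86_cpuid(0x40000000, descs);
 *	if (memcmp(&descs[1], "___ NVMM ___", 12) == 0)
 *		... running under NVMM ...
 */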

static void
svm_exit_insn(struct vmcb *vmcb, struct nvmm_vcpu_exit *exit, uint64_t reason)
{
	exit->u.insn.npc = vmcb->ctrl.nrip;
	exit->reason = reason;
}

static void
svm_exit_cpuid(struct nvmm_machine *mach, struct nvmm_cpu *vcpu,
    struct nvmm_vcpu_exit *exit)
{
	struct svm_cpudata *cpudata = vcpu->cpudata;
	struct nvmm_vcpu_conf_cpuid *cpuid;
	uint64_t eax, ecx;
	u_int descs[4];
	size_t i;

	eax = cpudata->vmcb->state.rax;
	ecx = cpudata->gprs[NVMM_X64_GPR_RCX];
	x86_cpuid2(eax, ecx, descs);

	cpudata->vmcb->state.rax = descs[0];
	cpudata->gprs[NVMM_X64_GPR_RBX] = descs[1];
	cpudata->gprs[NVMM_X64_GPR_RCX] = descs[2];
	cpudata->gprs[NVMM_X64_GPR_RDX] = descs[3];

	svm_inkernel_handle_cpuid(vcpu, eax, ecx);

	for (i = 0; i < SVM_NCPUIDS; i++) {
		if (!cpudata->cpuidpresent[i]) {
			continue;
		}
		cpuid = &cpudata->cpuid[i];
		if (cpuid->leaf != eax) {
			continue;
		}

		if (cpuid->exit) {
			svm_exit_insn(cpudata->vmcb, exit, NVMM_VCPU_EXIT_CPUID);
			return;
		}
		KASSERT(cpuid->mask);

		/* del */
		cpudata->vmcb->state.rax &= ~cpuid->u.mask.del.eax;
		cpudata->gprs[NVMM_X64_GPR_RBX] &= ~cpuid->u.mask.del.ebx;
		cpudata->gprs[NVMM_X64_GPR_RCX] &= ~cpuid->u.mask.del.ecx;
		cpudata->gprs[NVMM_X64_GPR_RDX] &= ~cpuid->u.mask.del.edx;

		/* set */
		cpudata->vmcb->state.rax |= cpuid->u.mask.set.eax;
		cpudata->gprs[NVMM_X64_GPR_RBX] |= cpuid->u.mask.set.ebx;
		cpudata->gprs[NVMM_X64_GPR_RCX] |= cpuid->u.mask.set.ecx;
		cpudata->gprs[NVMM_X64_GPR_RDX] |= cpuid->u.mask.set.edx;

		break;
	}

	svm_inkernel_advance(cpudata->vmcb);
	exit->reason = NVMM_VCPU_EXIT_NONE;
}

static void
svm_exit_hlt(struct nvmm_machine *mach, struct nvmm_cpu *vcpu,
    struct nvmm_vcpu_exit *exit)
{
	struct svm_cpudata *cpudata = vcpu->cpudata;
	struct vmcb *vmcb = cpudata->vmcb;

	if (cpudata->int_window_exit && (vmcb->state.rflags & PSL_I)) {
		svm_event_waitexit_disable(vcpu, false);
	}

	svm_inkernel_advance(cpudata->vmcb);
	exit->reason = NVMM_VCPU_EXIT_HALTED;
}

#define SVM_EXIT_IO_PORT	__BITS(31,16)
#define SVM_EXIT_IO_SEG		__BITS(12,10)
#define SVM_EXIT_IO_A64		__BIT(9)
#define SVM_EXIT_IO_A32		__BIT(8)
#define SVM_EXIT_IO_A16		__BIT(7)
#define SVM_EXIT_IO_SZ32	__BIT(6)
#define SVM_EXIT_IO_SZ16	__BIT(5)
#define SVM_EXIT_IO_SZ8		__BIT(4)
#define SVM_EXIT_IO_REP		__BIT(3)
#define SVM_EXIT_IO_STR		__BIT(2)
#define SVM_EXIT_IO_IN		__BIT(0)
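
/*
 * Example of the EXITINFO1 decoding below: a real-mode "inb $0x64,%al"
 * would exit with roughly
 *
 *	info = SVM_EXIT_IO_IN | SVM_EXIT_IO_SZ8 | SVM_EXIT_IO_A16
 *	     | (0x64 << 16)
 *	     = 0x00640091
 *
 * that is, a 1-byte IN from port 0x64 with 16-bit addressing.
 */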

static void
svm_exit_io(struct nvmm_machine *mach, struct nvmm_cpu *vcpu,
    struct nvmm_vcpu_exit *exit)
{
	struct svm_cpudata *cpudata = vcpu->cpudata;
	uint64_t info = cpudata->vmcb->ctrl.exitinfo1;
	uint64_t nextpc = cpudata->vmcb->ctrl.exitinfo2;

	exit->reason = NVMM_VCPU_EXIT_IO;

	exit->u.io.in = (info & SVM_EXIT_IO_IN) != 0;
	exit->u.io.port = __SHIFTOUT(info, SVM_EXIT_IO_PORT);

	if (svm_decode_assist) {
		KASSERT(__SHIFTOUT(info, SVM_EXIT_IO_SEG) < 6);
		exit->u.io.seg = __SHIFTOUT(info, SVM_EXIT_IO_SEG);
	} else {
		exit->u.io.seg = -1;
	}

	if (info & SVM_EXIT_IO_A64) {
		exit->u.io.address_size = 8;
	} else if (info & SVM_EXIT_IO_A32) {
		exit->u.io.address_size = 4;
	} else if (info & SVM_EXIT_IO_A16) {
		exit->u.io.address_size = 2;
	}

	if (info & SVM_EXIT_IO_SZ32) {
		exit->u.io.operand_size = 4;
	} else if (info & SVM_EXIT_IO_SZ16) {
		exit->u.io.operand_size = 2;
	} else if (info & SVM_EXIT_IO_SZ8) {
		exit->u.io.operand_size = 1;
	}

	exit->u.io.rep = (info & SVM_EXIT_IO_REP) != 0;
	exit->u.io.str = (info & SVM_EXIT_IO_STR) != 0;
	exit->u.io.npc = nextpc;

	svm_vcpu_state_provide(vcpu,
	    NVMM_X64_STATE_GPRS | NVMM_X64_STATE_SEGS |
	    NVMM_X64_STATE_CRS | NVMM_X64_STATE_MSRS);
}

static const uint64_t msr_ignore_list[] = {
	0xc0010055, /* MSR_CMPHALT */
	MSR_DE_CFG,
	MSR_IC_CFG,
	MSR_UCODE_AMD_PATCHLEVEL
};

static bool
svm_inkernel_handle_msr(struct nvmm_machine *mach, struct nvmm_cpu *vcpu,
    struct nvmm_vcpu_exit *exit)
{
	struct svm_cpudata *cpudata = vcpu->cpudata;
	struct vmcb *vmcb = cpudata->vmcb;
	uint64_t val;
	size_t i;

	if (exit->reason == NVMM_VCPU_EXIT_RDMSR) {
		if (exit->u.rdmsr.msr == MSR_NB_CFG) {
			val = NB_CFG_INITAPICCPUIDLO;
			vmcb->state.rax = (val & 0xFFFFFFFF);
			cpudata->gprs[NVMM_X64_GPR_RDX] = (val >> 32);
			goto handled;
		}
		for (i = 0; i < __arraycount(msr_ignore_list); i++) {
			if (msr_ignore_list[i] != exit->u.rdmsr.msr)
				continue;
			val = 0;
			vmcb->state.rax = (val & 0xFFFFFFFF);
			cpudata->gprs[NVMM_X64_GPR_RDX] = (val >> 32);
			goto handled;
		}
	} else {
		if (exit->u.wrmsr.msr == MSR_EFER) {
			if (__predict_false(exit->u.wrmsr.val & ~EFER_VALID)) {
				goto error;
			}
			if ((vmcb->state.efer ^ exit->u.wrmsr.val) &
			     EFER_TLB_FLUSH) {
				cpudata->gtlb_want_flush = true;
			}
			vmcb->state.efer = exit->u.wrmsr.val | EFER_SVME;
			svm_vmcb_cache_flush(vmcb, VMCB_CTRL_VMCB_CLEAN_CR);
			goto handled;
		}
		if (exit->u.wrmsr.msr == MSR_TSC) {
			cpudata->gtsc = exit->u.wrmsr.val;
			cpudata->gtsc_want_update = true;
			goto handled;
		}
		for (i = 0; i < __arraycount(msr_ignore_list); i++) {
			if (msr_ignore_list[i] != exit->u.wrmsr.msr)
				continue;
			goto handled;
		}
	}

	return false;

handled:
	svm_inkernel_advance(cpudata->vmcb);
	return true;

error:
	svm_inject_gp(vcpu);
	return true;
}

static inline void
svm_exit_rdmsr(struct nvmm_machine *mach, struct nvmm_cpu *vcpu,
    struct nvmm_vcpu_exit *exit)
{
	struct svm_cpudata *cpudata = vcpu->cpudata;

	exit->reason = NVMM_VCPU_EXIT_RDMSR;
	exit->u.rdmsr.msr = (cpudata->gprs[NVMM_X64_GPR_RCX] & 0xFFFFFFFF);
	exit->u.rdmsr.npc = cpudata->vmcb->ctrl.nrip;

	if (svm_inkernel_handle_msr(mach, vcpu, exit)) {
		exit->reason = NVMM_VCPU_EXIT_NONE;
		return;
	}

	svm_vcpu_state_provide(vcpu, NVMM_X64_STATE_GPRS);
}

static inline void
svm_exit_wrmsr(struct nvmm_machine *mach, struct nvmm_cpu *vcpu,
    struct nvmm_vcpu_exit *exit)
{
	struct svm_cpudata *cpudata = vcpu->cpudata;
	uint64_t rdx, rax;

	rdx = cpudata->gprs[NVMM_X64_GPR_RDX];
	rax = cpudata->vmcb->state.rax;

	exit->reason = NVMM_VCPU_EXIT_WRMSR;
	exit->u.wrmsr.msr = (cpudata->gprs[NVMM_X64_GPR_RCX] & 0xFFFFFFFF);
	exit->u.wrmsr.val = (rdx << 32) | (rax & 0xFFFFFFFF);
	exit->u.wrmsr.npc = cpudata->vmcb->ctrl.nrip;

	if (svm_inkernel_handle_msr(mach, vcpu, exit)) {
		exit->reason = NVMM_VCPU_EXIT_NONE;
		return;
	}

	svm_vcpu_state_provide(vcpu, NVMM_X64_STATE_GPRS);
}

static void
svm_exit_msr(struct nvmm_machine *mach, struct nvmm_cpu *vcpu,
    struct nvmm_vcpu_exit *exit)
{
	struct svm_cpudata *cpudata = vcpu->cpudata;
	uint64_t info = cpudata->vmcb->ctrl.exitinfo1;

	if (info == 0) {
		svm_exit_rdmsr(mach, vcpu, exit);
	} else {
		svm_exit_wrmsr(mach, vcpu, exit);
	}
}

static void
svm_exit_npf(struct nvmm_machine *mach, struct nvmm_cpu *vcpu,
    struct nvmm_vcpu_exit *exit)
{
	struct svm_cpudata *cpudata = vcpu->cpudata;
	gpaddr_t gpa = cpudata->vmcb->ctrl.exitinfo2;

	exit->reason = NVMM_VCPU_EXIT_MEMORY;
	if (cpudata->vmcb->ctrl.exitinfo1 & PGEX_W)
		exit->u.mem.prot = PROT_WRITE;
	else if (cpudata->vmcb->ctrl.exitinfo1 & PGEX_X)
		exit->u.mem.prot = PROT_EXEC;
	else
		exit->u.mem.prot = PROT_READ;
	exit->u.mem.gpa = gpa;
	exit->u.mem.inst_len = cpudata->vmcb->ctrl.inst_len;
	memcpy(exit->u.mem.inst_bytes, cpudata->vmcb->ctrl.inst_bytes,
	    sizeof(exit->u.mem.inst_bytes));

	svm_vcpu_state_provide(vcpu,
	    NVMM_X64_STATE_GPRS | NVMM_X64_STATE_SEGS |
	    NVMM_X64_STATE_CRS | NVMM_X64_STATE_MSRS);
}

static void
svm_exit_xsetbv(struct nvmm_machine *mach, struct nvmm_cpu *vcpu,
    struct nvmm_vcpu_exit *exit)
{
	struct svm_cpudata *cpudata = vcpu->cpudata;
	struct vmcb *vmcb = cpudata->vmcb;
	uint64_t val;

	exit->reason = NVMM_VCPU_EXIT_NONE;

	val = (cpudata->gprs[NVMM_X64_GPR_RDX] << 32) |
	    (vmcb->state.rax & 0xFFFFFFFF);

	if (__predict_false(cpudata->gprs[NVMM_X64_GPR_RCX] != 0)) {
		goto error;
	} else if (__predict_false(vmcb->state.cpl != 0)) {
		goto error;
	} else if (__predict_false((val & ~svm_xcr0_mask) != 0)) {
		goto error;
	} else if (__predict_false((val & XCR0_X87) == 0)) {
		goto error;
	}

	cpudata->gxcr0 = val;
	if (svm_xcr0_mask != 0) {
		wrxcr(0, cpudata->gxcr0);
	}

	svm_inkernel_advance(cpudata->vmcb);
	return;

error:
	svm_inject_gp(vcpu);
}

static void
svm_exit_invalid(struct nvmm_vcpu_exit *exit, uint64_t code)
{
	exit->u.inv.hwcode = code;
	exit->reason = NVMM_VCPU_EXIT_INVALID;
}

/* -------------------------------------------------------------------------- */

static void
svm_vcpu_guest_fpu_enter(struct nvmm_cpu *vcpu)
{
	struct svm_cpudata *cpudata = vcpu->cpudata;

	fpu_save();
	fpu_area_restore(&cpudata->gfpu, svm_xcr0_mask);

	if (svm_xcr0_mask != 0) {
		cpudata->hxcr0 = rdxcr(0);
		wrxcr(0, cpudata->gxcr0);
	}
}

static void
svm_vcpu_guest_fpu_leave(struct nvmm_cpu *vcpu)
{
	struct svm_cpudata *cpudata = vcpu->cpudata;

	if (svm_xcr0_mask != 0) {
		cpudata->gxcr0 = rdxcr(0);
		wrxcr(0, cpudata->hxcr0);
	}

	fpu_area_save(&cpudata->gfpu, svm_xcr0_mask);
}

static void
svm_vcpu_guest_dbregs_enter(struct nvmm_cpu *vcpu)
{
	struct svm_cpudata *cpudata = vcpu->cpudata;

	x86_dbregs_save(curlwp);

	ldr7(0);

	ldr0(cpudata->drs[NVMM_X64_DR_DR0]);
	ldr1(cpudata->drs[NVMM_X64_DR_DR1]);
	ldr2(cpudata->drs[NVMM_X64_DR_DR2]);
	ldr3(cpudata->drs[NVMM_X64_DR_DR3]);
}

static void
svm_vcpu_guest_dbregs_leave(struct nvmm_cpu *vcpu)
{
	struct svm_cpudata *cpudata = vcpu->cpudata;

	cpudata->drs[NVMM_X64_DR_DR0] = rdr0();
	cpudata->drs[NVMM_X64_DR_DR1] = rdr1();
	cpudata->drs[NVMM_X64_DR_DR2] = rdr2();
	cpudata->drs[NVMM_X64_DR_DR3] = rdr3();

	x86_dbregs_restore(curlwp);
}
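
/*
 * The enter/leave pairs above swap host and guest register sets around
 * VMRUN. Note the ordering: XCR0 is switched to the guest value only
 * after fpu_area_restore() has run with the full svm_xcr0_mask, and it
 * is switched back to the host value before fpu_area_save(), so the
 * host never executes with the guest's XCR0.
 */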

static void
svm_vcpu_guest_misc_enter(struct nvmm_cpu *vcpu)
{
	struct svm_cpudata *cpudata = vcpu->cpudata;

	cpudata->fsbase = rdmsr(MSR_FSBASE);
	cpudata->kernelgsbase = rdmsr(MSR_KERNELGSBASE);
}

static void
svm_vcpu_guest_misc_leave(struct nvmm_cpu *vcpu)
{
	struct svm_cpudata *cpudata = vcpu->cpudata;

	wrmsr(MSR_STAR, cpudata->star);
	wrmsr(MSR_LSTAR, cpudata->lstar);
	wrmsr(MSR_CSTAR, cpudata->cstar);
	wrmsr(MSR_SFMASK, cpudata->sfmask);
	wrmsr(MSR_FSBASE, cpudata->fsbase);
	wrmsr(MSR_KERNELGSBASE, cpudata->kernelgsbase);
}

/* -------------------------------------------------------------------------- */

static inline void
svm_gtlb_catchup(struct nvmm_cpu *vcpu, int hcpu)
{
	struct svm_cpudata *cpudata = vcpu->cpudata;

	if (vcpu->hcpu_last != hcpu || cpudata->shared_asid) {
		cpudata->gtlb_want_flush = true;
	}
}

static inline void
svm_htlb_catchup(struct nvmm_cpu *vcpu, int hcpu)
{
	/*
	 * Nothing to do. If an hTLB flush was needed, either the VCPU was
	 * executing on this hCPU and the hTLB already got flushed, or it
	 * was executing on another hCPU in which case the catchup is done
	 * in svm_gtlb_catchup().
	 */
}

static inline uint64_t
svm_htlb_flush(struct svm_machdata *machdata, struct svm_cpudata *cpudata)
{
	struct vmcb *vmcb = cpudata->vmcb;
	uint64_t machgen;

	machgen = machdata->mach_htlb_gen;
	if (__predict_true(machgen == cpudata->vcpu_htlb_gen)) {
		return machgen;
	}

	vmcb->ctrl.tlb_ctrl = svm_ctrl_tlb_flush;
	return machgen;
}

static inline void
svm_htlb_flush_ack(struct svm_cpudata *cpudata, uint64_t machgen)
{
	struct vmcb *vmcb = cpudata->vmcb;

	if (__predict_true(vmcb->ctrl.exitcode != VMCB_EXITCODE_INVALID)) {
		cpudata->vcpu_htlb_gen = machgen;
	}
}

static inline void
svm_exit_evt(struct svm_cpudata *cpudata, struct vmcb *vmcb)
{
	cpudata->evt_pending = false;

	if (__predict_false(vmcb->ctrl.exitintinfo & VMCB_CTRL_EXITINTINFO_V)) {
		vmcb->ctrl.eventinj = vmcb->ctrl.exitintinfo;
		cpudata->evt_pending = true;
	}
}
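
/*
 * The hTLB synchronization above is a generation-number scheme:
 * machdata->mach_htlb_gen is presumably bumped elsewhere in the driver
 * each time the host page tables of the machine change, and each VCPU
 * records in vcpu_htlb_gen the generation it last ran with. Example
 * sequence: with mach_htlb_gen == 5 and vcpu_htlb_gen == 4,
 * svm_htlb_flush() schedules a TLB flush for the next VMRUN, and
 * svm_htlb_flush_ack() sets vcpu_htlb_gen to 5 once that VMRUN has
 * actually taken place.
 */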

static int
svm_vcpu_run(struct nvmm_machine *mach, struct nvmm_cpu *vcpu,
    struct nvmm_vcpu_exit *exit)
{
	struct nvmm_comm_page *comm = vcpu->comm;
	struct svm_machdata *machdata = mach->machdata;
	struct svm_cpudata *cpudata = vcpu->cpudata;
	struct vmcb *vmcb = cpudata->vmcb;
	uint64_t machgen;
	int hcpu, s;

	if (__predict_false(svm_vcpu_event_commit(vcpu) != 0)) {
		return EINVAL;
	}
	svm_vcpu_state_commit(vcpu);
	comm->state_cached = 0;

	kpreempt_disable();
	hcpu = cpu_number();

	svm_gtlb_catchup(vcpu, hcpu);
	svm_htlb_catchup(vcpu, hcpu);

	if (vcpu->hcpu_last != hcpu) {
		svm_vmcb_cache_flush_all(vmcb);
		cpudata->gtsc_want_update = true;
	}

	svm_vcpu_guest_dbregs_enter(vcpu);
	svm_vcpu_guest_misc_enter(vcpu);
	svm_vcpu_guest_fpu_enter(vcpu);

	while (1) {
		if (cpudata->gtlb_want_flush) {
			vmcb->ctrl.tlb_ctrl = svm_ctrl_tlb_flush;
		} else {
			vmcb->ctrl.tlb_ctrl = 0;
		}

		if (__predict_false(cpudata->gtsc_want_update)) {
			vmcb->ctrl.tsc_offset = cpudata->gtsc - rdtsc();
			svm_vmcb_cache_flush(vmcb, VMCB_CTRL_VMCB_CLEAN_I);
		}

		s = splhigh();
		machgen = svm_htlb_flush(machdata, cpudata);
		svm_vmrun(cpudata->vmcb_pa, cpudata->gprs);
		svm_htlb_flush_ack(cpudata, machgen);
		splx(s);

		svm_vmcb_cache_default(vmcb);

		if (vmcb->ctrl.exitcode != VMCB_EXITCODE_INVALID) {
			cpudata->gtlb_want_flush = false;
			cpudata->gtsc_want_update = false;
			vcpu->hcpu_last = hcpu;
		}
		svm_exit_evt(cpudata, vmcb);

		switch (vmcb->ctrl.exitcode) {
		case VMCB_EXITCODE_INTR:
		case VMCB_EXITCODE_NMI:
			exit->reason = NVMM_VCPU_EXIT_NONE;
			break;
		case VMCB_EXITCODE_VINTR:
			svm_event_waitexit_disable(vcpu, false);
			exit->reason = NVMM_VCPU_EXIT_INT_READY;
			break;
		case VMCB_EXITCODE_IRET:
			svm_event_waitexit_disable(vcpu, true);
			exit->reason = NVMM_VCPU_EXIT_NMI_READY;
			break;
		case VMCB_EXITCODE_CPUID:
			svm_exit_cpuid(mach, vcpu, exit);
			break;
		case VMCB_EXITCODE_HLT:
			svm_exit_hlt(mach, vcpu, exit);
			break;
		case VMCB_EXITCODE_IOIO:
			svm_exit_io(mach, vcpu, exit);
			break;
		case VMCB_EXITCODE_MSR:
			svm_exit_msr(mach, vcpu, exit);
			break;
		case VMCB_EXITCODE_SHUTDOWN:
			exit->reason = NVMM_VCPU_EXIT_SHUTDOWN;
			break;
		case VMCB_EXITCODE_RDPMC:
		case VMCB_EXITCODE_RSM:
		case VMCB_EXITCODE_INVLPGA:
		case VMCB_EXITCODE_VMRUN:
		case VMCB_EXITCODE_VMMCALL:
		case VMCB_EXITCODE_VMLOAD:
		case VMCB_EXITCODE_VMSAVE:
		case VMCB_EXITCODE_STGI:
		case VMCB_EXITCODE_CLGI:
		case VMCB_EXITCODE_SKINIT:
		case VMCB_EXITCODE_RDTSCP:
			svm_inject_ud(vcpu);
			exit->reason = NVMM_VCPU_EXIT_NONE;
			break;
		case VMCB_EXITCODE_MONITOR:
			svm_exit_insn(vmcb, exit, NVMM_VCPU_EXIT_MONITOR);
			break;
		case VMCB_EXITCODE_MWAIT:
		case VMCB_EXITCODE_MWAIT_CONDITIONAL:
			svm_exit_insn(vmcb, exit, NVMM_VCPU_EXIT_MWAIT);
			break;
		case VMCB_EXITCODE_XSETBV:
			svm_exit_xsetbv(mach, vcpu, exit);
			break;
		case VMCB_EXITCODE_NPF:
			svm_exit_npf(mach, vcpu, exit);
			break;
		case VMCB_EXITCODE_FERR_FREEZE: /* ? */
		default:
			svm_exit_invalid(exit, vmcb->ctrl.exitcode);
			break;
		}

		/* If no reason to return to userland, keep rolling. */
		if (curcpu()->ci_schedstate.spc_flags & SPCF_SHOULDYIELD) {
			break;
		}
		if (curcpu()->ci_data.cpu_softints != 0) {
			break;
		}
		if (curlwp->l_flag & LW_USERRET) {
			break;
		}
		if (exit->reason != NVMM_VCPU_EXIT_NONE) {
			break;
		}
	}

	cpudata->gtsc = rdtsc() + vmcb->ctrl.tsc_offset;

	svm_vcpu_guest_fpu_leave(vcpu);
	svm_vcpu_guest_misc_leave(vcpu);
	svm_vcpu_guest_dbregs_leave(vcpu);

	kpreempt_enable();

	exit->exitstate[NVMM_X64_EXITSTATE_CR8] = __SHIFTOUT(vmcb->ctrl.v,
	    VMCB_CTRL_V_TPR);
	exit->exitstate[NVMM_X64_EXITSTATE_RFLAGS] = vmcb->state.rflags;

	exit->exitstate[NVMM_X64_EXITSTATE_INT_SHADOW] =
	    ((vmcb->ctrl.intr & VMCB_CTRL_INTR_SHADOW) != 0);
	exit->exitstate[NVMM_X64_EXITSTATE_INT_WINDOW_EXIT] =
	    cpudata->int_window_exit;
	exit->exitstate[NVMM_X64_EXITSTATE_NMI_WINDOW_EXIT] =
	    cpudata->nmi_window_exit;
	exit->exitstate[NVMM_X64_EXITSTATE_EVT_PENDING] =
	    cpudata->evt_pending;

	return 0;
}
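
/*
 * The checks at the bottom of the loop above bound the time spent in
 * guest mode: a pending preemption request (SPCF_SHOULDYIELD), pending
 * soft interrupts, or pending userret work (LW_USERRET) each force a
 * return to userland, as does any exit the kernel could not handle by
 * itself (exit->reason != NVMM_VCPU_EXIT_NONE).
 */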

/* -------------------------------------------------------------------------- */

static int
svm_memalloc(paddr_t *pa, vaddr_t *va, size_t npages)
{
	struct pglist pglist;
	paddr_t _pa;
	vaddr_t _va;
	size_t i;
	int ret;

	ret = uvm_pglistalloc(npages * PAGE_SIZE, 0, ~0UL, PAGE_SIZE, 0,
	    &pglist, 1, 0);
	if (ret != 0)
		return ENOMEM;
	_pa = TAILQ_FIRST(&pglist)->phys_addr;
	_va = uvm_km_alloc(kernel_map, npages * PAGE_SIZE, 0,
	    UVM_KMF_VAONLY | UVM_KMF_NOWAIT);
	if (_va == 0)
		goto error;

	for (i = 0; i < npages; i++) {
		pmap_kenter_pa(_va + i * PAGE_SIZE, _pa + i * PAGE_SIZE,
		    VM_PROT_READ | VM_PROT_WRITE, PMAP_WRITE_BACK);
	}
	pmap_update(pmap_kernel());

	memset((void *)_va, 0, npages * PAGE_SIZE);

	*pa = _pa;
	*va = _va;
	return 0;

error:
	for (i = 0; i < npages; i++) {
		uvm_pagefree(PHYS_TO_VM_PAGE(_pa + i * PAGE_SIZE));
	}
	return ENOMEM;
}

static void
svm_memfree(paddr_t pa, vaddr_t va, size_t npages)
{
	size_t i;

	pmap_kremove(va, npages * PAGE_SIZE);
	pmap_update(pmap_kernel());
	uvm_km_free(kernel_map, va, npages * PAGE_SIZE, UVM_KMF_VAONLY);
	for (i = 0; i < npages; i++) {
		uvm_pagefree(PHYS_TO_VM_PAGE(pa + i * PAGE_SIZE));
	}
}

/* -------------------------------------------------------------------------- */

#define SVM_MSRBM_READ	__BIT(0)
#define SVM_MSRBM_WRITE	__BIT(1)

static void
svm_vcpu_msr_allow(uint8_t *bitmap, uint64_t msr, bool read, bool write)
{
	uint64_t byte;
	uint8_t bitoff;

	if (msr < 0x00002000) {
		/* Range 1 */
		byte = ((msr - 0x00000000) >> 2UL) + 0x0000;
	} else if (msr >= 0xC0000000 && msr < 0xC0002000) {
		/* Range 2 */
		byte = ((msr - 0xC0000000) >> 2UL) + 0x0800;
	} else if (msr >= 0xC0010000 && msr < 0xC0012000) {
		/* Range 3 */
		byte = ((msr - 0xC0010000) >> 2UL) + 0x1000;
	} else {
		panic("%s: wrong range", __func__);
	}

	bitoff = (msr & 0x3) << 1;

	if (read) {
		bitmap[byte] &= ~(SVM_MSRBM_READ << bitoff);
	}
	if (write) {
		bitmap[byte] &= ~(SVM_MSRBM_WRITE << bitoff);
	}
}
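
/*
 * Worked example of the MSRPM indexing above, for MSR_LSTAR
 * (0xC0000082), which falls in range 2:
 *
 *	byte   = ((0xC0000082 - 0xC0000000) >> 2) + 0x0800 = 0x0820
 *	bitoff = (0x82 & 0x3) << 1 = 4
 *
 * so the read-intercept bit for LSTAR is bit 4 of byte 0x820, and the
 * write-intercept bit is bit 5; clearing them grants direct access.
 */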

#define SVM_SEG_ATTRIB_TYPE		__BITS(3,0)
#define SVM_SEG_ATTRIB_S		__BIT(4)
#define SVM_SEG_ATTRIB_DPL		__BITS(6,5)
#define SVM_SEG_ATTRIB_P		__BIT(7)
#define SVM_SEG_ATTRIB_AVL		__BIT(8)
#define SVM_SEG_ATTRIB_L		__BIT(9)
#define SVM_SEG_ATTRIB_DEF		__BIT(10)
#define SVM_SEG_ATTRIB_G		__BIT(11)

static void
svm_vcpu_setstate_seg(const struct nvmm_x64_state_seg *seg,
    struct vmcb_segment *vseg)
{
	vseg->selector = seg->selector;
	vseg->attrib =
	    __SHIFTIN(seg->attrib.type, SVM_SEG_ATTRIB_TYPE) |
	    __SHIFTIN(seg->attrib.s, SVM_SEG_ATTRIB_S) |
	    __SHIFTIN(seg->attrib.dpl, SVM_SEG_ATTRIB_DPL) |
	    __SHIFTIN(seg->attrib.p, SVM_SEG_ATTRIB_P) |
	    __SHIFTIN(seg->attrib.avl, SVM_SEG_ATTRIB_AVL) |
	    __SHIFTIN(seg->attrib.l, SVM_SEG_ATTRIB_L) |
	    __SHIFTIN(seg->attrib.def, SVM_SEG_ATTRIB_DEF) |
	    __SHIFTIN(seg->attrib.g, SVM_SEG_ATTRIB_G);
	vseg->limit = seg->limit;
	vseg->base = seg->base;
}

static void
svm_vcpu_getstate_seg(struct nvmm_x64_state_seg *seg, struct vmcb_segment *vseg)
{
	seg->selector = vseg->selector;
	seg->attrib.type = __SHIFTOUT(vseg->attrib, SVM_SEG_ATTRIB_TYPE);
	seg->attrib.s = __SHIFTOUT(vseg->attrib, SVM_SEG_ATTRIB_S);
	seg->attrib.dpl = __SHIFTOUT(vseg->attrib, SVM_SEG_ATTRIB_DPL);
	seg->attrib.p = __SHIFTOUT(vseg->attrib, SVM_SEG_ATTRIB_P);
	seg->attrib.avl = __SHIFTOUT(vseg->attrib, SVM_SEG_ATTRIB_AVL);
	seg->attrib.l = __SHIFTOUT(vseg->attrib, SVM_SEG_ATTRIB_L);
	seg->attrib.def = __SHIFTOUT(vseg->attrib, SVM_SEG_ATTRIB_DEF);
	seg->attrib.g = __SHIFTOUT(vseg->attrib, SVM_SEG_ATTRIB_G);
	seg->limit = vseg->limit;
	seg->base = vseg->base;
}

static inline bool
svm_state_tlb_flush(const struct vmcb *vmcb, const struct nvmm_x64_state *state,
    uint64_t flags)
{
	if (flags & NVMM_X64_STATE_CRS) {
		if ((vmcb->state.cr0 ^
		     state->crs[NVMM_X64_CR_CR0]) & CR0_TLB_FLUSH) {
			return true;
		}
		if (vmcb->state.cr3 != state->crs[NVMM_X64_CR_CR3]) {
			return true;
		}
		if ((vmcb->state.cr4 ^
		     state->crs[NVMM_X64_CR_CR4]) & CR4_TLB_FLUSH) {
			return true;
		}
	}

	if (flags & NVMM_X64_STATE_MSRS) {
		if ((vmcb->state.efer ^
		     state->msrs[NVMM_X64_MSR_EFER]) & EFER_TLB_FLUSH) {
			return true;
		}
	}

	return false;
}
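
/*
 * svm_state_tlb_flush() mirrors what real hardware does on CR/EFER
 * writes. For example, replacing CR4 with a value that differs in
 * CR4.PGE (part of CR4_TLB_FLUSH) would invalidate global TLB entries
 * on real hardware, so the guest TLB must be flushed before the next
 * VMRUN when userland installs such a state.
 */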

static void
svm_vcpu_setstate(struct nvmm_cpu *vcpu)
{
	struct nvmm_comm_page *comm = vcpu->comm;
	const struct nvmm_x64_state *state = &comm->state;
	struct svm_cpudata *cpudata = vcpu->cpudata;
	struct vmcb *vmcb = cpudata->vmcb;
	struct fxsave *fpustate;
	uint64_t flags;

	flags = comm->state_wanted;

	if (svm_state_tlb_flush(vmcb, state, flags)) {
		cpudata->gtlb_want_flush = true;
	}

	if (flags & NVMM_X64_STATE_SEGS) {
		svm_vcpu_setstate_seg(&state->segs[NVMM_X64_SEG_CS],
		    &vmcb->state.cs);
		svm_vcpu_setstate_seg(&state->segs[NVMM_X64_SEG_DS],
		    &vmcb->state.ds);
		svm_vcpu_setstate_seg(&state->segs[NVMM_X64_SEG_ES],
		    &vmcb->state.es);
		svm_vcpu_setstate_seg(&state->segs[NVMM_X64_SEG_FS],
		    &vmcb->state.fs);
		svm_vcpu_setstate_seg(&state->segs[NVMM_X64_SEG_GS],
		    &vmcb->state.gs);
		svm_vcpu_setstate_seg(&state->segs[NVMM_X64_SEG_SS],
		    &vmcb->state.ss);
		svm_vcpu_setstate_seg(&state->segs[NVMM_X64_SEG_GDT],
		    &vmcb->state.gdt);
		svm_vcpu_setstate_seg(&state->segs[NVMM_X64_SEG_IDT],
		    &vmcb->state.idt);
		svm_vcpu_setstate_seg(&state->segs[NVMM_X64_SEG_LDT],
		    &vmcb->state.ldt);
		svm_vcpu_setstate_seg(&state->segs[NVMM_X64_SEG_TR],
		    &vmcb->state.tr);

		vmcb->state.cpl = state->segs[NVMM_X64_SEG_SS].attrib.dpl;
	}

	CTASSERT(sizeof(cpudata->gprs) == sizeof(state->gprs));
	if (flags & NVMM_X64_STATE_GPRS) {
		memcpy(cpudata->gprs, state->gprs, sizeof(state->gprs));

		vmcb->state.rip = state->gprs[NVMM_X64_GPR_RIP];
		vmcb->state.rsp = state->gprs[NVMM_X64_GPR_RSP];
		vmcb->state.rax = state->gprs[NVMM_X64_GPR_RAX];
		vmcb->state.rflags = state->gprs[NVMM_X64_GPR_RFLAGS];
	}

	if (flags & NVMM_X64_STATE_CRS) {
		vmcb->state.cr0 = state->crs[NVMM_X64_CR_CR0];
		vmcb->state.cr2 = state->crs[NVMM_X64_CR_CR2];
		vmcb->state.cr3 = state->crs[NVMM_X64_CR_CR3];
		vmcb->state.cr4 = state->crs[NVMM_X64_CR_CR4];

		vmcb->ctrl.v &= ~VMCB_CTRL_V_TPR;
		vmcb->ctrl.v |= __SHIFTIN(state->crs[NVMM_X64_CR_CR8],
		    VMCB_CTRL_V_TPR);

		if (svm_xcr0_mask != 0) {
			/* Clear illegal XCR0 bits, set mandatory X87 bit. */
			cpudata->gxcr0 = state->crs[NVMM_X64_CR_XCR0];
			cpudata->gxcr0 &= svm_xcr0_mask;
			cpudata->gxcr0 |= XCR0_X87;
		}
	}

	CTASSERT(sizeof(cpudata->drs) == sizeof(state->drs));
	if (flags & NVMM_X64_STATE_DRS) {
		memcpy(cpudata->drs, state->drs, sizeof(state->drs));

		vmcb->state.dr6 = state->drs[NVMM_X64_DR_DR6];
		vmcb->state.dr7 = state->drs[NVMM_X64_DR_DR7];
	}

	if (flags & NVMM_X64_STATE_MSRS) {
		/*
		 * EFER_SVME is mandatory.
		 */
		vmcb->state.efer = state->msrs[NVMM_X64_MSR_EFER] | EFER_SVME;
		vmcb->state.star = state->msrs[NVMM_X64_MSR_STAR];
		vmcb->state.lstar = state->msrs[NVMM_X64_MSR_LSTAR];
		vmcb->state.cstar = state->msrs[NVMM_X64_MSR_CSTAR];
		vmcb->state.sfmask = state->msrs[NVMM_X64_MSR_SFMASK];
		vmcb->state.kernelgsbase =
		    state->msrs[NVMM_X64_MSR_KERNELGSBASE];
		vmcb->state.sysenter_cs =
		    state->msrs[NVMM_X64_MSR_SYSENTER_CS];
		vmcb->state.sysenter_esp =
		    state->msrs[NVMM_X64_MSR_SYSENTER_ESP];
		vmcb->state.sysenter_eip =
		    state->msrs[NVMM_X64_MSR_SYSENTER_EIP];
		vmcb->state.g_pat = state->msrs[NVMM_X64_MSR_PAT];

		cpudata->gtsc = state->msrs[NVMM_X64_MSR_TSC];
		cpudata->gtsc_want_update = true;
	}

	if (flags & NVMM_X64_STATE_INTR) {
		if (state->intr.int_shadow) {
			vmcb->ctrl.intr |= VMCB_CTRL_INTR_SHADOW;
		} else {
			vmcb->ctrl.intr &= ~VMCB_CTRL_INTR_SHADOW;
		}

		if (state->intr.int_window_exiting) {
			svm_event_waitexit_enable(vcpu, false);
		} else {
			svm_event_waitexit_disable(vcpu, false);
		}

		if (state->intr.nmi_window_exiting) {
			svm_event_waitexit_enable(vcpu, true);
		} else {
			svm_event_waitexit_disable(vcpu, true);
		}
	}

	CTASSERT(sizeof(cpudata->gfpu.xsh_fxsave) == sizeof(state->fpu));
	if (flags & NVMM_X64_STATE_FPU) {
		memcpy(cpudata->gfpu.xsh_fxsave, &state->fpu,
		    sizeof(state->fpu));

		fpustate = (struct fxsave *)cpudata->gfpu.xsh_fxsave;
		fpustate->fx_mxcsr_mask &= x86_fpu_mxcsr_mask;
		fpustate->fx_mxcsr &= fpustate->fx_mxcsr_mask;

		if (svm_xcr0_mask != 0) {
			/* Reset XSTATE_BV, to force a reload. */
			cpudata->gfpu.xsh_xstate_bv = svm_xcr0_mask;
		}
	}

	svm_vmcb_cache_update(vmcb, flags);

	comm->state_wanted = 0;
	comm->state_cached |= flags;
}
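
/*
 * state_wanted/state_cached implement the lazy state-transfer protocol
 * with userland through the shared comm page. Userland marks in
 * state_commit the components it modified; svm_vcpu_state_commit()
 * (below) turns those into state_wanted before the next run, e.g.
 *
 *	comm->state_commit = NVMM_X64_STATE_GPRS;
 *
 * makes the next svm_vcpu_setstate() copy only the GPR block into the
 * VMCB, leaving the rest of the cached state untouched.
 */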

static void
svm_vcpu_getstate(struct nvmm_cpu *vcpu)
{
	struct nvmm_comm_page *comm = vcpu->comm;
	struct nvmm_x64_state *state = &comm->state;
	struct svm_cpudata *cpudata = vcpu->cpudata;
	struct vmcb *vmcb = cpudata->vmcb;
	uint64_t flags;

	flags = comm->state_wanted;

	if (flags & NVMM_X64_STATE_SEGS) {
		svm_vcpu_getstate_seg(&state->segs[NVMM_X64_SEG_CS],
		    &vmcb->state.cs);
		svm_vcpu_getstate_seg(&state->segs[NVMM_X64_SEG_DS],
		    &vmcb->state.ds);
		svm_vcpu_getstate_seg(&state->segs[NVMM_X64_SEG_ES],
		    &vmcb->state.es);
		svm_vcpu_getstate_seg(&state->segs[NVMM_X64_SEG_FS],
		    &vmcb->state.fs);
		svm_vcpu_getstate_seg(&state->segs[NVMM_X64_SEG_GS],
		    &vmcb->state.gs);
		svm_vcpu_getstate_seg(&state->segs[NVMM_X64_SEG_SS],
		    &vmcb->state.ss);
		svm_vcpu_getstate_seg(&state->segs[NVMM_X64_SEG_GDT],
		    &vmcb->state.gdt);
		svm_vcpu_getstate_seg(&state->segs[NVMM_X64_SEG_IDT],
		    &vmcb->state.idt);
		svm_vcpu_getstate_seg(&state->segs[NVMM_X64_SEG_LDT],
		    &vmcb->state.ldt);
		svm_vcpu_getstate_seg(&state->segs[NVMM_X64_SEG_TR],
		    &vmcb->state.tr);

		state->segs[NVMM_X64_SEG_SS].attrib.dpl = vmcb->state.cpl;
	}

	CTASSERT(sizeof(cpudata->gprs) == sizeof(state->gprs));
	if (flags & NVMM_X64_STATE_GPRS) {
		memcpy(state->gprs, cpudata->gprs, sizeof(state->gprs));

		state->gprs[NVMM_X64_GPR_RIP] = vmcb->state.rip;
		state->gprs[NVMM_X64_GPR_RSP] = vmcb->state.rsp;
		state->gprs[NVMM_X64_GPR_RAX] = vmcb->state.rax;
		state->gprs[NVMM_X64_GPR_RFLAGS] = vmcb->state.rflags;
	}

	if (flags & NVMM_X64_STATE_CRS) {
		state->crs[NVMM_X64_CR_CR0] = vmcb->state.cr0;
		state->crs[NVMM_X64_CR_CR2] = vmcb->state.cr2;
		state->crs[NVMM_X64_CR_CR3] = vmcb->state.cr3;
		state->crs[NVMM_X64_CR_CR4] = vmcb->state.cr4;
		state->crs[NVMM_X64_CR_CR8] = __SHIFTOUT(vmcb->ctrl.v,
		    VMCB_CTRL_V_TPR);
		state->crs[NVMM_X64_CR_XCR0] = cpudata->gxcr0;
	}

	CTASSERT(sizeof(cpudata->drs) == sizeof(state->drs));
	if (flags & NVMM_X64_STATE_DRS) {
		memcpy(state->drs, cpudata->drs, sizeof(state->drs));

		state->drs[NVMM_X64_DR_DR6] = vmcb->state.dr6;
		state->drs[NVMM_X64_DR_DR7] = vmcb->state.dr7;
	}

	if (flags & NVMM_X64_STATE_MSRS) {
		state->msrs[NVMM_X64_MSR_EFER] = vmcb->state.efer;
		state->msrs[NVMM_X64_MSR_STAR] = vmcb->state.star;
		state->msrs[NVMM_X64_MSR_LSTAR] = vmcb->state.lstar;
		state->msrs[NVMM_X64_MSR_CSTAR] = vmcb->state.cstar;
		state->msrs[NVMM_X64_MSR_SFMASK] = vmcb->state.sfmask;
		state->msrs[NVMM_X64_MSR_KERNELGSBASE] =
		    vmcb->state.kernelgsbase;
		state->msrs[NVMM_X64_MSR_SYSENTER_CS] =
		    vmcb->state.sysenter_cs;
		state->msrs[NVMM_X64_MSR_SYSENTER_ESP] =
		    vmcb->state.sysenter_esp;
		state->msrs[NVMM_X64_MSR_SYSENTER_EIP] =
		    vmcb->state.sysenter_eip;
		state->msrs[NVMM_X64_MSR_PAT] = vmcb->state.g_pat;
		state->msrs[NVMM_X64_MSR_TSC] = cpudata->gtsc;

		/* Hide SVME. */
		state->msrs[NVMM_X64_MSR_EFER] &= ~EFER_SVME;
	}

	if (flags & NVMM_X64_STATE_INTR) {
		state->intr.int_shadow =
		    (vmcb->ctrl.intr & VMCB_CTRL_INTR_SHADOW) != 0;
		state->intr.int_window_exiting = cpudata->int_window_exit;
		state->intr.nmi_window_exiting = cpudata->nmi_window_exit;
		state->intr.evt_pending = cpudata->evt_pending;
	}

	CTASSERT(sizeof(cpudata->gfpu.xsh_fxsave) == sizeof(state->fpu));
	if (flags & NVMM_X64_STATE_FPU) {
		memcpy(&state->fpu, cpudata->gfpu.xsh_fxsave,
		    sizeof(state->fpu));
	}

	comm->state_wanted = 0;
	comm->state_cached |= flags;
}

static void
svm_vcpu_state_provide(struct nvmm_cpu *vcpu, uint64_t flags)
{
	vcpu->comm->state_wanted = flags;
	svm_vcpu_getstate(vcpu);
}

static void
svm_vcpu_state_commit(struct nvmm_cpu *vcpu)
{
	vcpu->comm->state_wanted = vcpu->comm->state_commit;
	vcpu->comm->state_commit = 0;
	svm_vcpu_setstate(vcpu);
}

/* -------------------------------------------------------------------------- */

static void
svm_asid_alloc(struct nvmm_cpu *vcpu)
{
	struct svm_cpudata *cpudata = vcpu->cpudata;
	struct vmcb *vmcb = cpudata->vmcb;
	size_t i, oct, bit;

	mutex_enter(&svm_asidlock);

	for (i = 0; i < svm_maxasid; i++) {
		oct = i / 8;
		bit = i % 8;

		if (svm_asidmap[oct] & __BIT(bit)) {
			continue;
		}

		svm_asidmap[oct] |= __BIT(bit);
		vmcb->ctrl.guest_asid = i;
		mutex_exit(&svm_asidlock);
		return;
	}

	/*
	 * No free ASID. Use the last one, which is shared and requires
	 * special TLB handling.
	 */
	cpudata->shared_asid = true;
	vmcb->ctrl.guest_asid = svm_maxasid - 1;
	mutex_exit(&svm_asidlock);
}

static void
svm_asid_free(struct nvmm_cpu *vcpu)
{
	struct svm_cpudata *cpudata = vcpu->cpudata;
	struct vmcb *vmcb = cpudata->vmcb;
	size_t oct, bit;

	if (cpudata->shared_asid) {
		return;
	}

	oct = vmcb->ctrl.guest_asid / 8;
	bit = vmcb->ctrl.guest_asid % 8;

	mutex_enter(&svm_asidlock);
	svm_asidmap[oct] &= ~__BIT(bit);
	mutex_exit(&svm_asidlock);
}
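
/*
 * Note: the ASID map is a plain bitmap, eight ASIDs per byte. Worked
 * example: ASID 10 lives in byte 10 / 8 = 1 at bit 10 % 8 = 2, so
 * allocating it sets svm_asidmap[1] |= __BIT(2) and freeing it clears
 * the same bit. VCPUs that fell back on the shared last ASID never
 * own a bit in the map, which is why svm_asid_free() returns early
 * for them.
 */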

static void
svm_vcpu_init(struct nvmm_machine *mach, struct nvmm_cpu *vcpu)
{
	struct svm_cpudata *cpudata = vcpu->cpudata;
	struct vmcb *vmcb = cpudata->vmcb;

	/* Allow reads/writes of Control Registers. */
	vmcb->ctrl.intercept_cr = 0;

	/* Allow reads/writes of Debug Registers. */
	vmcb->ctrl.intercept_dr = 0;

	/* Allow exceptions 0 to 31. */
	vmcb->ctrl.intercept_vec = 0;

	/*
	 * Allow:
	 * - SMI [smm interrupts]
	 * - VINTR [virtual interrupts]
	 * - CR0_SPEC [CR0 writes changing fields other than CR0.TS or CR0.MP]
	 * - RIDTR [reads of IDTR]
	 * - RGDTR [reads of GDTR]
	 * - RLDTR [reads of LDTR]
	 * - RTR [reads of TR]
	 * - WIDTR [writes of IDTR]
	 * - WGDTR [writes of GDTR]
	 * - WLDTR [writes of LDTR]
	 * - WTR [writes of TR]
	 * - RDTSC [rdtsc instruction]
	 * - PUSHF [pushf instruction]
	 * - POPF [popf instruction]
	 * - IRET [iret instruction]
	 * - INTN [int $n instructions]
	 * - INVD [invd instruction]
	 * - PAUSE [pause instruction]
	 * - INVLPG [invlpg instruction]
	 * - TASKSW [task switches]
	 *
	 * Intercept the rest below.
	 */
	vmcb->ctrl.intercept_misc1 =
	    VMCB_CTRL_INTERCEPT_INTR |
	    VMCB_CTRL_INTERCEPT_NMI |
	    VMCB_CTRL_INTERCEPT_INIT |
	    VMCB_CTRL_INTERCEPT_RDPMC |
	    VMCB_CTRL_INTERCEPT_CPUID |
	    VMCB_CTRL_INTERCEPT_RSM |
	    VMCB_CTRL_INTERCEPT_HLT |
	    VMCB_CTRL_INTERCEPT_INVLPGA |
	    VMCB_CTRL_INTERCEPT_IOIO_PROT |
	    VMCB_CTRL_INTERCEPT_MSR_PROT |
	    VMCB_CTRL_INTERCEPT_FERR_FREEZE |
	    VMCB_CTRL_INTERCEPT_SHUTDOWN;

	/*
	 * Allow:
	 * - ICEBP [icebp instruction]
	 * - WBINVD [wbinvd instruction]
	 * - WCR_SPEC(0..15) [writes of CR0-15, received after instruction]
	 *
	 * Intercept the rest below.
	 */
	vmcb->ctrl.intercept_misc2 =
	    VMCB_CTRL_INTERCEPT_VMRUN |
	    VMCB_CTRL_INTERCEPT_VMMCALL |
	    VMCB_CTRL_INTERCEPT_VMLOAD |
	    VMCB_CTRL_INTERCEPT_VMSAVE |
	    VMCB_CTRL_INTERCEPT_STGI |
	    VMCB_CTRL_INTERCEPT_CLGI |
	    VMCB_CTRL_INTERCEPT_SKINIT |
	    VMCB_CTRL_INTERCEPT_RDTSCP |
	    VMCB_CTRL_INTERCEPT_MONITOR |
	    VMCB_CTRL_INTERCEPT_MWAIT |
	    VMCB_CTRL_INTERCEPT_XSETBV;

	/* Intercept all I/O accesses. */
	memset(cpudata->iobm, 0xFF, IOBM_SIZE);
	vmcb->ctrl.iopm_base_pa = cpudata->iobm_pa;

	/* Allow direct access to certain MSRs. */
	memset(cpudata->msrbm, 0xFF, MSRBM_SIZE);
	svm_vcpu_msr_allow(cpudata->msrbm, MSR_EFER, true, false);
	svm_vcpu_msr_allow(cpudata->msrbm, MSR_STAR, true, true);
	svm_vcpu_msr_allow(cpudata->msrbm, MSR_LSTAR, true, true);
	svm_vcpu_msr_allow(cpudata->msrbm, MSR_CSTAR, true, true);
	svm_vcpu_msr_allow(cpudata->msrbm, MSR_SFMASK, true, true);
	svm_vcpu_msr_allow(cpudata->msrbm, MSR_KERNELGSBASE, true, true);
	svm_vcpu_msr_allow(cpudata->msrbm, MSR_SYSENTER_CS, true, true);
	svm_vcpu_msr_allow(cpudata->msrbm, MSR_SYSENTER_ESP, true, true);
	svm_vcpu_msr_allow(cpudata->msrbm, MSR_SYSENTER_EIP, true, true);
	svm_vcpu_msr_allow(cpudata->msrbm, MSR_FSBASE, true, true);
	svm_vcpu_msr_allow(cpudata->msrbm, MSR_GSBASE, true, true);
	svm_vcpu_msr_allow(cpudata->msrbm, MSR_CR_PAT, true, true);
	svm_vcpu_msr_allow(cpudata->msrbm, MSR_TSC, true, false);
	vmcb->ctrl.msrpm_base_pa = cpudata->msrbm_pa;

	/* Generate ASID. */
	svm_asid_alloc(vcpu);

	/* Virtual TPR. */
	vmcb->ctrl.v = VMCB_CTRL_V_INTR_MASKING;

	/* Enable Nested Paging. */
	vmcb->ctrl.enable1 = VMCB_CTRL_ENABLE_NP;
	vmcb->ctrl.n_cr3 = mach->vm->vm_map.pmap->pm_pdirpa[0];

	/* Init XSAVE header. */
	cpudata->gfpu.xsh_xstate_bv = svm_xcr0_mask;
	cpudata->gfpu.xsh_xcomp_bv = 0;

	/* These MSRs are static. */
	cpudata->star = rdmsr(MSR_STAR);
	cpudata->lstar = rdmsr(MSR_LSTAR);
	cpudata->cstar = rdmsr(MSR_CSTAR);
	cpudata->sfmask = rdmsr(MSR_SFMASK);

	/* Install the RESET state. */
	memcpy(&vcpu->comm->state, &nvmm_x86_reset_state,
	    sizeof(nvmm_x86_reset_state));
	vcpu->comm->state_wanted = NVMM_X64_STATE_ALL;
	vcpu->comm->state_cached = 0;
	svm_vcpu_setstate(vcpu);
}
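
/*
 * Note: in the two permission bitmaps above, a set bit means
 * "intercept". The memset(0xFF) calls therefore start from
 * intercept-everything, and svm_vcpu_msr_allow() selectively clears
 * the read and/or write bit of a given MSR. E.g. MSR_STAR
 * (true, true) is passed through in both directions, while MSR_TSC
 * (true, false) can be read directly but still #VMEXITs on write.
 */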

static int
svm_vcpu_create(struct nvmm_machine *mach, struct nvmm_cpu *vcpu)
{
	struct svm_cpudata *cpudata;
	int error;

	/* Allocate the SVM cpudata. */
	cpudata = (struct svm_cpudata *)uvm_km_alloc(kernel_map,
	    roundup(sizeof(*cpudata), PAGE_SIZE), 0,
	    UVM_KMF_WIRED|UVM_KMF_ZERO);
	vcpu->cpudata = cpudata;

	/* VMCB */
	error = svm_memalloc(&cpudata->vmcb_pa, (vaddr_t *)&cpudata->vmcb,
	    VMCB_NPAGES);
	if (error)
		goto error;

	/* I/O Bitmap */
	error = svm_memalloc(&cpudata->iobm_pa, (vaddr_t *)&cpudata->iobm,
	    IOBM_NPAGES);
	if (error)
		goto error;

	/* MSR Bitmap */
	error = svm_memalloc(&cpudata->msrbm_pa, (vaddr_t *)&cpudata->msrbm,
	    MSRBM_NPAGES);
	if (error)
		goto error;

	/* Init the VCPU info. */
	svm_vcpu_init(mach, vcpu);

	return 0;

error:
	if (cpudata->vmcb_pa) {
		svm_memfree(cpudata->vmcb_pa, (vaddr_t)cpudata->vmcb,
		    VMCB_NPAGES);
	}
	if (cpudata->iobm_pa) {
		svm_memfree(cpudata->iobm_pa, (vaddr_t)cpudata->iobm,
		    IOBM_NPAGES);
	}
	if (cpudata->msrbm_pa) {
		svm_memfree(cpudata->msrbm_pa, (vaddr_t)cpudata->msrbm,
		    MSRBM_NPAGES);
	}
	uvm_km_free(kernel_map, (vaddr_t)cpudata,
	    roundup(sizeof(*cpudata), PAGE_SIZE), UVM_KMF_WIRED);
	return error;
}

static void
svm_vcpu_destroy(struct nvmm_machine *mach, struct nvmm_cpu *vcpu)
{
	struct svm_cpudata *cpudata = vcpu->cpudata;

	svm_asid_free(vcpu);

	svm_memfree(cpudata->vmcb_pa, (vaddr_t)cpudata->vmcb, VMCB_NPAGES);
	svm_memfree(cpudata->iobm_pa, (vaddr_t)cpudata->iobm, IOBM_NPAGES);
	svm_memfree(cpudata->msrbm_pa, (vaddr_t)cpudata->msrbm, MSRBM_NPAGES);

	uvm_km_free(kernel_map, (vaddr_t)cpudata,
	    roundup(sizeof(*cpudata), PAGE_SIZE), UVM_KMF_WIRED);
}

/* -------------------------------------------------------------------------- */

static int
svm_vcpu_configure_cpuid(struct svm_cpudata *cpudata, void *data)
{
	struct nvmm_vcpu_conf_cpuid *cpuid = data;
	size_t i;

	if (__predict_false(cpuid->mask && cpuid->exit)) {
		return EINVAL;
	}
	if (__predict_false(cpuid->mask &&
	    ((cpuid->u.mask.set.eax & cpuid->u.mask.del.eax) ||
	     (cpuid->u.mask.set.ebx & cpuid->u.mask.del.ebx) ||
	     (cpuid->u.mask.set.ecx & cpuid->u.mask.del.ecx) ||
	     (cpuid->u.mask.set.edx & cpuid->u.mask.del.edx)))) {
		return EINVAL;
	}

	/* If unset, delete, to restore the default behavior. */
	if (!cpuid->mask && !cpuid->exit) {
		for (i = 0; i < SVM_NCPUIDS; i++) {
			if (!cpudata->cpuidpresent[i]) {
				continue;
			}
			if (cpudata->cpuid[i].leaf == cpuid->leaf) {
				cpudata->cpuidpresent[i] = false;
			}
		}
		return 0;
	}

	/* If already here, replace. */
	for (i = 0; i < SVM_NCPUIDS; i++) {
		if (!cpudata->cpuidpresent[i]) {
			continue;
		}
		if (cpudata->cpuid[i].leaf == cpuid->leaf) {
			memcpy(&cpudata->cpuid[i], cpuid,
			    sizeof(struct nvmm_vcpu_conf_cpuid));
			return 0;
		}
	}

	/* Not here, insert. */
	for (i = 0; i < SVM_NCPUIDS; i++) {
		if (!cpudata->cpuidpresent[i]) {
			cpudata->cpuidpresent[i] = true;
			memcpy(&cpudata->cpuid[i], cpuid,
			    sizeof(struct nvmm_vcpu_conf_cpuid));
			return 0;
		}
	}

	return ENOBUFS;
}
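
/*
 * Note: a sketch of the userland side of this mechanism. Hedged:
 * the wrapper call and the chosen bit are illustrative, only the
 * struct fields are taken from the validation above.
 *
 *	struct nvmm_vcpu_conf_cpuid cpuid;
 *	memset(&cpuid, 0, sizeof(cpuid));
 *	cpuid.mask = 1;
 *	cpuid.leaf = 0x00000001;
 *	cpuid.u.mask.del.ecx = __BIT(5);	// hide one feature bit
 *	nvmm_vcpu_configure(&mach, &vcpu, NVMM_VCPU_CONF_CPUID, &cpuid);
 *
 * Setting neither mask nor exit for a leaf deletes the entry, as the
 * first loop above shows.
 */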

static int
svm_vcpu_configure(struct nvmm_cpu *vcpu, uint64_t op, void *data)
{
	struct svm_cpudata *cpudata = vcpu->cpudata;

	switch (op) {
	case NVMM_VCPU_CONF_MD(NVMM_VCPU_CONF_CPUID):
		return svm_vcpu_configure_cpuid(cpudata, data);
	default:
		return EINVAL;
	}
}

/* -------------------------------------------------------------------------- */

static void
svm_tlb_flush(struct pmap *pm)
{
	struct nvmm_machine *mach = pm->pm_data;
	struct svm_machdata *machdata = mach->machdata;

	atomic_inc_64(&machdata->mach_htlb_gen);

	/* Generates IPIs, which cause #VMEXITs. */
	pmap_tlb_shootdown(pmap_kernel(), -1, PTE_G, TLBSHOOT_UPDATE);
}

static void
svm_machine_create(struct nvmm_machine *mach)
{
	struct svm_machdata *machdata;

	/* Fill in pmap info. */
	mach->vm->vm_map.pmap->pm_data = (void *)mach;
	mach->vm->vm_map.pmap->pm_tlb_flush = svm_tlb_flush;

	machdata = kmem_zalloc(sizeof(struct svm_machdata), KM_SLEEP);
	mach->machdata = machdata;

	/* Start with an hTLB flush everywhere. */
	machdata->mach_htlb_gen = 1;
}

static void
svm_machine_destroy(struct nvmm_machine *mach)
{
	kmem_free(mach->machdata, sizeof(struct svm_machdata));
}

static int
svm_machine_configure(struct nvmm_machine *mach, uint64_t op, void *data)
{
	panic("%s: impossible", __func__);
}

/* -------------------------------------------------------------------------- */

static bool
svm_ident(void)
{
	u_int descs[4];
	uint64_t msr;

	if (cpu_vendor != CPUVENDOR_AMD) {
		return false;
	}
	if (!(cpu_feature[3] & CPUID_SVM)) {
		return false;
	}

	if (curcpu()->ci_max_ext_cpuid < 0x8000000a) {
		return false;
	}
	x86_cpuid(0x8000000a, descs);

	/* Want Nested Paging. */
	if (!(descs[3] & CPUID_AMD_SVM_NP)) {
		return false;
	}

	/* Want nRIP. */
	if (!(descs[3] & CPUID_AMD_SVM_NRIPS)) {
		return false;
	}

	svm_decode_assist = (descs[3] & CPUID_AMD_SVM_DecodeAssist) != 0;

	msr = rdmsr(MSR_VMCR);
	if ((msr & VMCR_SVMED) && (msr & VMCR_LOCK)) {
		return false;
	}

	return true;
}

static void
svm_init_asid(uint32_t maxasid)
{
	size_t i, j, allocsz;

	mutex_init(&svm_asidlock, MUTEX_DEFAULT, IPL_NONE);

	/* Arbitrarily limit. */
	maxasid = uimin(maxasid, 8192);

	svm_maxasid = maxasid;
	allocsz = roundup(maxasid, 8) / 8;
	svm_asidmap = kmem_zalloc(allocsz, KM_SLEEP);

	/* ASID 0 is reserved for the host. */
	svm_asidmap[0] |= __BIT(0);

	/* ASID n-1 is special, we share it. */
	i = (maxasid - 1) / 8;
	j = (maxasid - 1) % 8;
	svm_asidmap[i] |= __BIT(j);
}
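
/*
 * Note: with the 8192 cap above, the ASID bitmap occupies at most
 * roundup(8192, 8) / 8 = 1024 bytes. Two entries are pre-marked busy:
 * ASID 0 (reserved for the host) and ASID maxasid-1 (the shared
 * fallback handed out by svm_asid_alloc() when the map is full).
 */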

static void
svm_change_cpu(void *arg1, void *arg2)
{
	bool enable = (bool)arg1;
	uint64_t msr;

	msr = rdmsr(MSR_VMCR);
	if (msr & VMCR_SVMED) {
		wrmsr(MSR_VMCR, msr & ~VMCR_SVMED);
	}

	if (!enable) {
		wrmsr(MSR_VM_HSAVE_PA, 0);
	}

	msr = rdmsr(MSR_EFER);
	if (enable) {
		msr |= EFER_SVME;
	} else {
		msr &= ~EFER_SVME;
	}
	wrmsr(MSR_EFER, msr);

	if (enable) {
		wrmsr(MSR_VM_HSAVE_PA, hsave[cpu_index(curcpu())].pa);
	}
}

static void
svm_init(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	struct vm_page *pg;
	u_int descs[4];
	uint64_t xc;

	x86_cpuid(0x8000000a, descs);

	/* The guest TLB flush command. */
	if (descs[3] & CPUID_AMD_SVM_FlushByASID) {
		svm_ctrl_tlb_flush = VMCB_CTRL_TLB_CTRL_FLUSH_GUEST;
	} else {
		svm_ctrl_tlb_flush = VMCB_CTRL_TLB_CTRL_FLUSH_ALL;
	}

	/* Init the ASID. */
	svm_init_asid(descs[1]);

	/* Init the XCR0 mask. */
	svm_xcr0_mask = SVM_XCR0_MASK_DEFAULT & x86_xsave_features;

	memset(hsave, 0, sizeof(hsave));
	for (CPU_INFO_FOREACH(cii, ci)) {
		pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_ZERO);
		hsave[cpu_index(ci)].pa = VM_PAGE_TO_PHYS(pg);
	}

	xc = xc_broadcast(0, svm_change_cpu, (void *)true, NULL);
	xc_wait(xc);
}

static void
svm_fini_asid(void)
{
	size_t allocsz;

	allocsz = roundup(svm_maxasid, 8) / 8;
	kmem_free(svm_asidmap, allocsz);

	mutex_destroy(&svm_asidlock);
}

static void
svm_fini(void)
{
	uint64_t xc;
	size_t i;

	xc = xc_broadcast(0, svm_change_cpu, (void *)false, NULL);
	xc_wait(xc);

	for (i = 0; i < MAXCPUS; i++) {
		if (hsave[i].pa != 0)
			uvm_pagefree(PHYS_TO_VM_PAGE(hsave[i].pa));
	}

	svm_fini_asid();
}

static void
svm_capability(struct nvmm_capability *cap)
{
	cap->arch.mach_conf_support = 0;
	cap->arch.vcpu_conf_support =
	    NVMM_CAP_ARCH_VCPU_CONF_CPUID;
	cap->arch.xcr0_mask = svm_xcr0_mask;
	cap->arch.mxcsr_mask = x86_fpu_mxcsr_mask;
	cap->arch.conf_cpuid_maxops = SVM_NCPUIDS;
}

const struct nvmm_impl nvmm_x86_svm = {
	.ident = svm_ident,
	.init = svm_init,
	.fini = svm_fini,
	.capability = svm_capability,
	.mach_conf_max = NVMM_X86_MACH_NCONF,
	.mach_conf_sizes = NULL,
	.vcpu_conf_max = NVMM_X86_VCPU_NCONF,
	.vcpu_conf_sizes = svm_vcpu_conf_sizes,
	.state_size = sizeof(struct nvmm_x64_state),
	.machine_create = svm_machine_create,
	.machine_destroy = svm_machine_destroy,
	.machine_configure = svm_machine_configure,
	.vcpu_create = svm_vcpu_create,
	.vcpu_destroy = svm_vcpu_destroy,
	.vcpu_configure = svm_vcpu_configure,
	.vcpu_setstate = svm_vcpu_setstate,
	.vcpu_getstate = svm_vcpu_getstate,
	.vcpu_inject = svm_vcpu_inject,
	.vcpu_run = svm_vcpu_run
};