/*-
 * Copyright (c) 2012 Sandvine, Inc.
 * Copyright (c) 2012 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: stable/10/sys/amd64/vmm/vmm_instruction_emul.c 267396 2014-06-12 13:48:52Z jhb $
 */

/*
 * Instruction fetch, decode and emulation for memory-mapped accesses that
 * trap into the hypervisor (e.g. nested-page-table / EPT faults).  This
 * file is compiled both into the kernel vmm module and into userland
 * tools (see the _KERNEL conditionals below); only the emulation entry
 * point vmm_emulate_instruction() is available in both environments.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/10/sys/amd64/vmm/vmm_instruction_emul.c 267396 2014-06-12 13:48:52Z jhb $");

#ifdef _KERNEL
#include <sys/param.h>
#include <sys/pcpu.h>
#include <sys/systm.h>

#include <vm/vm.h>
#include <vm/pmap.h>

#include <machine/vmparam.h>
#include <machine/vmm.h>
#else	/* !_KERNEL */
#include <sys/types.h>
#include <sys/errno.h>

#include <machine/vmm.h>

#include <vmmapi.h>
#endif	/* _KERNEL */

/* Guest execution mode; only 64-bit mode is actually decoded below. */
enum cpu_mode {
	CPU_MODE_COMPATIBILITY,		/* IA-32E mode (CS.L = 0) */
	CPU_MODE_64BIT,			/* IA-32E mode (CS.L = 1) */
};

/* struct vie_op.op_type */
enum {
	VIE_OP_TYPE_NONE = 0,
	VIE_OP_TYPE_MOV,
	VIE_OP_TYPE_MOVSX,
	VIE_OP_TYPE_MOVZX,
	VIE_OP_TYPE_AND,
	VIE_OP_TYPE_OR,
	VIE_OP_TYPE_TWO_BYTE,
	VIE_OP_TYPE_LAST
};

/* struct vie_op.op_flags */
#define	VIE_OP_F_IMM		(1 << 0)	/* immediate operand present */
#define	VIE_OP_F_IMM8		(1 << 1)	/* 8-bit immediate operand */

/*
 * Opcode dispatch tables, indexed by opcode byte.  Entries not listed
 * decode to VIE_OP_TYPE_NONE (== 0) and are rejected by the decoder.
 */
static const struct vie_op two_byte_opcodes[256] = {
	[0xB6] = {
		.op_byte = 0xB6,
		.op_type = VIE_OP_TYPE_MOVZX,
	},
	[0xBE] = {
		.op_byte = 0xBE,
		.op_type = VIE_OP_TYPE_MOVSX,
	},
};

static const struct vie_op one_byte_opcodes[256] = {
	[0x0F] = {
		.op_byte = 0x0F,
		.op_type = VIE_OP_TYPE_TWO_BYTE
	},
	[0x88] = {
		.op_byte = 0x88,
		.op_type = VIE_OP_TYPE_MOV,
	},
	[0x89] = {
		.op_byte = 0x89,
		.op_type = VIE_OP_TYPE_MOV,
	},
	[0x8A] = {
		.op_byte = 0x8A,
		.op_type = VIE_OP_TYPE_MOV,
	},
	[0x8B] = {
		.op_byte = 0x8B,
		.op_type = VIE_OP_TYPE_MOV,
	},
	[0xC7] = {
		.op_byte = 0xC7,
		.op_type = VIE_OP_TYPE_MOV,
		.op_flags = VIE_OP_F_IMM,
	},
	[0x23] = {
		.op_byte = 0x23,
		.op_type = VIE_OP_TYPE_AND,
	},
	[0x81] = {
		/* XXX Group 1 extended opcode - not just AND */
		.op_byte = 0x81,
		.op_type = VIE_OP_TYPE_AND,
		.op_flags = VIE_OP_F_IMM,
	},
	[0x83] = {
		/* XXX Group 1 extended opcode - not just OR */
		.op_byte = 0x83,
		.op_type = VIE_OP_TYPE_OR,
		.op_flags = VIE_OP_F_IMM8,
	},
};

/* struct vie.mod */
#define	VIE_MOD_INDIRECT	0
#define	VIE_MOD_INDIRECT_DISP8	1
#define	VIE_MOD_INDIRECT_DISP32	2
#define	VIE_MOD_DIRECT		3

/* struct vie.rm */
#define	VIE_RM_SIB		4
#define	VIE_RM_DISP32		5

#define	GB			(1024 * 1024 * 1024)

/*
 * Map the 4-bit register encodings (ModRM reg/rm extended with the REX
 * R/B bits) to the vmm register name space.
 */
static enum vm_reg_name gpr_map[16] = {
	VM_REG_GUEST_RAX,
	VM_REG_GUEST_RCX,
	VM_REG_GUEST_RDX,
	VM_REG_GUEST_RBX,
	VM_REG_GUEST_RSP,
	VM_REG_GUEST_RBP,
	VM_REG_GUEST_RSI,
	VM_REG_GUEST_RDI,
	VM_REG_GUEST_R8,
	VM_REG_GUEST_R9,
	VM_REG_GUEST_R10,
	VM_REG_GUEST_R11,
	VM_REG_GUEST_R12,
	VM_REG_GUEST_R13,
	VM_REG_GUEST_R14,
	VM_REG_GUEST_R15
};

/* Mask covering the low 'size' bytes of an operand, indexed by size. */
static uint64_t size2mask[] = {
	[1] = 0xff,
	[2] = 0xffff,
	[4] = 0xffffffff,
	[8] = 0xffffffffffffffff,
};

/*
 * Read the full 64-bit value of guest register 'reg' into '*rval'.
 * Returns 0 on success or the error from vm_get_register().
 */
static int
vie_read_register(void *vm, int vcpuid, enum vm_reg_name reg, uint64_t *rval)
{
	int error;

	error = vm_get_register(vm, vcpuid, reg, rval);

	return (error);
}

/*
 * Read the byte register addressed by the 'ModRM:reg' field of the
 * decoded instruction into '*rval', honoring the 64-bit mode rules for
 * legacy high-byte registers described below.
 */
static int
vie_read_bytereg(void *vm, int vcpuid, struct vie *vie, uint8_t *rval)
{
	uint64_t val;
	int error, rshift;
	enum vm_reg_name reg;

	rshift = 0;
	reg = gpr_map[vie->reg];

	/*
	 * 64-bit mode imposes limitations on accessing legacy byte registers.
	 *
	 * The legacy high-byte registers cannot be addressed if the REX
	 * prefix is present. In this case the values 4, 5, 6 and 7 of the
	 * 'ModRM:reg' field address %spl, %bpl, %sil and %dil respectively.
	 *
	 * If the REX prefix is not present then the values 4, 5, 6 and 7
	 * of the 'ModRM:reg' field address the legacy high-byte registers,
	 * %ah, %ch, %dh and %bh respectively.
	 */
	if (!vie->rex_present) {
		if (vie->reg & 0x4) {
			/*
			 * Obtain the value of %ah by reading %rax and shifting
			 * right by 8 bits (same for %bh, %ch and %dh).
			 */
			rshift = 8;
			reg = gpr_map[vie->reg & 0x3];
		}
	}

	error = vm_get_register(vm, vcpuid, reg, &val);
	*rval = val >> rshift;
	return (error);
}

/*
 * Write the low 'size' bytes of 'val' to guest register 'reg'.
 *
 * For 1- and 2-byte writes the upper bits of the destination register
 * are preserved; a 4-byte write zero-extends to 64 bits, matching the
 * hardware behavior of 32-bit operations in 64-bit mode.
 */
static int
vie_update_register(void *vm, int vcpuid, enum vm_reg_name reg,
		    uint64_t val, int size)
{
	int error;
	uint64_t origval;

	switch (size) {
	case 1:
	case 2:
		error = vie_read_register(vm, vcpuid, reg, &origval);
		if (error)
			return (error);
		val &= size2mask[size];
		val |= origval & ~size2mask[size];
		break;
	case 4:
		val &= 0xffffffffUL;
		break;
	case 8:
		break;
	default:
		return (EINVAL);
	}

	error = vm_set_register(vm, vcpuid, reg, val);
	return (error);
}

/*
 * The following simplifying assumptions are made during emulation:
 *
 * - guest is in 64-bit mode
 *   - default address size is 64-bits
 *   - default operand size is 32-bits
 *
 * - operand size override is not supported
 *
 * - address size override is not supported
 */
static int
emulate_mov(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
	    mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
{
	int error, size;
	enum vm_reg_name reg;
	uint8_t byte;
	uint64_t val;

	size = 4;
	error = EINVAL;

	switch (vie->op.op_byte) {
	case 0x88:
		/*
		 * MOV byte from reg (ModRM:reg) to mem (ModRM:r/m)
		 * 88/r:	mov r/m8, r8
		 * REX + 88/r:	mov r/m8, r8 (%ah, %ch, %dh, %bh not available)
		 */
		size = 1;
		error = vie_read_bytereg(vm, vcpuid, vie, &byte);
		if (error == 0)
			error = memwrite(vm, vcpuid, gpa, byte, size, arg);
		break;
	case 0x89:
		/*
		 * MOV from reg (ModRM:reg) to mem (ModRM:r/m)
		 * 89/r:	mov r/m32, r32
		 * REX.W + 89/r	mov r/m64, r64
		 */
		if (vie->rex_w)
			size = 8;
		reg = gpr_map[vie->reg];
		error = vie_read_register(vm, vcpuid, reg, &val);
		if (error == 0) {
			val &= size2mask[size];
			error = memwrite(vm, vcpuid, gpa, val, size, arg);
		}
		break;
	case 0x8A:
	case 0x8B:
		/*
		 * MOV from mem (ModRM:r/m) to reg (ModRM:reg)
		 * 8A/r:	mov r/m8, r8
		 * REX + 8A/r:	mov r/m8, r8
		 * 8B/r:	mov r32, r/m32
		 * REX.W 8B/r:	mov r64, r/m64
		 */
		if (vie->op.op_byte == 0x8A)
			size = 1;
		else if (vie->rex_w)
			size = 8;
		error = memread(vm, vcpuid, gpa, &val, size, arg);
		if (error == 0) {
			reg = gpr_map[vie->reg];
			error = vie_update_register(vm, vcpuid, reg, val, size);
		}
		break;
	case 0xC7:
		/*
		 * MOV from imm32 to mem (ModRM:r/m)
		 * C7/0		mov r/m32, imm32
		 * REX.W + C7/0	mov r/m64, imm32 (sign-extended to 64-bits)
		 */
		val = vie->immediate;		/* already sign-extended */

		if (vie->rex_w)
			size = 8;

		if (size != 8)
			val &= size2mask[size];

		error = memwrite(vm, vcpuid, gpa, val, size, arg);
		break;
	default:
		break;
	}

	return (error);
}

/*
 * The following simplifying assumptions are made during emulation:
 *
 * - guest is in 64-bit mode
 *   - default address size is 64-bits
 *   - default operand size is 32-bits
 *
 * - operand size override is not supported
 *
 * - address size override is not supported
 */
static int
emulate_movx(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
	     mem_region_read_t memread, mem_region_write_t memwrite,
	     void *arg)
{
	int error, size;
	enum vm_reg_name reg;
	uint64_t val;

	size = 4;
	error = EINVAL;

	switch (vie->op.op_byte) {
	case 0xB6:
		/*
		 * MOV and zero extend byte from mem (ModRM:r/m) to
		 * reg (ModRM:reg).
		 *
		 * 0F B6/r		movzx r/m8, r32
		 * REX.W + 0F B6/r	movzx r/m8, r64
		 */

		/* get the first operand */
		error = memread(vm, vcpuid, gpa, &val, 1, arg);
		if (error)
			break;

		/* get the second operand */
		reg = gpr_map[vie->reg];

		if (vie->rex_w)
			size = 8;

		/* write the result */
		error = vie_update_register(vm, vcpuid, reg, val, size);
		break;
	case 0xBE:
		/*
		 * MOV and sign extend byte from mem (ModRM:r/m) to
		 * reg (ModRM:reg).
		 *
		 * 0F BE/r		movsx r/m8, r32
		 * REX.W + 0F BE/r	movsx r/m8, r64
		 */

		/* get the first operand */
		error = memread(vm, vcpuid, gpa, &val, 1, arg);
		if (error)
			break;

		/* get the second operand */
		reg = gpr_map[vie->reg];

		if (vie->rex_w)
			size = 8;

		/* sign extend byte */
		val = (int8_t)val;

		/* write the result */
		error = vie_update_register(vm, vcpuid, reg, val, size);
		break;
	default:
		break;
	}
	return (error);
}

/*
 * Emulate the AND forms listed in the opcode table (reg,mem and
 * mem,imm32).  Same 64-bit-mode simplifying assumptions as above.
 * NOTE(review): guest %rflags are not updated by this emulation --
 * callers presumably do not depend on the flag results; confirm.
 */
static int
emulate_and(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
	    mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
{
	int error, size;
	enum vm_reg_name reg;
	uint64_t val1, val2;

	size = 4;
	error = EINVAL;

	switch (vie->op.op_byte) {
	case 0x23:
		/*
		 * AND reg (ModRM:reg) and mem (ModRM:r/m) and store the
		 * result in reg.
		 *
		 * 23/r		and r32, r/m32
		 * REX.W + 23/r	and r64, r/m64
		 */
		if (vie->rex_w)
			size = 8;

		/* get the first operand */
		reg = gpr_map[vie->reg];
		error = vie_read_register(vm, vcpuid, reg, &val1);
		if (error)
			break;

		/* get the second operand */
		error = memread(vm, vcpuid, gpa, &val2, size, arg);
		if (error)
			break;

		/* perform the operation and write the result */
		val1 &= val2;
		error = vie_update_register(vm, vcpuid, reg, val1, size);
		break;
	case 0x81:
		/*
		 * AND mem (ModRM:r/m) with immediate and store the
		 * result in mem.
		 *
		 * 81/          and r/m32, imm32
		 * REX.W + 81/  and r/m64, imm32 sign-extended to 64
		 *
		 * Currently, only the AND operation of the 0x81 opcode
		 * is implemented (ModRM:reg = b100).
		 */
		if ((vie->reg & 7) != 4)
			break;

		if (vie->rex_w)
			size = 8;
		
		/* get the first operand */
                error = memread(vm, vcpuid, gpa, &val1, size, arg);
                if (error)
			break;

                /*
		 * perform the operation with the pre-fetched immediate
		 * operand and write the result
		 */
                val1 &= vie->immediate;
                error = memwrite(vm, vcpuid, gpa, val1, size, arg);
		break;
	default:
		break;
	}
	return (error);
}

/*
 * Emulate the OR r/m,imm8 form listed in the opcode table.  Same
 * 64-bit-mode simplifying assumptions as above.
 * NOTE(review): guest %rflags are not updated here either -- confirm
 * callers do not depend on the flag results.
 */
static int
emulate_or(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
	    mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
{
	int error, size;
	uint64_t val1;

	size = 4;
	error = EINVAL;

	switch (vie->op.op_byte) {
	case 0x83:
		/*
		 * OR mem (ModRM:r/m) with immediate and store the
		 * result in mem.
		 *
		 * 83/          OR r/m32, imm8 sign-extended to 32
		 * REX.W + 83/  OR r/m64, imm8 sign-extended to 64
		 *
		 * Currently, only the OR operation of the 0x83 opcode
		 * is implemented (ModRM:reg = b001).
		 */
		if ((vie->reg & 7) != 1)
			break;

		if (vie->rex_w)
			size = 8;
		
		/* get the first operand */
                error = memread(vm, vcpuid, gpa, &val1, size, arg);
                if (error)
			break;

                /*
		 * perform the operation with the pre-fetched immediate
		 * operand and write the result
		 */
                val1 |= vie->immediate;
                error = memwrite(vm, vcpuid, gpa, val1, size, arg);
		break;
	default:
		break;
	}
	return (error);
}

/*
 * Emulate the memory access of a previously decoded instruction.
 *
 * 'gpa' is the guest physical address that faulted; 'memread' and
 * 'memwrite' are callbacks that perform the actual device access with
 * 'memarg' passed through.  'vie' must have been successfully decoded
 * (vie->decoded set), otherwise EINVAL is returned.
 */
int
vmm_emulate_instruction(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
			mem_region_read_t memread, mem_region_write_t memwrite,
			void *memarg)
{
	int error;

	if (!vie->decoded)
		return (EINVAL);

	switch (vie->op.op_type) {
	case VIE_OP_TYPE_MOV:
		error = emulate_mov(vm, vcpuid, gpa, vie,
				    memread, memwrite, memarg);
		break;
	case VIE_OP_TYPE_MOVSX:
	case VIE_OP_TYPE_MOVZX:
		error = emulate_movx(vm, vcpuid, gpa, vie,
				     memread, memwrite, memarg);
		break;
	case VIE_OP_TYPE_AND:
		error = emulate_and(vm, vcpuid, gpa, vie,
				    memread, memwrite, memarg);
		break;
	case VIE_OP_TYPE_OR:
		error = emulate_or(vm, vcpuid, gpa, vie,
				    memread, memwrite, memarg);
		break;
	default:
		error = EINVAL;
		break;
	}

	return (error);
}

#ifdef _KERNEL
/*
 * Reset 'vie' to a pristine state in preparation for instruction
 * fetch and decode.  base/index registers are marked invalid
 * (VM_REG_LAST) until decode_modrm()/decode_sib() fill them in.
 */
void
vie_init(struct vie *vie)
{

	bzero(vie, sizeof(struct vie));

	vie->base_register = VM_REG_LAST;
	vie->index_register = VM_REG_LAST;
}

/*
 * Translate guest linear address 'gla' to a guest physical address by
 * walking the guest page tables rooted at 'ptpphys'.  On success
 * returns 0 with '*gpa' set to the translation and '*gpaend' to the
 * end of the enclosing page.  Returns -1 on any walk failure
 * (unmapped PTE, page size > 1GB, or inaccessible page table page).
 */
static int
gla2gpa(struct vm *vm, uint64_t gla, uint64_t ptpphys,
	uint64_t *gpa, uint64_t *gpaend)
{
	int nlevels, ptpshift, ptpindex;
	uint64_t *ptpbase, pte, pgsize;
	void *cookie;

	/*
	 * XXX assumes 64-bit guest with 4 page walk levels
	 */
	nlevels = 4;
	while (--nlevels >= 0) {
		/* Zero out the lower 12 bits and the upper 12 bits */
		ptpphys >>= 12; ptpphys <<= 24; ptpphys >>= 12;

		ptpbase = vm_gpa_hold(vm, ptpphys, PAGE_SIZE, VM_PROT_READ,
				      &cookie);
		if (ptpbase == NULL)
			goto error;

		ptpshift = PAGE_SHIFT + nlevels * 9;
		ptpindex = (gla >> ptpshift) & 0x1FF;
		pgsize = 1UL << ptpshift;

		pte = ptpbase[ptpindex];

		vm_gpa_release(cookie);

		if ((pte & PG_V) == 0)
			goto error;

		if (pte & PG_PS) {
			if (pgsize > 1 * GB)
				goto error;
			else
				break;
		}

		ptpphys = pte;
	}

	/* Zero out the lower 'ptpshift' bits and the upper 12 bits */
	pte >>= ptpshift; pte <<= (ptpshift + 12); pte >>= 12;
	*gpa = pte | (gla & (pgsize - 1));
	*gpaend = pte + pgsize;
	return (0);

error:
	return (-1);
}

/*
 * Copy 'inst_length' instruction bytes starting at guest linear
 * address 'rip' into vie->inst, translating through the guest page
 * tables rooted at 'cr3'.  The fetch may span a page boundary.
 * Returns 0 when all bytes were fetched, -1 otherwise.
 */
int
vmm_fetch_instruction(struct vm *vm, int cpuid, uint64_t rip, int inst_length,
		      uint64_t cr3, struct vie *vie)
{
	int n, err, prot;
	uint64_t gpa, gpaend, off;
	void *hpa, *cookie;

	/*
	 * XXX cache previously fetched instructions using 'rip' as the tag
	 */

	prot = VM_PROT_READ | VM_PROT_EXECUTE;
	if (inst_length > VIE_INST_SIZE)
		panic("vmm_fetch_instruction: invalid length %d", inst_length);

	/* Copy the instruction into 'vie' */
	while (vie->num_valid < inst_length) {
		err = gla2gpa(vm, rip, cr3, &gpa, &gpaend);
		if (err)
			break;

		off = gpa & PAGE_MASK;
		n = min(inst_length - vie->num_valid, PAGE_SIZE - off);

		if ((hpa = vm_gpa_hold(vm, gpa, n, prot, &cookie)) == NULL)
			break;

		bcopy(hpa, &vie->inst[vie->num_valid], n);

		vm_gpa_release(cookie);

		rip += n;
		vie->num_valid += n;
	}

	if (vie->num_valid == inst_length)
		return (0);
	else
		return (-1);
}

/*
 * Return the next unprocessed instruction byte in '*x' without
 * consuming it.  Returns -1 if the fetched bytes are exhausted.
 */
static int
vie_peek(struct vie *vie, uint8_t *x)
{

	if (vie->num_processed < vie->num_valid)
		{
		*x = vie->inst[vie->num_processed];
		return (0);
	} else
		return (-1);
}

/* Consume the byte most recently returned by vie_peek(). */
static void
vie_advance(struct vie *vie)
{

	vie->num_processed++;
}

/*
 * Decode an optional REX prefix (0x40-0x4F) into the rex_* fields.
 * The absence of a REX prefix is not an error.
 */
static int
decode_rex(struct vie *vie)
{
	uint8_t x;

	if (vie_peek(vie, &x))
		return (-1);

	if (x >= 0x40 && x <= 0x4F) {
		vie->rex_present = 1;

		vie->rex_w = x & 0x8 ? 1 : 0;
		vie->rex_r = x & 0x4 ? 1 : 0;
		vie->rex_x = x & 0x2 ? 1 : 0;
		vie->rex_b = x & 0x1 ? 1 : 0;

		vie_advance(vie);
	}

	return (0);
}

/*
 * Decode the second opcode byte of a 0x0F-prefixed instruction via
 * two_byte_opcodes[].  Returns -1 for unrecognized opcodes.
 */
static int
decode_two_byte_opcode(struct vie *vie)
{
	uint8_t x;

	if (vie_peek(vie, &x))
		return (-1);

	vie->op = two_byte_opcodes[x];

	if (vie->op.op_type == VIE_OP_TYPE_NONE)
		return (-1);

	vie_advance(vie);
	return (0);
}

/*
 * Decode the primary opcode byte via one_byte_opcodes[], chaining to
 * decode_two_byte_opcode() for the 0x0F escape.  Returns -1 for
 * unrecognized opcodes.
 */
static int
decode_opcode(struct vie *vie)
{
	uint8_t x;

	if (vie_peek(vie, &x))
		return (-1);

	vie->op = one_byte_opcodes[x];

	if (vie->op.op_type == VIE_OP_TYPE_NONE)
		return (-1);

	vie_advance(vie);

	if (vie->op.op_type == VIE_OP_TYPE_TWO_BYTE)
		return (decode_two_byte_opcode(vie));

	return (0);
}

/*
 * Decode the ModRM byte: split it into mod/reg/rm, fold in the REX
 * R/B extension bits, determine the base register and the number of
 * displacement bytes that follow.  Rejects direct (register-only)
 * addressing since an EPT fault implies a memory operand.
 */
static int
decode_modrm(struct vie *vie)
{
	uint8_t x;
	enum cpu_mode cpu_mode;

	/*
	 * XXX assuming that guest is in IA-32E 64-bit mode
	 */
	cpu_mode = CPU_MODE_64BIT;

	if (vie_peek(vie, &x))
		return (-1);

	vie->mod = (x >> 6) & 0x3;
	vie->rm =  (x >> 0) & 0x7;
	vie->reg = (x >> 3) & 0x7;

	/*
	 * A direct addressing mode makes no sense in the context of an EPT
	 * fault. There has to be a memory access involved to cause the
	 * EPT fault.
	 */
	if (vie->mod == VIE_MOD_DIRECT)
		return (-1);

	if ((vie->mod == VIE_MOD_INDIRECT && vie->rm == VIE_RM_DISP32) ||
	    (vie->mod != VIE_MOD_DIRECT && vie->rm == VIE_RM_SIB)) {
		/*
		 * Table 2-5: Special Cases of REX Encodings
		 *
		 * mod=0, r/m=5 is used in the compatibility mode to
		 * indicate a disp32 without a base register.
		 *
		 * mod!=3, r/m=4 is used in the compatibility mode to
		 * indicate that the SIB byte is present.
		 *
		 * The 'b' bit in the REX prefix is don't care in
		 * this case.
		 */
	} else {
		vie->rm |= (vie->rex_b << 3);
	}

	vie->reg |= (vie->rex_r << 3);

	/* SIB */
	if (vie->mod != VIE_MOD_DIRECT && vie->rm == VIE_RM_SIB)
		goto done;

	vie->base_register = gpr_map[vie->rm];

	switch (vie->mod) {
	case VIE_MOD_INDIRECT_DISP8:
		vie->disp_bytes = 1;
		break;
	case VIE_MOD_INDIRECT_DISP32:
		vie->disp_bytes = 4;
		break;
	case VIE_MOD_INDIRECT:
		if (vie->rm == VIE_RM_DISP32) {
			vie->disp_bytes = 4;
			/*
			 * Table 2-7. RIP-Relative Addressing
			 *
			 * In 64-bit mode mod=00 r/m=101 implies [rip] + disp32
			 * whereas in compatibility mode it just implies disp32.
			 */

			if (cpu_mode == CPU_MODE_64BIT)
				vie->base_register = VM_REG_GUEST_RIP;
			else
				vie->base_register = VM_REG_LAST;
		}
		break;
	}

done:
	vie_advance(vie);

	return (0);
}

/*
 * Decode the SIB (scale-index-base) byte if the ModRM byte indicated
 * its presence; otherwise a no-op.  Fills in base_register,
 * index_register, scale and possibly disp_bytes.
 */
static int
decode_sib(struct vie *vie)
{
	uint8_t x;

	/* Proceed only if SIB byte is present */
	if (vie->mod == VIE_MOD_DIRECT || vie->rm != VIE_RM_SIB)
		return (0);

	if (vie_peek(vie, &x))
		return (-1);

	/* De-construct the SIB byte */
	vie->ss = (x >> 6) & 0x3;
	vie->index = (x >> 3) & 0x7;
	vie->base = (x >> 0) & 0x7;

	/* Apply the REX prefix modifiers */
	vie->index |= vie->rex_x << 3;
	vie->base |= vie->rex_b << 3;

	switch (vie->mod) {
	case VIE_MOD_INDIRECT_DISP8:
		vie->disp_bytes = 1;
		break;
	case VIE_MOD_INDIRECT_DISP32:
		vie->disp_bytes = 4;
		break;
	}

	if (vie->mod == VIE_MOD_INDIRECT &&
	    (vie->base == 5 || vie->base == 13)) {
		/*
		 * Special case when base register is unused if mod = 0
		 * and base = %rbp or %r13.
		 *
		 * Documented in:
		 * Table 2-3: 32-bit Addressing Forms with the SIB Byte
		 * Table 2-5: Special Cases of REX Encodings
		 */
		vie->disp_bytes = 4;
	} else {
		vie->base_register = gpr_map[vie->base];
	}

	/*
	 * All encodings of 'index' are valid except for %rsp (4).
	 *
	 * Documented in:
	 * Table 2-3: 32-bit Addressing Forms with the SIB Byte
	 * Table 2-5: Special Cases of REX Encodings
	 */
	if (vie->index != 4)
		vie->index_register = gpr_map[vie->index];

	/* 'scale' makes sense only in the context of an index register */
	if (vie->index_register < VM_REG_LAST)
		vie->scale = 1 << vie->ss;

	vie_advance(vie);

	return (0);
}

/*
 * Consume the displacement bytes (1 or 4, as determined by the
 * ModRM/SIB decode) and store the sign-extended result in
 * vie->displacement.
 */
static int
decode_displacement(struct vie *vie)
{
	int n, i;
	uint8_t x;

	union {
		char	buf[4];
		int8_t	signed8;
		int32_t	signed32;
	} u;

	if ((n = vie->disp_bytes) == 0)
		return (0);

	if (n != 1 && n != 4)
		panic("decode_displacement: invalid disp_bytes %d", n);

	for (i = 0; i < n; i++) {
		if (vie_peek(vie, &x))
			return (-1);

		u.buf[i] = x;
		vie_advance(vie);
	}

	if (n == 1)
		vie->displacement = u.signed8;		/* sign-extended */
	else
		vie->displacement = u.signed32;		/* sign-extended */

	return (0);
}

/*
 * Consume the immediate operand bytes (1 or 4, per the opcode's
 * VIE_OP_F_IMM/VIE_OP_F_IMM8 flags) and store the sign-extended
 * result in vie->immediate.
 */
static int
decode_immediate(struct vie *vie)
{
	int i, n;
	uint8_t x;
	union {
		char	buf[4];
		int8_t	signed8;
		int32_t	signed32;
	} u;

	/* Figure out immediate operand size (if any) */
	if (vie->op.op_flags & VIE_OP_F_IMM)
		vie->imm_bytes = 4;
	else if (vie->op.op_flags & VIE_OP_F_IMM8)
		vie->imm_bytes = 1;

	if ((n = vie->imm_bytes) == 0)
		return (0);

	if (n != 1 && n != 4)
		panic("decode_immediate: invalid imm_bytes %d", n);

	for (i = 0; i < n; i++) {
		if (vie_peek(vie, &x))
			return (-1);

		u.buf[i] = x;
		vie_advance(vie);
	}
	
	if (n == 1)
		vie->immediate = u.signed8;		/* sign-extended */
	else
		vie->immediate = u.signed32;		/* sign-extended */

	return (0);
}

/*
 * Verify that all the bytes in the instruction buffer were consumed.
 */
static int
verify_inst_length(struct vie *vie)
{

	if (vie->num_processed == vie->num_valid)
		return (0);
	else
		return (-1);
}

/*
 * Verify that the 'guest linear address' provided as collateral of the nested
 * page table fault matches with our instruction decoding.
 */
static int
verify_gla(struct vm *vm, int cpuid, uint64_t gla, struct vie *vie)
{
	int error;
	uint64_t base, idx;

	/* Skip 'gla' verification */
	if (gla == VIE_INVALID_GLA)
		return (0);

	base = 0;
	if (vie->base_register != VM_REG_LAST) {
		error = vm_get_register(vm, cpuid, vie->base_register, &base);
		if (error) {
			printf("verify_gla: error %d getting base reg %d\n",
				error, vie->base_register);
			return (-1);
		}

		/*
		 * RIP-relative addressing starts from the following
		 * instruction
		 */
		if (vie->base_register == VM_REG_GUEST_RIP)
			base += vie->num_valid;
	}

	idx = 0;
	if (vie->index_register != VM_REG_LAST) {
		error = vm_get_register(vm, cpuid, vie->index_register, &idx);
		if (error) {
			printf("verify_gla: error %d getting index reg %d\n",
				error, vie->index_register);
			return (-1);
		}
	}

	if (base + vie->scale * idx + vie->displacement != gla) {
		printf("verify_gla mismatch: "
		       "base(0x%0lx), scale(%d), index(0x%0lx), "
		       "disp(0x%0lx), gla(0x%0lx)\n",
		       base, vie->scale, idx, vie->displacement, gla);
		return (-1);
	}

	return (0);
}

/*
 * Decode the instruction bytes previously copied into 'vie' by
 * vmm_fetch_instruction().  On success sets vie->decoded so that
 * vmm_emulate_instruction() will accept the result; returns -1 if any
 * decode stage fails or if the decoded addressing disagrees with the
 * fault's guest linear address 'gla'.
 */
int
vmm_decode_instruction(struct vm *vm, int cpuid, uint64_t gla, struct vie *vie)
{

	if (decode_rex(vie))
		return (-1);

	if (decode_opcode(vie))
		return (-1);

	if (decode_modrm(vie))
		return (-1);

	if (decode_sib(vie))
		return (-1);

	if (decode_displacement(vie))
		return (-1);

	if (decode_immediate(vie))
		return (-1);

	if (verify_inst_length(vie))
		return (-1);

	if (verify_gla(vm, cpuid, gla, vie))
		return (-1);

	vie->decoded = 1;	/* success */

	return (0);
}
#endif	/* _KERNEL */