/*-
 * Copyright (c) 2012 Sandvine, Inc.
 * Copyright (c) 2012 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

/*
 * Instruction fetch, decode and emulation for memory-mapped I/O accesses
 * that trap out of the guest (e.g. on a nested page table fault).  The
 * decode/fetch half is kernel-only (_KERNEL); the emulation half is shared
 * with userspace via vmmapi.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#ifdef _KERNEL
#include <sys/param.h>
#include <sys/pcpu.h>
#include <sys/systm.h>

#include <vm/vm.h>
#include <vm/pmap.h>

#include <machine/pmap.h>
#include <machine/vmparam.h>
#include <machine/vmm.h>
#else	/* !_KERNEL */
#include <sys/types.h>
#include <sys/errno.h>

#include <machine/vmm.h>

#include <vmmapi.h>
#endif	/* _KERNEL */

enum cpu_mode {
	CPU_MODE_COMPATIBILITY,		/* IA-32E mode (CS.L = 0) */
	CPU_MODE_64BIT,			/* IA-32E mode (CS.L = 1) */
};

/* struct vie_op.op_type */
enum {
	VIE_OP_TYPE_NONE = 0,
	VIE_OP_TYPE_MOV,
	VIE_OP_TYPE_AND,
	VIE_OP_TYPE_OR,
	VIE_OP_TYPE_LAST
};

/* struct vie_op.op_flags */
#define	VIE_OP_F_IMM		(1 << 0)	/* immediate operand present */
#define	VIE_OP_F_IMM8		(1 << 1)	/* 8-bit immediate operand */

/*
 * Table of the one-byte opcodes this emulator understands, indexed by
 * opcode byte.  Entries not listed decode to VIE_OP_TYPE_NONE.
 */
static const struct vie_op one_byte_opcodes[256] = {
	[0x88] = {
		.op_byte = 0x88,
		.op_type = VIE_OP_TYPE_MOV,
	},
	[0x89] = {
		.op_byte = 0x89,
		.op_type = VIE_OP_TYPE_MOV,
	},
	[0x8A] = {
		.op_byte = 0x8A,
		.op_type = VIE_OP_TYPE_MOV,
	},
	[0x8B] = {
		.op_byte = 0x8B,
		.op_type = VIE_OP_TYPE_MOV,
	},
	[0xC7] = {
		.op_byte = 0xC7,
		.op_type = VIE_OP_TYPE_MOV,
		.op_flags = VIE_OP_F_IMM,
	},
	[0x23] = {
		.op_byte = 0x23,
		.op_type = VIE_OP_TYPE_AND,
	},
	[0x81] = {
		/* XXX Group 1 extended opcode - not just AND */
		.op_byte = 0x81,
		.op_type = VIE_OP_TYPE_AND,
		.op_flags = VIE_OP_F_IMM,
	},
	[0x83] = {
		/* XXX Group 1 extended opcode - not just OR */
		.op_byte = 0x83,
		.op_type = VIE_OP_TYPE_OR,
		.op_flags = VIE_OP_F_IMM8,
	},
};

/* struct vie.mod */
#define	VIE_MOD_INDIRECT		0
#define	VIE_MOD_INDIRECT_DISP8		1
#define	VIE_MOD_INDIRECT_DISP32		2
#define	VIE_MOD_DIRECT			3

/* struct vie.rm */
#define	VIE_RM_SIB			4
#define	VIE_RM_DISP32			5

#define	GB				(1024 * 1024 * 1024)

/* Map ModRM/SIB register encodings (with REX extension bit) to VM registers */
static enum vm_reg_name gpr_map[16] = {
	VM_REG_GUEST_RAX,
	VM_REG_GUEST_RCX,
	VM_REG_GUEST_RDX,
	VM_REG_GUEST_RBX,
	VM_REG_GUEST_RSP,
	VM_REG_GUEST_RBP,
	VM_REG_GUEST_RSI,
	VM_REG_GUEST_RDI,
	VM_REG_GUEST_R8,
	VM_REG_GUEST_R9,
	VM_REG_GUEST_R10,
	VM_REG_GUEST_R11,
	VM_REG_GUEST_R12,
	VM_REG_GUEST_R13,
	VM_REG_GUEST_R14,
	VM_REG_GUEST_R15
};

/* Low-order bit masks indexed by operand size in bytes */
static uint64_t size2mask[] = {
	[1] = 0xff,
	[2] = 0xffff,
	[4] = 0xffffffff,
	[8] = 0xffffffffffffffff,
};

/*
 * Read the full 64-bit value of guest register 'reg' into '*rval'.
 */
static int
vie_read_register(void *vm, int vcpuid, enum vm_reg_name reg, uint64_t *rval)
{
	int error;

	error = vm_get_register(vm, vcpuid, reg, rval);

	return (error);
}

/*
 * Read the byte register addressed by 'ModRM:reg' into '*rval', taking
 * the legacy high-byte registers (%ah, %ch, %dh, %bh) into account.
 */
static int
vie_read_bytereg(void *vm, int vcpuid, struct vie *vie, uint8_t *rval)
{
	uint64_t val;
	int error, rshift;
	enum vm_reg_name reg;

	rshift = 0;
	reg = gpr_map[vie->reg];

	/*
	 * 64-bit mode imposes limitations on accessing legacy byte registers.
	 *
	 * The legacy high-byte registers cannot be addressed if the REX
	 * prefix is present. In this case the values 4, 5, 6 and 7 of the
	 * 'ModRM:reg' field address %spl, %bpl, %sil and %dil respectively.
	 *
	 * If the REX prefix is not present then the values 4, 5, 6 and 7
	 * of the 'ModRM:reg' field address the legacy high-byte registers,
	 * %ah, %ch, %dh and %bh respectively.
	 */
	if (!vie->rex_present) {
		if (vie->reg & 0x4) {
			/*
			 * Obtain the value of %ah by reading %rax and shifting
			 * right by 8 bits (same for %bh, %ch and %dh).
			 */
			rshift = 8;
			reg = gpr_map[vie->reg & 0x3];
		}
	}

	error = vm_get_register(vm, vcpuid, reg, &val);
	*rval = val >> rshift;
	return (error);
}

/*
 * Write the low 'size' bytes of 'val' to guest register 'reg'.
 *
 * For 1- and 2-byte sizes the untouched high-order bits of the destination
 * register are preserved; a 4-byte write zero-extends to 64 bits (x86-64
 * semantics); an 8-byte write stores 'val' unchanged.
 */
static int
vie_update_register(void *vm, int vcpuid, enum vm_reg_name reg,
		    uint64_t val, int size)
{
	int error;
	uint64_t origval;

	switch (size) {
	case 1:
	case 2:
		error = vie_read_register(vm, vcpuid, reg, &origval);
		if (error)
			return (error);
		val &= size2mask[size];
		val |= origval & ~size2mask[size];
		break;
	case 4:
		val &= 0xffffffffUL;
		break;
	case 8:
		break;
	default:
		return (EINVAL);
	}

	error = vm_set_register(vm, vcpuid, reg, val);
	return (error);
}

/*
 * The following simplifying assumptions are made during emulation:
 *
 * - guest is in 64-bit mode
 *   - default address size is 64-bits
 *   - default operand size is 32-bits
 *
 * - operand size override is not supported
 *
 * - address size override is not supported
 */
static int
emulate_mov(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
	    mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
{
	int error, size;
	enum vm_reg_name reg;
	uint8_t byte;
	uint64_t val;

	size = 4;
	error = EINVAL;

	switch (vie->op.op_byte) {
	case 0x88:
		/*
		 * MOV byte from reg (ModRM:reg) to mem (ModRM:r/m)
		 * 88/r:	mov r/m8, r8
		 * REX + 88/r:	mov r/m8, r8 (%ah, %ch, %dh, %bh not available)
		 */
		size = 1;
		error = vie_read_bytereg(vm, vcpuid, vie, &byte);
		if (error == 0)
			error = memwrite(vm, vcpuid, gpa, byte, size, arg);
		break;
	case 0x89:
		/*
		 * MOV from reg (ModRM:reg) to mem (ModRM:r/m)
		 * 89/r:	mov r/m32, r32
		 * REX.W + 89/r	mov r/m64, r64
		 */
		if (vie->rex_w)
			size = 8;
		reg = gpr_map[vie->reg];
		error = vie_read_register(vm, vcpuid, reg, &val);
		if (error == 0) {
			val &= size2mask[size];
			error = memwrite(vm, vcpuid, gpa, val, size, arg);
		}
		break;
	case 0x8A:
	case 0x8B:
		/*
		 * MOV from mem (ModRM:r/m) to reg (ModRM:reg)
		 * 8A/r:	mov r/m8, r8
		 * REX + 8A/r:	mov r/m8, r8
		 * 8B/r:	mov r32, r/m32
		 * REX.W 8B/r:	mov r64, r/m64
		 */
		if (vie->op.op_byte == 0x8A)
			size = 1;
		else if (vie->rex_w)
			size = 8;
		error = memread(vm, vcpuid, gpa, &val, size, arg);
		if (error == 0) {
			reg = gpr_map[vie->reg];
			error = vie_update_register(vm, vcpuid, reg, val, size);
		}
		break;
	case 0xC7:
		/*
		 * MOV from imm32 to mem (ModRM:r/m)
		 * C7/0		mov r/m32, imm32
		 * REX.W + C7/0	mov r/m64, imm32 (sign-extended to 64-bits)
		 */
		val = vie->immediate;		/* already sign-extended */

		if (vie->rex_w)
			size = 8;

		if (size != 8)
			val &= size2mask[size];

		error = memwrite(vm, vcpuid, gpa, val, size, arg);
		break;
	default:
		break;
	}

	return (error);
}

/*
 * Emulate the AND instruction variants understood by the decoder.
 */
static int
emulate_and(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
	    mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
{
	int error, size;
	enum vm_reg_name reg;
	uint64_t val1, val2;

	size = 4;
	error = EINVAL;

	switch (vie->op.op_byte) {
	case 0x23:
		/*
		 * AND reg (ModRM:reg) and mem (ModRM:r/m) and store the
		 * result in reg.
		 *
		 * 23/r		and r32, r/m32
		 * REX.W + 23/r	and r64, r/m64
		 */
		if (vie->rex_w)
			size = 8;

		/* get the first operand */
		reg = gpr_map[vie->reg];
		error = vie_read_register(vm, vcpuid, reg, &val1);
		if (error)
			break;

		/* get the second operand */
		error = memread(vm, vcpuid, gpa, &val2, size, arg);
		if (error)
			break;

		/* perform the operation and write the result */
		val1 &= val2;
		error = vie_update_register(vm, vcpuid, reg, val1, size);
		break;
	case 0x81:
		/*
		 * AND mem (ModRM:r/m) with immediate and store the
		 * result in mem.
		 *
		 * 81/		and r/m32, imm32
		 * REX.W + 81/	and r/m64, imm32 sign-extended to 64
		 *
		 * Currently, only the AND operation of the 0x81 opcode
		 * is implemented (ModRM:reg = b100).
		 */
		if ((vie->reg & 7) != 4)
			break;

		if (vie->rex_w)
			size = 8;

		/* get the first operand */
		error = memread(vm, vcpuid, gpa, &val1, size, arg);
		if (error)
			break;

		/*
		 * perform the operation with the pre-fetched immediate
		 * operand and write the result
		 */
		val1 &= vie->immediate;
		error = memwrite(vm, vcpuid, gpa, val1, size, arg);
		break;
	default:
		break;
	}
	return (error);
}

/*
 * Emulate the OR instruction variants understood by the decoder.
 */
static int
emulate_or(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
	   mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
{
	int error, size;
	uint64_t val1;

	size = 4;
	error = EINVAL;

	switch (vie->op.op_byte) {
	case 0x83:
		/*
		 * OR mem (ModRM:r/m) with immediate and store the
		 * result in mem.
		 *
		 * 83/		OR r/m32, imm8 sign-extended to 32
		 * REX.W + 83/	OR r/m64, imm8 sign-extended to 64
		 *
		 * Currently, only the OR operation of the 0x83 opcode
		 * is implemented (ModRM:reg = b001).
		 */
		if ((vie->reg & 7) != 1)
			break;

		if (vie->rex_w)
			size = 8;

		/* get the first operand */
		error = memread(vm, vcpuid, gpa, &val1, size, arg);
		if (error)
			break;

		/*
		 * perform the operation with the pre-fetched immediate
		 * operand and write the result
		 */
		val1 |= vie->immediate;
		error = memwrite(vm, vcpuid, gpa, val1, size, arg);
		break;
	default:
		break;
	}
	return (error);
}

/*
 * Dispatch an already-decoded instruction ('vie->decoded' must be set)
 * to the appropriate emulation routine.  Memory accesses to the faulting
 * guest physical address 'gpa' are performed via the caller-supplied
 * 'memread'/'memwrite' callbacks with opaque argument 'memarg'.
 */
int
vmm_emulate_instruction(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
			mem_region_read_t memread, mem_region_write_t memwrite,
			void *memarg)
{
	int error;

	if (!vie->decoded)
		return (EINVAL);

	switch (vie->op.op_type) {
	case VIE_OP_TYPE_MOV:
		error = emulate_mov(vm, vcpuid, gpa, vie,
				    memread, memwrite, memarg);
		break;
	case VIE_OP_TYPE_AND:
		error = emulate_and(vm, vcpuid, gpa, vie,
				    memread, memwrite, memarg);
		break;
	case VIE_OP_TYPE_OR:
		error = emulate_or(vm, vcpuid, gpa, vie,
				   memread, memwrite, memarg);
		break;
	default:
		error = EINVAL;
		break;
	}

	return (error);
}

#ifdef _KERNEL
/*
 * Reset 'vie' to a pristine state before fetching/decoding an instruction.
 */
void
vie_init(struct vie *vie)
{

	bzero(vie, sizeof(struct vie));

	vie->base_register = VM_REG_LAST;
	vie->index_register = VM_REG_LAST;
}

/*
 * Translate the guest linear address 'gla' to a guest physical address by
 * walking the guest's page tables rooted at 'ptpphys'.  On success returns 0
 * and sets '*gpa' to the translation and '*gpaend' to the end of the
 * enclosing page.  Returns -1 on any walk failure (non-present entry,
 * unsupported superpage size, or inability to map a page table page).
 */
static int
gla2gpa(struct vm *vm, uint64_t gla, uint64_t ptpphys,
	uint64_t *gpa, uint64_t *gpaend)
{
	int nlevels, ptpshift, ptpindex;
	uint64_t *ptpbase, pte, pgsize;
	void *cookie;

	/*
	 * XXX assumes 64-bit guest with 4 page walk levels
	 */
	nlevels = 4;
	while (--nlevels >= 0) {
		/* Zero out the lower 12 bits and the upper 12 bits */
		ptpphys >>= 12; ptpphys <<= 24; ptpphys >>= 12;

		ptpbase = vm_gpa_hold(vm, ptpphys, PAGE_SIZE, VM_PROT_READ,
				      &cookie);
		if (ptpbase == NULL)
			goto error;

		ptpshift = PAGE_SHIFT + nlevels * 9;
		ptpindex = (gla >> ptpshift) & 0x1FF;
		pgsize = 1UL << ptpshift;

		pte = ptpbase[ptpindex];

		vm_gpa_release(cookie);

		if ((pte & PG_V) == 0)
			goto error;

		if (pte & PG_PS) {
			/* Superpages larger than 1GB are not supported */
			if (pgsize > 1 * GB)
				goto error;
			else
				break;
		}

		ptpphys = pte;
	}

	/* Zero out the lower 'ptpshift' bits and the upper 12 bits */
	pte >>= ptpshift; pte <<= (ptpshift + 12); pte >>= 12;
	*gpa = pte | (gla & (pgsize - 1));
	*gpaend = pte + pgsize;
	return (0);

error:
	return (-1);
}

/*
 * Fetch 'inst_length' bytes of the instruction at guest linear address 'rip'
 * into 'vie->inst', translating through the guest page tables rooted at
 * 'cr3'.  The fetch may span a page boundary.  Returns 0 if all bytes were
 * copied, -1 otherwise.
 */
int
vmm_fetch_instruction(struct vm *vm, int cpuid, uint64_t rip, int inst_length,
		      uint64_t cr3, struct vie *vie)
{
	int n, err, prot;
	uint64_t gpa, gpaend, off;
	void *hpa, *cookie;

	/*
	 * XXX cache previously fetched instructions using 'rip' as the tag
	 */

	prot = VM_PROT_READ | VM_PROT_EXECUTE;
	if (inst_length > VIE_INST_SIZE)
		panic("vmm_fetch_instruction: invalid length %d", inst_length);

	/* Copy the instruction into 'vie' */
	while (vie->num_valid < inst_length) {
		err = gla2gpa(vm, rip, cr3, &gpa, &gpaend);
		if (err)
			break;

		off = gpa & PAGE_MASK;
		n = min(inst_length - vie->num_valid, PAGE_SIZE - off);

		if ((hpa = vm_gpa_hold(vm, gpa, n, prot, &cookie)) == NULL)
			break;

		bcopy(hpa, &vie->inst[vie->num_valid], n);

		vm_gpa_release(cookie);

		rip += n;
		vie->num_valid += n;
	}

	if (vie->num_valid == inst_length)
		return (0);
	else
		return (-1);
}

/*
 * Look at the next unprocessed instruction byte without consuming it.
 * Returns -1 if the instruction buffer is exhausted.
 */
static int
vie_peek(struct vie *vie, uint8_t *x)
{

	if (vie->num_processed < vie->num_valid)
		{
		*x = vie->inst[vie->num_processed];
		return (0);
	} else
		return (-1);
}

/*
 * Consume the instruction byte most recently returned by vie_peek().
 */
static void
vie_advance(struct vie *vie)
{

	vie->num_processed++;
}

/*
 * Decode an optional REX prefix (0x40-0x4F) and record its W/R/X/B bits.
 */
static int
decode_rex(struct vie *vie)
{
	uint8_t x;

	if (vie_peek(vie, &x))
		return (-1);

	if (x >= 0x40 && x <= 0x4F) {
		vie->rex_present = 1;

		vie->rex_w = x & 0x8 ? 1 : 0;
		vie->rex_r = x & 0x4 ? 1 : 0;
		vie->rex_x = x & 0x2 ? 1 : 0;
		vie->rex_b = x & 0x1 ? 1 : 0;

		vie_advance(vie);
	}

	return (0);
}

/*
 * Decode the opcode byte via 'one_byte_opcodes'; fail on opcodes the
 * emulator does not understand.
 */
static int
decode_opcode(struct vie *vie)
{
	uint8_t x;

	if (vie_peek(vie, &x))
		return (-1);

	vie->op = one_byte_opcodes[x];

	if (vie->op.op_type == VIE_OP_TYPE_NONE)
		return (-1);

	vie_advance(vie);
	return (0);
}

/*
 * Decode the ModRM byte: mod/reg/rm fields, REX extensions, the base
 * register and the number of displacement bytes that follow.
 */
static int
decode_modrm(struct vie *vie)
{
	uint8_t x;
	enum cpu_mode cpu_mode;

	/*
	 * XXX assuming that guest is in IA-32E 64-bit mode
	 */
	cpu_mode = CPU_MODE_64BIT;

	if (vie_peek(vie, &x))
		return (-1);

	vie->mod = (x >> 6) & 0x3;
	vie->rm =  (x >> 0) & 0x7;
	vie->reg = (x >> 3) & 0x7;

	/*
	 * A direct addressing mode makes no sense in the context of an EPT
	 * fault. There has to be a memory access involved to cause the
	 * EPT fault.
	 */
	if (vie->mod == VIE_MOD_DIRECT)
		return (-1);

	if ((vie->mod == VIE_MOD_INDIRECT && vie->rm == VIE_RM_DISP32) ||
	    (vie->mod != VIE_MOD_DIRECT && vie->rm == VIE_RM_SIB)) {
		/*
		 * Table 2-5: Special Cases of REX Encodings
		 *
		 * mod=0, r/m=5 is used in the compatibility mode to
		 * indicate a disp32 without a base register.
		 *
		 * mod!=3, r/m=4 is used in the compatibility mode to
		 * indicate that the SIB byte is present.
		 *
		 * The 'b' bit in the REX prefix is don't care in
		 * this case.
		 */
	} else {
		vie->rm |= (vie->rex_b << 3);
	}

	vie->reg |= (vie->rex_r << 3);

	/* SIB */
	if (vie->mod != VIE_MOD_DIRECT && vie->rm == VIE_RM_SIB)
		goto done;

	vie->base_register = gpr_map[vie->rm];

	switch (vie->mod) {
	case VIE_MOD_INDIRECT_DISP8:
		vie->disp_bytes = 1;
		break;
	case VIE_MOD_INDIRECT_DISP32:
		vie->disp_bytes = 4;
		break;
	case VIE_MOD_INDIRECT:
		if (vie->rm == VIE_RM_DISP32) {
			vie->disp_bytes = 4;
			/*
			 * Table 2-7. RIP-Relative Addressing
			 *
			 * In 64-bit mode mod=00 r/m=101 implies [rip] + disp32
			 * whereas in compatibility mode it just implies disp32.
			 */

			if (cpu_mode == CPU_MODE_64BIT)
				vie->base_register = VM_REG_GUEST_RIP;
			else
				vie->base_register = VM_REG_LAST;
		}
		break;
	}

done:
	vie_advance(vie);

	return (0);
}

/*
 * Decode the SIB byte (if the ModRM byte indicated one is present):
 * scale/index/base fields, REX extensions and their special cases.
 */
static int
decode_sib(struct vie *vie)
{
	uint8_t x;

	/* Proceed only if SIB byte is present */
	if (vie->mod == VIE_MOD_DIRECT || vie->rm != VIE_RM_SIB)
		return (0);

	if (vie_peek(vie, &x))
		return (-1);

	/* De-construct the SIB byte */
	vie->ss = (x >> 6) & 0x3;
	vie->index = (x >> 3) & 0x7;
	vie->base = (x >> 0) & 0x7;

	/* Apply the REX prefix modifiers */
	vie->index |= vie->rex_x << 3;
	vie->base |= vie->rex_b << 3;

	switch (vie->mod) {
	case VIE_MOD_INDIRECT_DISP8:
		vie->disp_bytes = 1;
		break;
	case VIE_MOD_INDIRECT_DISP32:
		vie->disp_bytes = 4;
		break;
	}

	if (vie->mod == VIE_MOD_INDIRECT &&
	    (vie->base == 5 || vie->base == 13)) {
		/*
		 * Special case when base register is unused if mod = 0
		 * and base = %rbp or %r13.
		 *
		 * Documented in:
		 * Table 2-3: 32-bit Addressing Forms with the SIB Byte
		 * Table 2-5: Special Cases of REX Encodings
		 */
		vie->disp_bytes = 4;
	} else {
		vie->base_register = gpr_map[vie->base];
	}

	/*
	 * All encodings of 'index' are valid except for %rsp (4).
	 *
	 * Documented in:
	 * Table 2-3: 32-bit Addressing Forms with the SIB Byte
	 * Table 2-5: Special Cases of REX Encodings
	 */
	if (vie->index != 4)
		vie->index_register = gpr_map[vie->index];

	/* 'scale' makes sense only in the context of an index register */
	if (vie->index_register < VM_REG_LAST)
		vie->scale = 1 << vie->ss;

	vie_advance(vie);

	return (0);
}

/*
 * Decode the displacement bytes (1 or 4, per ModRM/SIB decoding) and
 * sign-extend the result into 'vie->displacement'.
 */
static int
decode_displacement(struct vie *vie)
{
	int n, i;
	uint8_t x;

	union {
		char	buf[4];
		int8_t	signed8;
		int32_t	signed32;
	} u;

	if ((n = vie->disp_bytes) == 0)
		return (0);

	if (n != 1 && n != 4)
		panic("decode_displacement: invalid disp_bytes %d", n);

	for (i = 0; i < n; i++) {
		if (vie_peek(vie, &x))
			return (-1);

		u.buf[i] = x;
		vie_advance(vie);
	}

	if (n == 1)
		vie->displacement = u.signed8;		/* sign-extended */
	else
		vie->displacement = u.signed32;		/* sign-extended */

	return (0);
}

/*
 * Decode the immediate operand (1 or 4 bytes, per the opcode flags) and
 * sign-extend the result into 'vie->immediate'.
 */
static int
decode_immediate(struct vie *vie)
{
	int i, n;
	uint8_t x;
	union {
		char	buf[4];
		int8_t	signed8;
		int32_t	signed32;
	} u;

	/* Figure out immediate operand size (if any) */
	if (vie->op.op_flags & VIE_OP_F_IMM)
		vie->imm_bytes = 4;
	else if (vie->op.op_flags & VIE_OP_F_IMM8)
		vie->imm_bytes = 1;

	if ((n = vie->imm_bytes) == 0)
		return (0);

	if (n != 1 && n != 4)
		panic("decode_immediate: invalid imm_bytes %d", n);

	for (i = 0; i < n; i++) {
		if (vie_peek(vie, &x))
			return (-1);

		u.buf[i] = x;
		vie_advance(vie);
	}

	if (n == 1)
		vie->immediate = u.signed8;		/* sign-extended */
	else
		vie->immediate = u.signed32;		/* sign-extended */

	return (0);
}

/*
 * Verify that all the bytes in the instruction buffer were consumed.
 */
static int
verify_inst_length(struct vie *vie)
{

	if (vie->num_processed == vie->num_valid)
		return (0);
	else
		return (-1);
}

/*
 * Verify that the 'guest linear address' provided as collateral of the nested
 * page table fault matches with our instruction decoding.
 */
static int
verify_gla(struct vm *vm, int cpuid, uint64_t gla, struct vie *vie)
{
	int error;
	uint64_t base, idx;

	/* Skip 'gla' verification */
	if (gla == VIE_INVALID_GLA)
		return (0);

	base = 0;
	if (vie->base_register != VM_REG_LAST) {
		error = vm_get_register(vm, cpuid, vie->base_register, &base);
		if (error) {
			printf("verify_gla: error %d getting base reg %d\n",
				error, vie->base_register);
			return (-1);
		}

		/*
		 * RIP-relative addressing starts from the following
		 * instruction
		 */
		if (vie->base_register == VM_REG_GUEST_RIP)
			base += vie->num_valid;
	}

	idx = 0;
	if (vie->index_register != VM_REG_LAST) {
		error = vm_get_register(vm, cpuid, vie->index_register, &idx);
		if (error) {
			printf("verify_gla: error %d getting index reg %d\n",
				error, vie->index_register);
			return (-1);
		}
	}

	if (base + vie->scale * idx + vie->displacement != gla) {
		printf("verify_gla mismatch: "
		       "base(0x%0lx), scale(%d), index(0x%0lx), "
		       "disp(0x%0lx), gla(0x%0lx)\n",
		       base, vie->scale, idx, vie->displacement, gla);
		return (-1);
	}

	return (0);
}

/*
 * Decode the instruction bytes previously fetched into 'vie' and
 * cross-check the decode against the faulting guest linear address 'gla'.
 * Sets 'vie->decoded' on success so the instruction can be emulated.
 */
int
vmm_decode_instruction(struct vm *vm, int cpuid, uint64_t gla, struct vie *vie)
{

	if (decode_rex(vie))
		return (-1);

	if (decode_opcode(vie))
		return (-1);

	if (decode_modrm(vie))
		return (-1);

	if (decode_sib(vie))
		return (-1);

	if (decode_displacement(vie))
		return (-1);

	if (decode_immediate(vie))
		return (-1);

	if (verify_inst_length(vie))
		return (-1);

	if (verify_gla(vm, cpuid, gla, vie))
		return (-1);

	vie->decoded = 1;	/* success */

	return (0);
}
#endif	/* _KERNEL */