/*-
 * Copyright (c) 2014 Neel Natu <neel@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright

--- 11 unchanged lines hidden ---

 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/10/usr.sbin/bhyve/task_switch.c 284900 2015-06-28 03:22:26Z neel $");

#include <sys/param.h>
#include <sys/_iovec.h>
#include <sys/mman.h>

#include <x86/psl.h>
#include <x86/segments.h>
#include <x86/specialreg.h>

--- 160 unchanged lines hidden ---

 * by the selector 'sel'.
 *
 * Returns 0 on success.
 * Returns 1 if an exception was injected into the guest.
 * Returns -1 otherwise.
 */
static int
desc_table_rw(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
    uint16_t sel, struct user_segment_descriptor *desc, bool doread,
    int *faultptr)
{
        struct iovec iov[2];
        uint64_t base;
        uint32_t limit, access;
        int error, reg;

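        /*
         * The selector's table indicator bit (tested by ISLDT()) chooses
         * between the LDT and the GDT; bits 3..15 index the descriptor
         * slot within that table.  The 8-byte slot may straddle a page
         * boundary in guest linear address space, hence the two iovecs.
         */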
        reg = ISLDT(sel) ? VM_REG_GUEST_LDTR : VM_REG_GUEST_GDTR;
        error = vm_get_desc(ctx, vcpu, reg, &base, &limit, &access);
        assert(error == 0);
        assert(limit >= SEL_LIMIT(sel));

        error = vm_copy_setup(ctx, vcpu, paging, base + SEL_START(sel),
            sizeof(*desc), doread ? PROT_READ : PROT_WRITE, iov, nitems(iov),
            faultptr);
        if (error || *faultptr)
                return (error);

        if (doread)
                vm_copyin(ctx, vcpu, iov, desc, sizeof(*desc));
        else
                vm_copyout(ctx, vcpu, desc, iov, sizeof(*desc));
        return (0);
}

static int
desc_table_read(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
    uint16_t sel, struct user_segment_descriptor *desc, int *faultptr)
{
        return (desc_table_rw(ctx, vcpu, paging, sel, desc, true, faultptr));
}

static int
desc_table_write(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
    uint16_t sel, struct user_segment_descriptor *desc, int *faultptr)
{
        return (desc_table_rw(ctx, vcpu, paging, sel, desc, false, faultptr));
}

/*
 * Read the TSS descriptor referenced by 'sel' into 'desc'.
 *
 * Returns 0 on success.
 * Returns 1 if an exception was injected into the guest.
 * Returns -1 otherwise.
 */
static int
read_tss_descriptor(struct vmctx *ctx, int vcpu, struct vm_task_switch *ts,
    uint16_t sel, struct user_segment_descriptor *desc, int *faultptr)
{
        struct vm_guest_paging sup_paging;
        int error;

        assert(!ISLDT(sel));
        assert(IDXSEL(sel) != 0);

        /* Fetch the new TSS descriptor */
        if (desc_table_limit_check(ctx, vcpu, sel)) {
                if (ts->reason == TSR_IRET)
                        sel_exception(ctx, vcpu, IDT_TS, sel, ts->ext);
                else
                        sel_exception(ctx, vcpu, IDT_GP, sel, ts->ext);
                return (1);
        }

        sup_paging = ts->paging;
        sup_paging.cpl = 0;     /* implicit supervisor mode */
        error = desc_table_read(ctx, vcpu, &sup_paging, sel, desc, faultptr);
        return (error);
}

static bool
code_desc(int sd_type)
{
        /* code descriptor */
        return ((sd_type & 0x18) == 0x18);

--- 17 unchanged lines hidden ---

ldt_desc(int sd_type)
{

        return (sd_type == SDT_SYSLDT);
}

/*
 * Validate the descriptor 'seg_desc' associated with 'segment'.
 */
static int
validate_seg_desc(struct vmctx *ctx, int vcpu, struct vm_task_switch *ts,
    int segment, struct seg_desc *seg_desc, int *faultptr)
{
        struct vm_guest_paging sup_paging;
        struct user_segment_descriptor usd;
        int error, idtvec;
        int cpl, dpl, rpl;
        uint16_t sel, cs;
        bool ldtseg, codeseg, stackseg, dataseg, conforming;


--- 44 unchanged lines hidden ---

                seg_desc->limit = 0;
                seg_desc->access = 0x10000;     /* unusable */
                return (0);
        }

        /* Read the descriptor from the GDT/LDT */
        sup_paging = ts->paging;
        sup_paging.cpl = 0;     /* implicit supervisor mode */
        error = desc_table_read(ctx, vcpu, &sup_paging, sel, &usd, faultptr);
        if (error || *faultptr)
                return (error);

        /* Verify that the descriptor type is compatible with the segment */
        if ((ldtseg && !ldt_desc(usd.sd_type)) ||
            (codeseg && !code_desc(usd.sd_type)) ||
            (dataseg && !data_desc(usd.sd_type)) ||
            (stackseg && !stack_desc(usd.sd_type))) {
                sel_exception(ctx, vcpu, IDT_TS, sel, ts->ext);

--- 89 unchanged lines hidden ---

        int error;

        error = vm_set_desc(ctx, vcpu, reg, sd->base, sd->limit, sd->access);
        assert(error == 0);
}

/*
 * Update the vcpu registers to reflect the state of the new task.
 */
static int
tss32_restore(struct vmctx *ctx, int vcpu, struct vm_task_switch *ts,
    uint16_t ot_sel, struct tss32 *tss, struct iovec *iov, int *faultptr)
{
        struct seg_desc seg_desc, seg_desc2;
        uint64_t *pdpte, maxphyaddr, reserved;
        uint32_t eflags;
        int error, i;
        bool nested;

        nested = false;

--- 65 unchanged lines hidden ---

        /*
         * If this is a nested task then write out the new TSS to update
         * the previous link field.
         */
        if (nested)
                vm_copyout(ctx, vcpu, tss, iov, sizeof(*tss));

        /* Validate segment descriptors */
        error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_LDTR, &seg_desc,
            faultptr);
        if (error || *faultptr)
                return (error);
        update_seg_desc(ctx, vcpu, VM_REG_GUEST_LDTR, &seg_desc);

        /*
         * Section "Checks on Guest Segment Registers", Intel SDM, Vol 3.
         *
         * The SS and CS attribute checks on VM-entry are inter-dependent so
         * we need to make sure that both segments are valid before updating
         * either of them. This ensures that the VMCS state can pass the
         * VM-entry checks so the guest can handle any exception injected
         * during task switch emulation.
         */
        error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_CS, &seg_desc,
            faultptr);
        if (error || *faultptr)
                return (error);

        error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_SS, &seg_desc2,
            faultptr);
        if (error || *faultptr)
                return (error);
        update_seg_desc(ctx, vcpu, VM_REG_GUEST_CS, &seg_desc);
        update_seg_desc(ctx, vcpu, VM_REG_GUEST_SS, &seg_desc2);
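        /*
         * The CPL of the new task is the RPL of its code segment selector;
         * the data segment checks that follow are performed at this CPL.
         */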
        ts->paging.cpl = tss->tss_cs & SEL_RPL_MASK;

        error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_DS, &seg_desc,
            faultptr);
        if (error || *faultptr)
                return (error);
        update_seg_desc(ctx, vcpu, VM_REG_GUEST_DS, &seg_desc);

        error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_ES, &seg_desc,
            faultptr);
        if (error || *faultptr)
                return (error);
        update_seg_desc(ctx, vcpu, VM_REG_GUEST_ES, &seg_desc);

        error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_FS, &seg_desc,
            faultptr);
        if (error || *faultptr)
                return (error);
        update_seg_desc(ctx, vcpu, VM_REG_GUEST_FS, &seg_desc);

        error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_GS, &seg_desc,
            faultptr);
        if (error || *faultptr)
                return (error);
        update_seg_desc(ctx, vcpu, VM_REG_GUEST_GS, &seg_desc);

        return (0);
}

/*
 * Push an error code on the stack of the new task. This is needed if the
 * task switch was triggered by a hardware exception that causes an error
 * code to be saved (e.g. #PF).
 */
static int
push_errcode(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
    int task_type, uint32_t errcode, int *faultptr)
{
        struct iovec iov[2];
        struct seg_desc seg_desc;
        int stacksize, bytes, error;
        uint64_t gla, cr0, rflags;
        uint32_t esp;
        uint16_t stacksel;

        *faultptr = 0;

        cr0 = GETREG(ctx, vcpu, VM_REG_GUEST_CR0);
        rflags = GETREG(ctx, vcpu, VM_REG_GUEST_RFLAGS);
        stacksel = GETREG(ctx, vcpu, VM_REG_GUEST_SS);

        error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_SS, &seg_desc.base,
            &seg_desc.limit, &seg_desc.access);
        assert(error == 0);


--- 18 unchanged lines hidden ---

                stacksize = 2;

        esp = GETREG(ctx, vcpu, VM_REG_GUEST_RSP);
        esp -= bytes;

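        /*
         * A stack limit violation or a non-canonical stack address results
         * in a stack fault (#SS) with the stack selector as the error code.
         */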
        if (vie_calculate_gla(paging->cpu_mode, VM_REG_GUEST_SS,
            &seg_desc, esp, bytes, stacksize, PROT_WRITE, &gla)) {
                sel_exception(ctx, vcpu, IDT_SS, stacksel, 1);
                *faultptr = 1;
                return (0);
        }

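        /*
         * Alignment checks apply only when CR0.AM and RFLAGS.AC are both
         * set and the access is made at CPL 3; a misaligned push of the
         * error code then raises #AC.
         */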
        if (vie_alignment_check(paging->cpl, bytes, cr0, rflags, gla)) {
                vm_inject_ac(ctx, vcpu, 1);
                *faultptr = 1;
                return (0);
        }

        error = vm_copy_setup(ctx, vcpu, paging, gla, bytes, PROT_WRITE,
            iov, nitems(iov), faultptr);
        if (error || *faultptr)
                return (error);

        vm_copyout(ctx, vcpu, &errcode, iov, bytes);
        SETREG(ctx, vcpu, VM_REG_GUEST_RSP, esp);
        return (0);
}

/*
 * Evaluate return value from helper functions and potentially return to
 * the VM run loop.
 */
#define CHKERR(error,fault)                                             \
        do {                                                            \
                assert((error == 0) || (error == EFAULT));              \
                if (error)                                              \
                        return (VMEXIT_ABORT);                          \
                else if (fault)                                         \
                        return (VMEXIT_CONTINUE);                       \
        } while (0)
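/*
 * CHKERR() is used after each emulation helper: a non-zero error aborts
 * the task switch emulation entirely, while a non-zero 'fault' means an
 * exception or page fault was injected into the guest, so emulation stops
 * and control returns to the VM run loop for the guest to handle it.
 */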

int
vmexit_task_switch(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
{
        struct seg_desc nt;
        struct tss32 oldtss, newtss;
        struct vm_task_switch *task_switch;
        struct vm_guest_paging *paging, sup_paging;
        struct user_segment_descriptor nt_desc, ot_desc;
        struct iovec nt_iov[2], ot_iov[2];
        uint64_t cr0, ot_base;
        uint32_t eip, ot_lim, access;
        int error, ext, fault, minlimit, nt_type, ot_type, vcpu;
        enum task_switch_reason reason;
        uint16_t nt_sel, ot_sel;

        task_switch = &vmexit->u.task_switch;
        nt_sel = task_switch->tsssel;
        ext = vmexit->u.task_switch.ext;
        reason = vmexit->u.task_switch.reason;
        paging = &vmexit->u.task_switch.paging;

--- 11 unchanged lines hidden ---

         * The following page table accesses are implicitly supervisor mode:
         * - accesses to GDT or LDT to load segment descriptors
         * - accesses to the task state segment during task switch
         */
        sup_paging = *paging;
        sup_paging.cpl = 0;     /* implicit supervisor mode */

        /* Fetch the new TSS descriptor */
        error = read_tss_descriptor(ctx, vcpu, task_switch, nt_sel, &nt_desc,
            &fault);
        CHKERR(error, fault);

        nt = usd_to_seg_desc(&nt_desc);

        /* Verify the type of the new TSS */
        nt_type = SEG_DESC_TYPE(nt.access);
        if (nt_type != SDT_SYS386BSY && nt_type != SDT_SYS386TSS &&
            nt_type != SDT_SYS286BSY && nt_type != SDT_SYS286TSS) {
                sel_exception(ctx, vcpu, IDT_TS, nt_sel, ext);

--- 35 unchanged lines hidden ---

         */
        if (reason != TSR_IRET && TSS_BUSY(nt_type)) {
                sel_exception(ctx, vcpu, IDT_GP, nt_sel, ext);
                goto done;
        }

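        /*
         * 'minlimit' (set in the code elided above) is presumably the
         * architectural minimum TSS limit: 0x67 (104 bytes) for a 32-bit
         * TSS and 0x2b (44 bytes) for a 16-bit TSS, so 'minlimit + 1'
         * bytes cover the whole structure.
         */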
        /* Fetch the new TSS */
        error = vm_copy_setup(ctx, vcpu, &sup_paging, nt.base, minlimit + 1,
            PROT_READ | PROT_WRITE, nt_iov, nitems(nt_iov), &fault);
        CHKERR(error, fault);
        vm_copyin(ctx, vcpu, nt_iov, &newtss, minlimit + 1);

        /* Get the old TSS selector from the guest's task register */
        ot_sel = GETREG(ctx, vcpu, VM_REG_GUEST_TR);
        if (ISLDT(ot_sel) || IDXSEL(ot_sel) == 0) {
                /*
                 * This might happen if a task switch was attempted without
                 * ever loading the task register with LTR. In this case the

--- 8 unchanged lines hidden ---

        error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_TR, &ot_base, &ot_lim,
            &access);
        assert(error == 0);
        assert(!SEG_DESC_UNUSABLE(access) && SEG_DESC_PRESENT(access));
        ot_type = SEG_DESC_TYPE(access);
        assert(ot_type == SDT_SYS386BSY || ot_type == SDT_SYS286BSY);

        /* Fetch the old TSS descriptor */
        error = read_tss_descriptor(ctx, vcpu, task_switch, ot_sel, &ot_desc,
            &fault);
        CHKERR(error, fault);

        /* Get the old TSS */
        error = vm_copy_setup(ctx, vcpu, &sup_paging, ot_base, minlimit + 1,
            PROT_READ | PROT_WRITE, ot_iov, nitems(ot_iov), &fault);
        CHKERR(error, fault);
        vm_copyin(ctx, vcpu, ot_iov, &oldtss, minlimit + 1);

        /*
         * Clear the busy bit in the old TSS descriptor if the task switch
         * is due to an IRET or JMP instruction.
         */
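        /*
         * Bit 1 of a system segment type distinguishes a busy TSS
         * (SDT_SYS386BSY) from an available one (SDT_SYS386TSS), so the
         * busy state is toggled by flipping 0x2 in 'sd_type'.
         */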
        if (reason == TSR_IRET || reason == TSR_JMP) {
                ot_desc.sd_type &= ~0x2;
                error = desc_table_write(ctx, vcpu, &sup_paging, ot_sel,
                    &ot_desc, &fault);
                CHKERR(error, fault);
        }

        if (nt_type == SDT_SYS286BSY || nt_type == SDT_SYS286TSS) {
                fprintf(stderr, "Task switch to 16-bit TSS not supported\n");
                return (VMEXIT_ABORT);
        }

        /* Save processor state in old TSS */
        tss32_save(ctx, vcpu, task_switch, eip, &oldtss, ot_iov);

        /*
         * If the task switch was triggered for any reason other than IRET
         * then set the busy bit in the new TSS descriptor.
         */
        if (reason != TSR_IRET) {
                nt_desc.sd_type |= 0x2;
                error = desc_table_write(ctx, vcpu, &sup_paging, nt_sel,
                    &nt_desc, &fault);
                CHKERR(error, fault);
        }

        /* Update task register to point at the new TSS */
        SETREG(ctx, vcpu, VM_REG_GUEST_TR, nt_sel);

        /* Update the hidden descriptor state of the task register */
        nt = usd_to_seg_desc(&nt_desc);
        update_seg_desc(ctx, vcpu, VM_REG_GUEST_TR, &nt);

--- 6 unchanged lines hidden ---

         * We are now committed to the task switch. Any exceptions encountered
         * after this point will be handled in the context of the new task and
         * the saved instruction pointer will belong to the new task.
         */
        error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RIP, newtss.tss_eip);
        assert(error == 0);

        /* Load processor state from new TSS */
        error = tss32_restore(ctx, vcpu, task_switch, ot_sel, &newtss, nt_iov,
            &fault);
        CHKERR(error, fault);

        /*
         * Section "Interrupt Tasks" in Intel SDM, Vol 3: if an exception
         * caused an error code to be generated, this error code is copied
         * to the stack of the new task.
         */
        if (task_switch->errcode_valid) {
                assert(task_switch->ext);
                assert(task_switch->reason == TSR_IDT_GATE);
                error = push_errcode(ctx, vcpu, &task_switch->paging, nt_type,
                    task_switch->errcode, &fault);
                CHKERR(error, fault);
        }

        /*
         * Treatment of virtual-NMI blocking if NMI is delivered through
         * a task gate.
         *
         * Section "Architectural State Before A VM Exit", Intel SDM, Vol 3:
         * If the virtual NMIs VM-execution control is 1, VM entry injects

--- 35 unchanged lines hidden ---