/* * Copyright (c) 2000-2010 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in * compliance with the License. The rights granted to you under the License * may not be used to create, or enable the creation or redistribution of, * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* * @OSF_COPYRIGHT@ */ /* * Mach Operating System * Copyright (c) 1991,1990 Carnegie Mellon University * All Rights Reserved. * * Permission to use, copy, modify and distribute this software and its * documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie Mellon * the rights to redistribute these changes. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(__i386__) #include #endif #include #include #define ASSERT_IS_16BYTE_MULTIPLE_SIZEOF(_type_) \ extern char assert_is_16byte_multiple_sizeof_ ## _type_ \ [(sizeof(_type_) % 16) == 0 ? 1 : -1] /* Compile-time checks for vital save area sizing: */ ASSERT_IS_16BYTE_MULTIPLE_SIZEOF(x86_64_intr_stack_frame_t); ASSERT_IS_16BYTE_MULTIPLE_SIZEOF(x86_sframe64_t); ASSERT_IS_16BYTE_MULTIPLE_SIZEOF(x86_saved_state_compat32_t); ASSERT_IS_16BYTE_MULTIPLE_SIZEOF(x86_saved_state_t); #define DIRECTION_FLAG_DEBUG (DEBUG | DEVELOPMENT) extern zone_t iss_zone; /* zone for saved_state area */ extern zone_t ids_zone; /* zone for debug_state area */ extern void *get_bsduthreadarg(thread_t); void act_machine_switch_pcb(__unused thread_t old, thread_t new) { pcb_t pcb = THREAD_TO_PCB(new); cpu_data_t *cdp = current_cpu_datap(); struct real_descriptor *ldtp; mach_vm_offset_t pcb_stack_top; assert(new->kernel_stack != 0); assert(ml_get_interrupts_enabled() == FALSE); #ifdef DIRECTION_FLAG_DEBUG if (x86_get_flags() & EFL_DF) { panic("Direction flag detected: 0x%lx", x86_get_flags()); } #endif #if defined(__x86_64__) /* * Clear segment state * unconditionally for DS/ES/FS but more carefully for GS whose * cached state we track. */ set_ds(NULL_SEG); set_es(NULL_SEG); set_fs(NULL_SEG); if (get_gs() != NULL_SEG) { swapgs(); /* switch to user's GS context */ set_gs(NULL_SEG); swapgs(); /* and back to kernel */ /* record the active machine state lost */ cdp->cpu_uber.cu_user_gs_base = 0; } if (is_saved_state64(pcb->iss)) { /* * The test above is performed against the thread save state * flavor and not task's 64-bit feature flag because of the * thread/task 64-bit state divergence that can arise in * task_set_64bit() x86: the task state is changed before * the individual thread(s). */ x86_saved_state64_tagged_t *iss64; vm_offset_t isf; assert(is_saved_state64(pcb->iss)); iss64 = (x86_saved_state64_tagged_t *) pcb->iss; /* * Set pointer to PCB's interrupt stack frame in cpu data. * Used by syscall and double-fault trap handlers. */ isf = (vm_offset_t) &iss64->state.isf; cdp->cpu_uber.cu_isf = isf; pcb_stack_top = (vm_offset_t) (iss64 + 1); /* require 16-byte alignment */ assert((pcb_stack_top & 0xF) == 0); /* Interrupt stack is pcb */ current_ktss64()->rsp0 = pcb_stack_top; /* * Top of temporary sysenter stack points to pcb stack. * Although this is not normally used by 64-bit users, * it needs to be set in case a sysenter is attempted. */ *current_sstk64() = pcb_stack_top; cdp->cpu_task_map = new->map->pmap->pm_task_map; /* * Enable the 64-bit user code segment, USER64_CS. * Disable the 32-bit user code segment, USER_CS. */ ldt_desc_p(USER64_CS)->access |= ACC_PL_U; ldt_desc_p(USER_CS)->access &= ~ACC_PL_U; /* * Switch user's GS base if necessary * by setting the Kernel's GS base MSR * - this will become the user's on the swapgs when * returning to user-space. Avoid this for * kernel threads (no user TLS support required) * and verify the memory shadow of the segment base * in the event it was altered in user space. */ if ((pcb->cthread_self != 0) || (new->task != kernel_task)) { if ((cdp->cpu_uber.cu_user_gs_base != pcb->cthread_self) || (pcb->cthread_self != rdmsr64(MSR_IA32_KERNEL_GS_BASE))) { cdp->cpu_uber.cu_user_gs_base = pcb->cthread_self; wrmsr64(MSR_IA32_KERNEL_GS_BASE, pcb->cthread_self); } } } else { x86_saved_state_compat32_t *iss32compat; vm_offset_t isf; assert(is_saved_state32(pcb->iss)); iss32compat = (x86_saved_state_compat32_t *) pcb->iss; pcb_stack_top = (uintptr_t) (iss32compat + 1); /* require 16-byte alignment */ assert((pcb_stack_top & 0xF) == 0); /* * Set pointer to PCB's interrupt stack frame in cpu data. * Used by debug trap handler. */ isf = (vm_offset_t) &iss32compat->isf64; cdp->cpu_uber.cu_isf = isf; /* Top of temporary sysenter stack points to pcb stack */ *current_sstk64() = pcb_stack_top; /* Interrupt stack is pcb */ current_ktss64()->rsp0 = pcb_stack_top; cdp->cpu_task_map = TASK_MAP_32BIT; /* Precalculate pointers to syscall argument store, for use * in the trampolines. */ cdp->cpu_uber_arg_store = (vm_offset_t)get_bsduthreadarg(new); cdp->cpu_uber_arg_store_valid = (vm_offset_t)&pcb->arg_store_valid; pcb->arg_store_valid = 0; /* * Disable USER64_CS * Enable USER_CS */ ldt_desc_p(USER64_CS)->access &= ~ACC_PL_U; ldt_desc_p(USER_CS)->access |= ACC_PL_U; /* * Set the thread`s cthread (a.k.a pthread) * For 32-bit user this involves setting the USER_CTHREAD * descriptor in the LDT to point to the cthread data. * The involves copying in the pre-initialized descriptor. */ ldtp = (struct real_descriptor *)current_ldt(); ldtp[sel_idx(USER_CTHREAD)] = pcb->cthread_desc; if (pcb->uldt_selector != 0) ldtp[sel_idx(pcb->uldt_selector)] = pcb->uldt_desc; cdp->cpu_uber.cu_user_gs_base = pcb->cthread_self; /* * Set the thread`s LDT or LDT entry. */ if (new->task == TASK_NULL || new->task->i386_ldt == 0) { /* * Use system LDT. */ ml_cpu_set_ldt(KERNEL_LDT); } else { /* * Task has its own LDT. */ user_ldt_set(new); } } #else /* !__x86_64__ */ vm_offset_t hi_pcb_stack_top; vm_offset_t hi_iss; if (!cpu_mode_is64bit()) { x86_saved_state32_tagged_t *hi_iss32; /* * Save a pointer to the top of the "kernel" stack - * actually the place in the PCB where a trap into * kernel mode will push the registers. */ hi_iss = (vm_offset_t)((unsigned long) pmap_cpu_high_map_vaddr(cpu_number(), HIGH_CPU_ISS0) | ((unsigned long)pcb->iss & PAGE_MASK)); cdp->cpu_hi_iss = (void *)hi_iss; pmap_high_map(pcb->iss_pte0, HIGH_CPU_ISS0); pmap_high_map(pcb->iss_pte1, HIGH_CPU_ISS1); hi_iss32 = (x86_saved_state32_tagged_t *) hi_iss; assert(hi_iss32->tag == x86_SAVED_STATE32); hi_pcb_stack_top = (int) (hi_iss32 + 1); /* * For fast syscall, top of interrupt stack points to pcb stack */ *(vm_offset_t *) current_sstk() = hi_pcb_stack_top; current_ktss()->esp0 = hi_pcb_stack_top; } else if (is_saved_state64(pcb->iss)) { /* * The test above is performed against the thread save state * flavor and not task's 64-bit feature flag because of the * thread/task 64-bit state divergence that can arise in * task_set_64bit() x86: the task state is changed before * the individual thread(s). */ x86_saved_state64_tagged_t *iss64; vm_offset_t isf; assert(is_saved_state64(pcb->iss)); iss64 = (x86_saved_state64_tagged_t *) pcb->iss; /* * Set pointer to PCB's interrupt stack frame in cpu data. * Used by syscall and double-fault trap handlers. */ isf = (vm_offset_t) &iss64->state.isf; cdp->cpu_uber.cu_isf = UBER64(isf); pcb_stack_top = (vm_offset_t) (iss64 + 1); /* require 16-byte alignment */ assert((pcb_stack_top & 0xF) == 0); /* Interrupt stack is pcb */ current_ktss64()->rsp0 = UBER64(pcb_stack_top); /* * Top of temporary sysenter stack points to pcb stack. * Although this is not normally used by 64-bit users, * it needs to be set in case a sysenter is attempted. */ *current_sstk64() = UBER64(pcb_stack_top); cdp->cpu_task_map = new->map->pmap->pm_task_map; /* * Enable the 64-bit user code segment, USER64_CS. * Disable the 32-bit user code segment, USER_CS. */ ldt_desc_p(USER64_CS)->access |= ACC_PL_U; ldt_desc_p(USER_CS)->access &= ~ACC_PL_U; } else { x86_saved_state_compat32_t *iss32compat; vm_offset_t isf; assert(is_saved_state32(pcb->iss)); iss32compat = (x86_saved_state_compat32_t *) pcb->iss; pcb_stack_top = (int) (iss32compat + 1); /* require 16-byte alignment */ assert((pcb_stack_top & 0xF) == 0); /* * Set pointer to PCB's interrupt stack frame in cpu data. * Used by debug trap handler. */ isf = (vm_offset_t) &iss32compat->isf64; cdp->cpu_uber.cu_isf = UBER64(isf); /* Top of temporary sysenter stack points to pcb stack */ *current_sstk64() = UBER64(pcb_stack_top); /* Interrupt stack is pcb */ current_ktss64()->rsp0 = UBER64(pcb_stack_top); cdp->cpu_task_map = TASK_MAP_32BIT; /* Precalculate pointers to syscall argument store, for use * in the trampolines. */ cdp->cpu_uber_arg_store = UBER64((vm_offset_t)get_bsduthreadarg(new)); cdp->cpu_uber_arg_store_valid = UBER64((vm_offset_t)&pcb->arg_store_valid); pcb->arg_store_valid = 0; /* * Disable USER64_CS * Enable USER_CS */ ldt_desc_p(USER64_CS)->access &= ~ACC_PL_U; ldt_desc_p(USER_CS)->access |= ACC_PL_U; } /* * Set the thread`s cthread (a.k.a pthread) * For 32-bit user this involves setting the USER_CTHREAD * descriptor in the LDT to point to the cthread data. * The involves copying in the pre-initialized descriptor. */ ldtp = (struct real_descriptor *)current_ldt(); ldtp[sel_idx(USER_CTHREAD)] = pcb->cthread_desc; if (pcb->uldt_selector != 0) ldtp[sel_idx(pcb->uldt_selector)] = pcb->uldt_desc; /* * For 64-bit, we additionally set the 64-bit User GS base * address. On return to 64-bit user, the GS.Base MSR will be written. */ cdp->cpu_uber.cu_user_gs_base = pcb->cthread_self; /* * Set the thread`s LDT or LDT entry. */ if (new->task == TASK_NULL || new->task->i386_ldt == 0) { /* * Use system LDT. */ ml_cpu_set_ldt(KERNEL_LDT); } else { /* * Task has its own LDT. */ user_ldt_set(new); } #endif /* * Bump the scheduler generation count in the commpage. * This can be read by user code to detect its preemption. */ commpage_sched_gen_inc(); } void thread_set_wq_state32(thread_t thread, thread_state_t tstate) { x86_thread_state32_t *state; x86_saved_state32_t *saved_state; thread_t curth = current_thread(); spl_t s=0; pal_register_cache_state(thread, DIRTY); saved_state = USER_REGS32(thread); state = (x86_thread_state32_t *)tstate; if (curth != thread) { s = splsched(); thread_lock(thread); } saved_state->ebp = 0; saved_state->eip = state->eip; saved_state->eax = state->eax; saved_state->ebx = state->ebx; saved_state->ecx = state->ecx; saved_state->edx = state->edx; saved_state->edi = state->edi; saved_state->esi = state->esi; saved_state->uesp = state->esp; saved_state->efl = EFL_USER_SET; saved_state->cs = USER_CS; saved_state->ss = USER_DS; saved_state->ds = USER_DS; saved_state->es = USER_DS; if (curth != thread) { thread_unlock(thread); splx(s); } } void thread_set_wq_state64(thread_t thread, thread_state_t tstate) { x86_thread_state64_t *state; x86_saved_state64_t *saved_state; thread_t curth = current_thread(); spl_t s=0; pal_register_cache_state(thread, DIRTY); saved_state = USER_REGS64(thread); state = (x86_thread_state64_t *)tstate; if (curth != thread) { s = splsched(); thread_lock(thread); } saved_state->rbp = 0; saved_state->rdi = state->rdi; saved_state->rsi = state->rsi; saved_state->rdx = state->rdx; saved_state->rcx = state->rcx; saved_state->r8 = state->r8; saved_state->r9 = state->r9; saved_state->isf.rip = state->rip; saved_state->isf.rsp = state->rsp; saved_state->isf.cs = USER64_CS; saved_state->isf.rflags = EFL_USER_SET; if (curth != thread) { thread_unlock(thread); splx(s); } } /* * Initialize the machine-dependent state for a new thread. */ kern_return_t machine_thread_create( thread_t thread, task_t task) { pcb_t pcb = THREAD_TO_PCB(thread); x86_saved_state_t *iss; #if NCOPY_WINDOWS > 0 inval_copy_windows(thread); thread->machine.physwindow_pte = 0; thread->machine.physwindow_busy = 0; #endif /* * Allocate save frame only if required. */ if (pcb->sf == NULL) { assert((get_preemption_level() == 0)); pcb->sf = zalloc(iss_zone); if (pcb->sf == NULL) panic("iss_zone"); } if (task_has_64BitAddr(task)) { x86_sframe64_t *sf64; sf64 = (x86_sframe64_t *) pcb->sf; bzero((char *)sf64, sizeof(x86_sframe64_t)); iss = (x86_saved_state_t *) &sf64->ssf; iss->flavor = x86_SAVED_STATE64; /* * Guarantee that the bootstrapped thread will be in user * mode. */ iss->ss_64.isf.rflags = EFL_USER_SET; iss->ss_64.isf.cs = USER64_CS; iss->ss_64.isf.ss = USER_DS; iss->ss_64.fs = USER_DS; iss->ss_64.gs = USER_DS; } else { if (cpu_mode_is64bit()) { x86_sframe_compat32_t *sfc32; sfc32 = (x86_sframe_compat32_t *)pcb->sf; bzero((char *)sfc32, sizeof(x86_sframe_compat32_t)); iss = (x86_saved_state_t *) &sfc32->ssf.iss32; iss->flavor = x86_SAVED_STATE32; #if defined(__i386__) #if DEBUG { sfc32->pad_for_16byte_alignment[0] = 0x64326432; sfc32->pad_for_16byte_alignment[1] = 0x64326432; } #endif /* DEBUG */ } else { x86_sframe32_t *sf32; struct real_descriptor *ldtp; pmap_paddr_t paddr; sf32 = (x86_sframe32_t *) pcb->sf; bzero((char *)sf32, sizeof(x86_sframe32_t)); iss = (x86_saved_state_t *) &sf32->ssf; iss->flavor = x86_SAVED_STATE32; pcb->iss_pte0 = pte_kernel_rw(kvtophys((vm_offset_t)iss)); if (0 == (paddr = pa_to_pte(kvtophys((vm_offset_t)iss + PAGE_SIZE)))) pcb->iss_pte1 = INTEL_PTE_INVALID; else pcb->iss_pte1 = pte_kernel_rw(paddr); ldtp = (struct real_descriptor *) pmap_index_to_virt(HIGH_FIXED_LDT_BEGIN); pcb->cthread_desc = ldtp[sel_idx(USER_DS)]; pcb->uldt_desc = ldtp[sel_idx(USER_DS)]; #endif /* __i386__ */ } /* * Guarantee that the bootstrapped thread will be in user * mode. */ iss->ss_32.cs = USER_CS; iss->ss_32.ss = USER_DS; iss->ss_32.ds = USER_DS; iss->ss_32.es = USER_DS; iss->ss_32.fs = USER_DS; iss->ss_32.gs = USER_DS; iss->ss_32.efl = EFL_USER_SET; } pcb->iss = iss; simple_lock_init(&pcb->lock, 0); pcb->arg_store_valid = 0; pcb->cthread_self = 0; pcb->uldt_selector = 0; /* Ensure that the "cthread" descriptor describes a valid * segment. */ if ((pcb->cthread_desc.access & ACC_P) == 0) { struct real_descriptor *ldtp; ldtp = (struct real_descriptor *)current_ldt(); pcb->cthread_desc = ldtp[sel_idx(USER_DS)]; } return(KERN_SUCCESS); } /* * Machine-dependent cleanup prior to destroying a thread */ void machine_thread_destroy( thread_t thread) { register pcb_t pcb = THREAD_TO_PCB(thread); if (pcb->ifps != 0) fpu_free(pcb->ifps); if (pcb->sf != 0) { zfree(iss_zone, pcb->sf); pcb->sf = 0; } if (pcb->ids) { zfree(ids_zone, pcb->ids); pcb->ids = NULL; } }