1/*
2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
49 *  School of Computer Science
50 *  Carnegie Mellon University
51 *  Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56
57#include <mach_rt.h>
58#include <mach_debug.h>
59#include <mach_ldebug.h>
60
61#include <sys/kdebug.h>
62
63#include <mach/kern_return.h>
64#include <mach/thread_status.h>
65#include <mach/vm_param.h>
66
67#include <kern/counters.h>
68#include <kern/kalloc.h>
69#include <kern/mach_param.h>
70#include <kern/processor.h>
71#include <kern/cpu_data.h>
72#include <kern/cpu_number.h>
73#include <kern/task.h>
74#include <kern/thread.h>
75#include <kern/sched_prim.h>
76#include <kern/misc_protos.h>
77#include <kern/assert.h>
78#include <kern/spl.h>
79#include <kern/machine.h>
80#include <ipc/ipc_port.h>
81#include <vm/vm_kern.h>
82#include <vm/vm_map.h>
83#include <vm/pmap.h>
84#include <vm/vm_protos.h>
85
86#include <i386/commpage/commpage.h>
87#include <i386/cpu_data.h>
88#include <i386/cpu_number.h>
89#include <i386/eflags.h>
90#include <i386/proc_reg.h>
91#include <i386/tss.h>
92#include <i386/user_ldt.h>
93#include <i386/fpu.h>
94#include <i386/mp_desc.h>
95#include <i386/misc_protos.h>
96#include <i386/thread.h>
97#include <i386/seg.h>
98#include <i386/machine_routines.h>
99
100#if HYPERVISOR
101#include <kern/hv_support.h>
102#endif
103
/*
 * Compile-time size assertion: expands to an extern array declaration whose
 * element count is -1 (a hard compile error) unless sizeof(_type_) is a
 * multiple of 16.  Used to guarantee save-area layouts keep the required
 * 16-byte stack alignment.
 */
#define ASSERT_IS_16BYTE_MULTIPLE_SIZEOF(_type_)	\
extern char assert_is_16byte_multiple_sizeof_ ## _type_	\
		[(sizeof(_type_) % 16) == 0 ? 1 : -1]

/* Compile-time checks for vital save area sizing: */
ASSERT_IS_16BYTE_MULTIPLE_SIZEOF(x86_64_intr_stack_frame_t);
ASSERT_IS_16BYTE_MULTIPLE_SIZEOF(x86_saved_state_t);

/* Non-zero (so #ifdef-visible) on DEBUG and DEVELOPMENT kernel configs. */
#define DIRECTION_FLAG_DEBUG (DEBUG | DEVELOPMENT)

extern zone_t		iss_zone;		/* zone for saved_state area */
extern zone_t		ids_zone;		/* zone for debug_state area */
116
/*
 * act_machine_switch_pcb:
 *
 *	Install the machine-dependent (PCB) state of thread 'new' on the
 *	current CPU during a context switch: clears user segment registers,
 *	points the per-cpu and TSS interrupt-stack fields at the new PCB,
 *	enables the correct user code segment (32- vs 64-bit), and loads
 *	the user's GS base / cthread descriptor and LDT as needed.
 *
 *	'old' is unused.  Must be called with interrupts disabled and with
 *	'new' possessing a kernel stack (both asserted below).
 */
void
act_machine_switch_pcb(__unused thread_t old, thread_t new)
{
        pcb_t			pcb = THREAD_TO_PCB(new);
	cpu_data_t      	*cdp = current_cpu_datap();
	struct real_descriptor	*ldtp;
	mach_vm_offset_t	pcb_stack_top;

	assert(new->kernel_stack != 0);
	assert(ml_get_interrupts_enabled() == FALSE);
#ifdef	DIRECTION_FLAG_DEBUG
	/* The kernel ABI requires DF clear; catch violations early. */
	if (x86_get_flags() & EFL_DF) {
		panic("Direction flag detected: 0x%lx", x86_get_flags());
	}
#endif

	/*
	 * Clear segment state
	 * unconditionally for DS/ES/FS but more carefully for GS whose
	 * cached state we track.
	 */
	set_ds(NULL_SEG);
	set_es(NULL_SEG);
	set_fs(NULL_SEG);
	if (get_gs() != NULL_SEG) {
		/*
		 * GS currently holds the kernel's per-cpu base; to null the
		 * user selector we must swap to the user GS context, clear
		 * it, and swap back — a swapgs pair bracketing set_gs().
		 */
		swapgs();		/* switch to user's GS context */
		set_gs(NULL_SEG);
		swapgs();		/* and back to kernel */

		/* record the active machine state lost */
		cdp->cpu_uber.cu_user_gs_base = 0;
	}

	vm_offset_t			isf;

	/*
	 * Set pointer to PCB's interrupt stack frame in cpu data.
	 * Used by syscall and double-fault trap handlers.
	 */
	isf = (vm_offset_t) &pcb->iss->ss_64.isf;
	cdp->cpu_uber.cu_isf = isf;
	/* One-past-the-end of the save area is the hardware stack top. */
	pcb_stack_top = (vm_offset_t) (pcb->iss + 1);
	/* require 16-byte alignment */
	assert((pcb_stack_top & 0xF) == 0);

	/* Interrupt stack is pcb */
	current_ktss64()->rsp0 = pcb_stack_top;

	/*
	 * Top of temporary sysenter stack points to pcb stack.
	 * Although this is not normally used by 64-bit users,
	 * it needs to be set in case a sysenter is attempted.
	 */
	*current_sstk64() = pcb_stack_top;

	if (is_saved_state64(pcb->iss)) {

		/* Per-cpu record of the new thread's address-space mode. */
		cdp->cpu_task_map = new->map->pmap->pm_task_map;

		/*
		 * Enable the 64-bit user code segment, USER64_CS.
		 * Disable the 32-bit user code segment, USER_CS.
		 * (Toggling ACC_PL_U flips user-mode accessibility of the
		 * descriptor so only the matching bitness is usable.)
		 */
		ldt_desc_p(USER64_CS)->access |= ACC_PL_U;
		ldt_desc_p(USER_CS)->access &= ~ACC_PL_U;

		/*
		 * Switch user's GS base if necessary
		 * by setting the Kernel's GS base MSR
		 * - this will become the user's on the swapgs when
		 * returning to user-space.  Avoid this for
		 * kernel threads (no user TLS support required)
		 * and verify the memory shadow of the segment base
		 * in the event it was altered in user space.
		 */
		if ((pcb->cthread_self != 0) || (new->task != kernel_task)) {
			if ((cdp->cpu_uber.cu_user_gs_base != pcb->cthread_self) || (pcb->cthread_self != rdmsr64(MSR_IA32_KERNEL_GS_BASE))) {
				cdp->cpu_uber.cu_user_gs_base = pcb->cthread_self;
				wrmsr64(MSR_IA32_KERNEL_GS_BASE, pcb->cthread_self);
			}
		}

	} else {

		cdp->cpu_task_map = TASK_MAP_32BIT;

		/*
		 * Disable USER64_CS
		 * Enable USER_CS
		 */
		ldt_desc_p(USER64_CS)->access &= ~ACC_PL_U;
		ldt_desc_p(USER_CS)->access |= ACC_PL_U;

		/*
		 * Set the thread`s cthread (a.k.a pthread)
		 * For 32-bit user this involves setting the USER_CTHREAD
		 * descriptor in the LDT to point to the cthread data.
		 * The involves copying in the pre-initialized descriptor.
		 */
		ldtp = (struct real_descriptor *)current_ldt();
		ldtp[sel_idx(USER_CTHREAD)] = pcb->cthread_desc;
		if (pcb->uldt_selector != 0)
			ldtp[sel_idx(pcb->uldt_selector)] = pcb->uldt_desc;
		/* Keep the per-cpu shadow consistent with the LDT base. */
		cdp->cpu_uber.cu_user_gs_base = pcb->cthread_self;

		/*
		 * Set the thread`s LDT or LDT entry.
		 */
		if (new->task == TASK_NULL || new->task->i386_ldt == 0) {
			/*
			 * Use system LDT.
			 */
		       	ml_cpu_set_ldt(KERNEL_LDT);
		} else {
			/*
			 * Task has its own LDT.
			 */
			user_ldt_set(new);
		}
	}

	/*
	 * Bump the scheduler generation count in the commpage.
	 * This can be read by user code to detect its preemption.
	 */
	commpage_sched_gen_inc();
}
244
245kern_return_t
246thread_set_wq_state32(thread_t thread, thread_state_t tstate)
247{
248        x86_thread_state32_t	*state;
249        x86_saved_state32_t	*saved_state;
250	thread_t curth = current_thread();
251	spl_t			s=0;
252
253	pal_register_cache_state(thread, DIRTY);
254
255	saved_state = USER_REGS32(thread);
256
257	state = (x86_thread_state32_t *)tstate;
258
259	if (curth != thread) {
260		s = splsched();
261	        thread_lock(thread);
262	}
263
264	saved_state->ebp = 0;
265	saved_state->eip = state->eip;
266	saved_state->eax = state->eax;
267	saved_state->ebx = state->ebx;
268	saved_state->ecx = state->ecx;
269	saved_state->edx = state->edx;
270	saved_state->edi = state->edi;
271	saved_state->esi = state->esi;
272	saved_state->uesp = state->esp;
273	saved_state->efl = EFL_USER_SET;
274
275	saved_state->cs = USER_CS;
276	saved_state->ss = USER_DS;
277	saved_state->ds = USER_DS;
278	saved_state->es = USER_DS;
279
280	if (curth != thread) {
281	        thread_unlock(thread);
282		splx(s);
283	}
284
285	return KERN_SUCCESS;
286}
287
288
289kern_return_t
290thread_set_wq_state64(thread_t thread, thread_state_t tstate)
291{
292        x86_thread_state64_t	*state;
293        x86_saved_state64_t	*saved_state;
294	thread_t curth = current_thread();
295	spl_t			s=0;
296
297	saved_state = USER_REGS64(thread);
298	state = (x86_thread_state64_t *)tstate;
299
300	/* Disallow setting non-canonical PC or stack */
301	if (!IS_USERADDR64_CANONICAL(state->rsp) ||
302	    !IS_USERADDR64_CANONICAL(state->rip)) {
303		return KERN_FAILURE;
304	}
305
306	pal_register_cache_state(thread, DIRTY);
307
308	if (curth != thread) {
309		s = splsched();
310	        thread_lock(thread);
311	}
312
313	saved_state->rbp = 0;
314	saved_state->rdi = state->rdi;
315	saved_state->rsi = state->rsi;
316	saved_state->rdx = state->rdx;
317	saved_state->rcx = state->rcx;
318	saved_state->r8  = state->r8;
319	saved_state->r9  = state->r9;
320
321	saved_state->isf.rip = state->rip;
322	saved_state->isf.rsp = state->rsp;
323	saved_state->isf.cs = USER64_CS;
324	saved_state->isf.rflags = EFL_USER_SET;
325
326	if (curth != thread) {
327	        thread_unlock(thread);
328		splx(s);
329	}
330
331	return KERN_SUCCESS;
332}
333
334/*
335 * Initialize the machine-dependent state for a new thread.
336 */
337kern_return_t
338machine_thread_create(
339	thread_t		thread,
340	task_t			task)
341{
342        pcb_t			pcb = THREAD_TO_PCB(thread);
343
344#if NCOPY_WINDOWS > 0
345	inval_copy_windows(thread);
346
347	thread->machine.physwindow_pte = 0;
348	thread->machine.physwindow_busy = 0;
349#endif
350
351	/*
352	 * Allocate save frame only if required.
353	 */
354	if (pcb->iss == NULL) {
355		assert((get_preemption_level() == 0));
356		pcb->iss = (x86_saved_state_t *) zalloc(iss_zone);
357		if (pcb->iss == NULL)
358			panic("iss_zone");
359	}
360
361	/*
362	 * Assure that the synthesized 32-bit state including
363	 * the 64-bit interrupt state can be acommodated in the
364	 * 64-bit state we allocate for both 32-bit and 64-bit threads.
365	 */
366	assert(sizeof(pcb->iss->ss_32) + sizeof(pcb->iss->ss_64.isf) <=
367	       sizeof(pcb->iss->ss_64));
368
369	bzero((char *)pcb->iss, sizeof(x86_saved_state_t));
370
371        if (task_has_64BitAddr(task)) {
372		pcb->iss->flavor = x86_SAVED_STATE64;
373
374		pcb->iss->ss_64.isf.cs = USER64_CS;
375		pcb->iss->ss_64.isf.ss = USER_DS;
376		pcb->iss->ss_64.fs = USER_DS;
377		pcb->iss->ss_64.gs = USER_DS;
378		pcb->iss->ss_64.isf.rflags = EFL_USER_SET;
379	} else {
380		pcb->iss->flavor = x86_SAVED_STATE32;
381
382		pcb->iss->ss_32.cs = USER_CS;
383		pcb->iss->ss_32.ss = USER_DS;
384		pcb->iss->ss_32.ds = USER_DS;
385		pcb->iss->ss_32.es = USER_DS;
386		pcb->iss->ss_32.fs = USER_DS;
387		pcb->iss->ss_32.gs = USER_DS;
388		pcb->iss->ss_32.efl = EFL_USER_SET;
389	}
390
391	simple_lock_init(&pcb->lock, 0);
392
393	pcb->cthread_self = 0;
394	pcb->uldt_selector = 0;
395	pcb->thread_gpu_ns = 0;
396	/* Ensure that the "cthread" descriptor describes a valid
397	 * segment.
398	 */
399	if ((pcb->cthread_desc.access & ACC_P) == 0) {
400		struct real_descriptor  *ldtp;
401		ldtp = (struct real_descriptor *)current_ldt();
402		pcb->cthread_desc = ldtp[sel_idx(USER_DS)];
403	}
404
405	return(KERN_SUCCESS);
406}
407
408/*
409 * Machine-dependent cleanup prior to destroying a thread
410 */
411void
412machine_thread_destroy(
413	thread_t		thread)
414{
415	register pcb_t	pcb = THREAD_TO_PCB(thread);
416
417#if HYPERVISOR
418	if (thread->hv_thread_target) {
419		hv_callbacks.thread_destroy(thread->hv_thread_target);
420		thread->hv_thread_target = NULL;
421	}
422#endif
423
424	if (pcb->ifps != 0)
425		fpu_free(pcb->ifps);
426	if (pcb->iss != 0) {
427		zfree(iss_zone, pcb->iss);
428		pcb->iss = 0;
429	}
430	if (pcb->ids) {
431		zfree(ids_zone, pcb->ids);
432		pcb->ids = NULL;
433	}
434}
435
/*
 * machine_thread_set_tsd_base:
 *
 *	Set the user TSD base (thread-specific data / cthread self) for
 *	'thread'.  Kernel-task threads are rejected with
 *	KERN_INVALID_ARGUMENT.  An invalid base (non-canonical for 64-bit
 *	threads, above 4GB for 32-bit threads) is silently replaced by 0
 *	rather than failing.  If 'thread' is the caller, the new base is
 *	made active immediately (MSR for 64-bit, LDT descriptor for
 *	32-bit); otherwise it takes effect on the next context switch.
 */
kern_return_t
machine_thread_set_tsd_base(
	thread_t			thread,
	mach_vm_offset_t	tsd_base)
{

	if (thread->task == kernel_task) {
		return KERN_INVALID_ARGUMENT;
	}

	if (thread_is_64bit(thread)) {
		/* check for canonical address, set 0 otherwise  */
		if (!IS_USERADDR64_CANONICAL(tsd_base))
			tsd_base = 0ULL;
	} else {
		/* 32-bit threads: base must fit in a 32-bit segment base. */
		if (tsd_base > UINT32_MAX)
			tsd_base = 0ULL;
	}

	pcb_t pcb = THREAD_TO_PCB(thread);
	pcb->cthread_self = tsd_base;

	if (!thread_is_64bit(thread)) {
		/* Set up descriptor for later use */
		struct real_descriptor desc = {
			.limit_low = 1,
			.limit_high = 0,
			.base_low = tsd_base & 0xffff,
			.base_med = (tsd_base >> 16) & 0xff,
			.base_high = (tsd_base >> 24) & 0xff,
			.access = ACC_P|ACC_PL_U|ACC_DATA_W,
			.granularity = SZ_32|SZ_G,
		};

		pcb->cthread_desc = desc;
		/* Saved GS selects the cthread descriptor on return to user. */
		saved_state32(pcb->iss)->gs = USER_CTHREAD;
	}

	/* For current thread, make the TSD base active immediately */
	if (thread == current_thread()) {

		if (thread_is_64bit(thread)) {
			cpu_data_t              *cdp;

			/* Preemption off: MSR and per-cpu shadow must agree. */
			mp_disable_preemption();
			cdp = current_cpu_datap();
			/*
			 * Write the MSR only if either the shadow or the MSR
			 * itself is stale; the shadow is refreshed either way.
			 */
			if ((cdp->cpu_uber.cu_user_gs_base != pcb->cthread_self) ||
				(pcb->cthread_self != rdmsr64(MSR_IA32_KERNEL_GS_BASE)))
				wrmsr64(MSR_IA32_KERNEL_GS_BASE, tsd_base);
			cdp->cpu_uber.cu_user_gs_base = tsd_base;
			mp_enable_preemption();
		} else {

			/* assign descriptor */
			mp_disable_preemption();
			*ldt_desc_p(USER_CTHREAD) = pcb->cthread_desc;
			mp_enable_preemption();
		}
	}

	return KERN_SUCCESS;
}
498