1/*
2 * Copyright 2018, Jérôme Duval, jerome.duval@gmail.com.
3 * Copyright 2012, Alex Smith, alex@alex-smith.me.uk.
4 * Copyright 2002-2008, Axel Dörfler, axeld@pinc-software.de.
5 * Distributed under the terms of the MIT License.
6 *
7 * Copyright 2001, Travis Geiselbrecht. All rights reserved.
8 * Distributed under the terms of the NewOS License.
9 */
10
11
12#include <arch/thread.h>
13
14#include <string.h>
15
16#include <arch_thread_defs.h>
17#include <commpage.h>
18#include <cpu.h>
19#include <debug.h>
20#include <generic_syscall.h>
21#include <kernel.h>
22#include <ksignal.h>
23#include <int.h>
24#include <team.h>
25#include <thread.h>
26#include <tls.h>
27#include <tracing.h>
28#include <util/Random.h>
29#include <vm/vm_types.h>
30#include <vm/VMAddressSpace.h>
31
32#include "paging/X86PagingStructures.h"
33#include "paging/X86VMTranslationMap.h"
34
35
// Uncomment to have TRACE() print its arguments via dprintf().
//#define TRACE_ARCH_THREAD
#ifdef TRACE_ARCH_THREAD
#	define TRACE(x...) dprintf(x)
#else
	// Expand to a real (empty) statement rather than a bare ';', so that
	// "TRACE(...);" remains exactly one statement and can be used as the
	// unbraced body of an if that has an else branch.
#	define TRACE(x...) do { } while (0)
#endif
42
43
#ifdef SYSCALL_TRACING

namespace SyscallTracing {

// Trace entry created by x86_restart_syscall(); it is dumped as the fixed
// text "syscall restart".
class RestartSyscall : public AbstractTraceEntry {
public:
	RestartSyscall()
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		out.Print("syscall restart");
	}
};

}	// namespace SyscallTracing

// Creates the given SyscallTracing entry using the non-throwing new.
#	define TSYSCALL(x)	new(std::nothrow) SyscallTracing::x

#else
	// Syscall tracing is compiled out: TSYSCALL() expands to nothing.
#	define TSYSCALL(x)
#endif	// SYSCALL_TRACING
68
69
70extern "C" void x86_64_thread_entry();
71
72// Initial thread saved state.
73static arch_thread sInitialState _ALIGNED(64);
74uint16 gFPUControlDefault;
75uint32 gFPUMXCSRDefault;
76extern uint64 gFPUSaveLength;
77extern bool gHasXsave;
78extern bool gHasXsavec;
79
80
81void
82x86_restart_syscall(iframe* frame)
83{
84	Thread* thread = thread_get_current_thread();
85
86	atomic_and(&thread->flags, ~THREAD_FLAGS_RESTART_SYSCALL);
87	atomic_or(&thread->flags, THREAD_FLAGS_SYSCALL_RESTARTED);
88
89	// Get back the original system call number and modify the frame to
90	// re-execute the syscall instruction.
91	frame->ax = frame->orig_rax;
92	frame->ip -= 2;
93
94	TSYSCALL(RestartSyscall());
95}
96
97
98void
99x86_set_tls_context(Thread* thread)
100{
101	// Set FS segment base address to the TLS segment.
102	x86_write_msr(IA32_MSR_FS_BASE, thread->user_local_storage);
103	x86_write_msr(IA32_MSR_KERNEL_GS_BASE, thread->arch_info.user_gs_base);
104}
105
106
107static addr_t
108arch_randomize_stack_pointer(addr_t value)
109{
110	static_assert(MAX_RANDOM_VALUE >= B_PAGE_SIZE - 1,
111		"randomization range is too big");
112	value -= random_value() & (B_PAGE_SIZE - 1);
113	return (value & ~addr_t(0xf)) - 8;
114		// This means, result % 16 == 8, which is what rsp should adhere to
115		// when a function is entered for the stack to be considered aligned to
116		// 16 byte.
117}
118
119
120static uint8*
121get_signal_stack(Thread* thread, iframe* frame, struct sigaction* action,
122	size_t spaceNeeded)
123{
124	// Use the alternate signal stack if we should and can.
125	if (thread->signal_stack_enabled
126			&& (action->sa_flags & SA_ONSTACK) != 0
127			&& (frame->user_sp < thread->signal_stack_base
128				|| frame->user_sp >= thread->signal_stack_base
129					+ thread->signal_stack_size)) {
130		addr_t stackTop = thread->signal_stack_base + thread->signal_stack_size;
131		return (uint8*)arch_randomize_stack_pointer(stackTop - spaceNeeded);
132	}
133
134	// We are going to use the stack that we are already on. We must not touch
135	// the red zone (128 byte area below the stack pointer, reserved for use
136	// by functions to store temporary data and guaranteed not to be modified
137	// by signal handlers).
138	return (uint8*)((frame->user_sp - 128 - spaceNeeded) & ~addr_t(0xf)) - 8;
139		// align stack pointer (cf. arch_randomize_stack_pointer())
140}
141
142
143static status_t
144arch_thread_control(const char* subsystem, uint32 function, void* buffer,
145	size_t bufferSize)
146{
147	switch (function) {
148		case THREAD_SET_GS_BASE:
149		{
150			uint64 base;
151			if (bufferSize != sizeof(base))
152				return B_BAD_VALUE;
153
154			if (!IS_USER_ADDRESS(buffer)
155				|| user_memcpy(&base, buffer, sizeof(base)) < B_OK) {
156				return B_BAD_ADDRESS;
157			}
158
159			Thread* thread = thread_get_current_thread();
160			thread->arch_info.user_gs_base = base;
161			x86_write_msr(IA32_MSR_KERNEL_GS_BASE, base);
162			return B_OK;
163		}
164	}
165	return B_BAD_HANDLER;
166}
167
168
169//	#pragma mark -
170
171
172status_t
173arch_thread_init(kernel_args* args)
174{
175	// Save one global valid FPU state; it will be copied in the arch dependent
176	// part of each new thread.
177	if (gHasXsave || gHasXsavec) {
178		memset(sInitialState.fpu_state, 0, gFPUSaveLength);
179		if (gHasXsavec) {
180			asm volatile (
181				"clts;"		\
182				"fninit;"	\
183				"fnclex;"	\
184				"movl $0x7,%%eax;"	\
185				"movl $0x0,%%edx;"	\
186				"xsavec64 %0"
187				:: "m" (sInitialState.fpu_state));
188		} else {
189			asm volatile (
190				"clts;"		\
191				"fninit;"	\
192				"fnclex;"	\
193				"movl $0x7,%%eax;"	\
194				"movl $0x0,%%edx;"	\
195				"xsave64 %0"
196				:: "m" (sInitialState.fpu_state));
197		}
198	} else {
199		asm volatile (
200			"clts;"		\
201			"fninit;"	\
202			"fnclex;"	\
203			"fxsaveq %0"
204			:: "m" (sInitialState.fpu_state));
205	}
206	gFPUControlDefault = ((savefpu*)&sInitialState.fpu_state)->fp_fxsave.control;
207	gFPUMXCSRDefault = ((savefpu*)&sInitialState.fpu_state)->fp_fxsave.mxcsr;
208
209	register_generic_syscall(THREAD_SYSCALLS, arch_thread_control, 1, 0);
210
211	return B_OK;
212}
213
214
215status_t
216arch_thread_init_thread_struct(Thread* thread)
217{
218	// Copy the initial saved FPU state to the new thread.
219	memcpy(&thread->arch_info, &sInitialState, sizeof(arch_thread));
220
221	// Initialise the current thread pointer.
222	thread->arch_info.thread = thread;
223
224	return B_OK;
225}
226
227
228/*!	Prepares the given thread's kernel stack for executing its entry function.
229
230	\param thread The thread.
231	\param stack The usable bottom of the thread's kernel stack.
232	\param stackTop The usable top of the thread's kernel stack.
233	\param function The entry function the thread shall execute.
234	\param data Pointer to be passed to the entry function.
235*/
236void
237arch_thread_init_kthread_stack(Thread* thread, void* _stack, void* _stackTop,
238	void (*function)(void*), const void* data)
239{
240	uintptr_t* stackTop = static_cast<uintptr_t*>(_stackTop);
241
242	TRACE("arch_thread_init_kthread_stack: stack top %p, function %p, data: "
243		"%p\n", _stackTop, function, data);
244
245	// Save the stack top for system call entry.
246	thread->arch_info.syscall_rsp = (uint64*)thread->kernel_stack_top;
247
248	thread->arch_info.instruction_pointer
249		= reinterpret_cast<uintptr_t>(x86_64_thread_entry);
250
251	*--stackTop = uintptr_t(data);
252	*--stackTop = uintptr_t(function);
253
254	// Save the stack position.
255	thread->arch_info.current_stack = stackTop;
256}
257
258
259void
260arch_thread_dump_info(void* info)
261{
262	arch_thread* thread = (arch_thread*)info;
263
264	kprintf("\trsp: %p\n", thread->current_stack);
265	kprintf("\tsyscall_rsp: %p\n", thread->syscall_rsp);
266	kprintf("\tuser_rsp: %p\n", thread->user_rsp);
267	kprintf("\tfpu_state at %p\n", thread->fpu_state);
268}
269
270
271/*!	Sets up initial thread context and enters user space
272*/
273status_t
274arch_thread_enter_userspace(Thread* thread, addr_t entry, void* args1,
275	void* args2)
276{
277	addr_t stackTop = thread->user_stack_base + thread->user_stack_size;
278	addr_t codeAddr;
279
280	TRACE("arch_thread_enter_userspace: entry %#lx, args %p %p, "
281		"stackTop %#lx\n", entry, args1, args2, stackTop);
282
283	stackTop = arch_randomize_stack_pointer(stackTop - sizeof(codeAddr));
284
285	// Copy the address of the stub that calls exit_thread() when the thread
286	// entry function returns to the top of the stack to act as the return
287	// address. The stub is inside commpage.
288	addr_t commPageAddress = (addr_t)thread->team->commpage_address;
289	set_ac();
290	codeAddr = ((addr_t*)commPageAddress)[COMMPAGE_ENTRY_X86_THREAD_EXIT]
291		+ commPageAddress;
292	clear_ac();
293	if (user_memcpy((void*)stackTop, (const void*)&codeAddr, sizeof(codeAddr))
294			!= B_OK)
295		return B_BAD_ADDRESS;
296
297	// Prepare the user iframe.
298	iframe frame = {};
299	frame.type = IFRAME_TYPE_SYSCALL;
300	frame.si = (uint64)args2;
301	frame.di = (uint64)args1;
302	frame.ip = entry;
303	frame.cs = USER_CODE_SELECTOR;
304	frame.flags = X86_EFLAGS_RESERVED1 | X86_EFLAGS_INTERRUPT;
305	frame.sp = stackTop;
306	frame.ss = USER_DATA_SELECTOR;
307
308	// Return to userland. Never returns.
309	x86_initial_return_to_userland(thread, &frame);
310
311	return B_OK;
312}
313
314
315/*!	Sets up the user iframe for invoking a signal handler.
316
317	The function fills in the remaining fields of the given \a signalFrameData,
318	copies it to the thread's userland stack (the one on which the signal shall
319	be handled), and sets up the user iframe so that when returning to userland
320	a wrapper function is executed that calls the user-defined signal handler.
321	When the signal handler returns, the wrapper function shall call the
322	"restore signal frame" syscall with the (possibly modified) signal frame
323	data.
324
325	The following fields of the \a signalFrameData structure still need to be
326	filled in:
327	- \c context.uc_stack: The stack currently used by the thread.
328	- \c context.uc_mcontext: The current userland state of the registers.
329	- \c syscall_restart_return_value: Architecture specific use. On x86_64 the
330		value of rax which is overwritten by the syscall return value.
331
332	Furthermore the function needs to set \c thread->user_signal_context to the
333	userland pointer to the \c ucontext_t on the user stack.
334
335	\param thread The current thread.
336	\param action The signal action specified for the signal to be handled.
337	\param signalFrameData A partially initialized structure of all the data
338		that need to be copied to userland.
339	\return \c B_OK on success, another error code, if something goes wrong.
340*/
341status_t
342arch_setup_signal_frame(Thread* thread, struct sigaction* action,
343	struct signal_frame_data* signalFrameData)
344{
345	iframe* frame = x86_get_current_iframe();
346	if (!IFRAME_IS_USER(frame)) {
347		panic("arch_setup_signal_frame(): No user iframe!");
348		return B_BAD_VALUE;
349	}
350
351	// Store the register state.
352	signalFrameData->context.uc_mcontext.rax = frame->ax;
353	signalFrameData->context.uc_mcontext.rbx = frame->bx;
354	signalFrameData->context.uc_mcontext.rcx = frame->cx;
355	signalFrameData->context.uc_mcontext.rdx = frame->dx;
356	signalFrameData->context.uc_mcontext.rdi = frame->di;
357	signalFrameData->context.uc_mcontext.rsi = frame->si;
358	signalFrameData->context.uc_mcontext.rbp = frame->bp;
359	signalFrameData->context.uc_mcontext.r8 = frame->r8;
360	signalFrameData->context.uc_mcontext.r9 = frame->r9;
361	signalFrameData->context.uc_mcontext.r10 = frame->r10;
362	signalFrameData->context.uc_mcontext.r11 = frame->r11;
363	signalFrameData->context.uc_mcontext.r12 = frame->r12;
364	signalFrameData->context.uc_mcontext.r13 = frame->r13;
365	signalFrameData->context.uc_mcontext.r14 = frame->r14;
366	signalFrameData->context.uc_mcontext.r15 = frame->r15;
367	signalFrameData->context.uc_mcontext.rsp = frame->user_sp;
368	signalFrameData->context.uc_mcontext.rip = frame->ip;
369	signalFrameData->context.uc_mcontext.rflags = frame->flags;
370
371	if (frame->fpu != nullptr) {
372		memcpy((void*)&signalFrameData->context.uc_mcontext.fpu, frame->fpu,
373			gFPUSaveLength);
374	} else {
375		memcpy((void*)&signalFrameData->context.uc_mcontext.fpu,
376			sInitialState.fpu_state, gFPUSaveLength);
377	}
378
379	// Fill in signalFrameData->context.uc_stack.
380	signal_get_user_stack(frame->user_sp, &signalFrameData->context.uc_stack);
381
382	// Store syscall_restart_return_value.
383	signalFrameData->syscall_restart_return_value = frame->orig_rax;
384
385	// Get the stack to use and copy the frame data to it.
386	uint8* userStack = get_signal_stack(thread, frame, action,
387		sizeof(*signalFrameData) + sizeof(frame->ip));
388
389	signal_frame_data* userSignalFrameData
390		= (signal_frame_data*)(userStack + sizeof(frame->ip));
391
392	if (user_memcpy(userSignalFrameData, signalFrameData,
393			sizeof(*signalFrameData)) != B_OK) {
394		return B_BAD_ADDRESS;
395	}
396
397	// Copy a return address to the stack so that backtraces will be correct.
398	if (user_memcpy(userStack, &frame->ip, sizeof(frame->ip)) != B_OK)
399		return B_BAD_ADDRESS;
400
401	// Update Thread::user_signal_context, now that everything seems to have
402	// gone fine.
403	thread->user_signal_context = &userSignalFrameData->context;
404
405	// Set up the iframe to execute the signal handler wrapper on our prepared
406	// stack. First argument points to the frame data.
407	addr_t* commPageAddress = (addr_t*)thread->team->commpage_address;
408	frame->user_sp = (addr_t)userStack;
409	set_ac();
410	frame->ip = commPageAddress[COMMPAGE_ENTRY_X86_SIGNAL_HANDLER]
411		+ (addr_t)commPageAddress;
412	clear_ac();
413	frame->di = (addr_t)userSignalFrameData;
414	frame->flags &= ~(uint64)(X86_EFLAGS_TRAP | X86_EFLAGS_DIRECTION);
415
416	return B_OK;
417}
418
419
420int64
421arch_restore_signal_frame(struct signal_frame_data* signalFrameData)
422{
423	iframe* frame = x86_get_current_iframe();
424
425	frame->orig_rax = signalFrameData->syscall_restart_return_value;
426	frame->ax = signalFrameData->context.uc_mcontext.rax;
427	frame->bx = signalFrameData->context.uc_mcontext.rbx;
428	frame->cx = signalFrameData->context.uc_mcontext.rcx;
429	frame->dx = signalFrameData->context.uc_mcontext.rdx;
430	frame->di = signalFrameData->context.uc_mcontext.rdi;
431	frame->si = signalFrameData->context.uc_mcontext.rsi;
432	frame->bp = signalFrameData->context.uc_mcontext.rbp;
433	frame->r8 = signalFrameData->context.uc_mcontext.r8;
434	frame->r9 = signalFrameData->context.uc_mcontext.r9;
435	frame->r10 = signalFrameData->context.uc_mcontext.r10;
436	frame->r11 = signalFrameData->context.uc_mcontext.r11;
437	frame->r12 = signalFrameData->context.uc_mcontext.r12;
438	frame->r13 = signalFrameData->context.uc_mcontext.r13;
439	frame->r14 = signalFrameData->context.uc_mcontext.r14;
440	frame->r15 = signalFrameData->context.uc_mcontext.r15;
441	frame->user_sp = signalFrameData->context.uc_mcontext.rsp;
442	frame->ip = signalFrameData->context.uc_mcontext.rip;
443	frame->flags = (frame->flags & ~(uint64)X86_EFLAGS_USER_FLAGS)
444		| (signalFrameData->context.uc_mcontext.rflags & X86_EFLAGS_USER_FLAGS);
445
446	Thread* thread = thread_get_current_thread();
447
448	memcpy(thread->arch_info.fpu_state,
449		(void*)&signalFrameData->context.uc_mcontext.fpu, gFPUSaveLength);
450	frame->fpu = &thread->arch_info.fpu_state;
451
452	// The syscall return code overwrites frame->ax with the return value of
453	// the syscall, need to return it here to ensure the correct value is
454	// restored.
455	return frame->ax;
456}
457