1/*
2 * Copyright 2014, General Dynamics C4 Systems
3 *
4 * This software may be distributed and modified according to the terms of
5 * the GNU General Public License version 2. Note that NO WARRANTY is provided.
6 * See "LICENSE_GPLv2.txt" for details.
7 *
8 * @TAG(GD_GPL)
9 */
10
11#include <config.h>
12#include <model/statedata.h>
13#include <machine/fpu.h>
14#include <arch/fastpath/fastpath.h>
15#include <arch/kernel/traps.h>
16#include <machine/debug.h>
17#include <arch/object/vcpu.h>
18#include <api/syscall.h>
19#include <arch/api/vmenter.h>
20
21#include <benchmark/benchmark_track.h>
22#include <benchmark/benchmark_utilisation.h>
23
24void VISIBLE
25c_nested_interrupt(int irq)
26{
27    /* This is not a real entry point, so we do not grab locks or
28     * run c_entry/exit_hooks, since this occurs only if we're already
29     * running inside the kernel. Just record the irq and return */
30    assert(ARCH_NODE_STATE(x86KSPendingInterrupt) == int_invalid);
31    ARCH_NODE_STATE(x86KSPendingInterrupt) = irq;
32}
33
/*
 * C entry point for interrupt, exception and (on ia32) software-trap
 * vectors. `irq` is the vector number pushed by the assembly stub;
 * `syscall` carries the user's EAX value so that a trap can be folded
 * into an unknown-syscall number. Exits to user mode through
 * restore_user_context() and never returns.
 */
void VISIBLE NORETURN
c_handle_interrupt(int irq, int syscall)
{
    /* need to run this first as the NODE_LOCK code might end up as a function call
     * with a return, and we need to make sure returns are not exploitable yet.
     * On x64 this code ran already. */
    if (config_set(CONFIG_ARCH_IA32) && config_set(CONFIG_KERNEL_X86_IBRS_BASIC)) {
        x86_enable_ibrs();
    }

    /* Only grab the lock if we are not handling the 'int_remote_call_ipi'
     * interrupt; also flag the lock as an IRQ lock when handling a hardware
     * IRQ vector. */
    NODE_LOCK_IF(irq != int_remote_call_ipi,
                 irq >= int_irq_min && irq <= int_irq_max);

    c_entry_hook();

    if (irq == int_unimpl_dev) {
        /* 'Unimplemented device' (FPU unavailable) fault. */
        handleFPUFault();
#ifdef TRACK_KERNEL_ENTRIES
        ksKernelEntry.path = Entry_UnimplementedDevice;
        ksKernelEntry.word = irq;
#endif
    } else if (irq == int_page_fault) {
        /* The page-fault error code is in Error. Extract bit 4 (zero-indexed),
         * which says whether the fault was an instruction fetch or a data
         * access. */
        vm_fault_type_t type = (NODE_STATE(ksCurThread)->tcbArch.tcbContext.registers[Error] >> 4u) & 1u;
#ifdef TRACK_KERNEL_ENTRIES
        ksKernelEntry.path = Entry_VMFault;
        ksKernelEntry.word = type;
#endif
        handleVMFaultEvent(type);
#ifdef CONFIG_HARDWARE_DEBUG_API
    } else if (irq == int_debug || irq == int_software_break_request) {
        /* User-level debug exception or software break request. */
#ifdef TRACK_KERNEL_ENTRIES
        ksKernelEntry.path = Entry_DebugFault;
        ksKernelEntry.word = NODE_STATE(ksCurThread)->tcbArch.tcbContext.registers[FaultIP];
#endif
        handleUserLevelDebugException(irq);
#endif /* CONFIG_HARDWARE_DEBUG_API */
    } else if (irq < int_irq_min) {
        /* Any other vector below the IRQ range is an exception raised by the
         * current thread. */
#ifdef TRACK_KERNEL_ENTRIES
        ksKernelEntry.path = Entry_UserLevelFault;
        ksKernelEntry.word = irq;
#endif
        handleUserLevelFault(irq, NODE_STATE(ksCurThread)->tcbArch.tcbContext.registers[Error]);
    } else if (likely(irq < int_trap_min)) {
        /* Hardware interrupt: record the active vector before dispatching. */
        ARCH_NODE_STATE(x86KScurInterrupt) = irq;
#ifdef TRACK_KERNEL_ENTRIES
        ksKernelEntry.path = Entry_Interrupt;
        ksKernelEntry.word = irq;
#endif
        handleInterruptEntry();
        /* check for other pending interrupts */
        receivePendingIRQ();
    } else if (irq == int_spurious) {
        /* fall through to restore_user_context and do nothing */
    } else {
        /* Interpret a trap as an unknown syscall */
        /* Adjust FaultIP to point to the trapping INT instruction by
         * subtracting 2 (INT n is a two-byte instruction). */
        int sys_num;
        NODE_STATE(ksCurThread)->tcbArch.tcbContext.registers[FaultIP] -= 2;
        /* trap number is MSBs of the syscall number and the LSBS of EAX */
        sys_num = (irq << 24) | (syscall & 0x00ffffff);
#ifdef TRACK_KERNEL_ENTRIES
        ksKernelEntry.path = Entry_UnknownSyscall;
        ksKernelEntry.word = sys_num;
#endif
        handleUnknownSyscall(sys_num);
    }
    restore_user_context();
    UNREACHABLE();
}
108
109void NORETURN
110slowpath(syscall_t syscall)
111{
112
113#ifdef CONFIG_VTX
114    if (syscall == SysVMEnter) {
115        vcpu_update_state_sysvmenter(NODE_STATE(ksCurThread)->tcbArch.tcbVCPU);
116        if (NODE_STATE(ksCurThread)->tcbBoundNotification && notification_ptr_get_state(NODE_STATE(ksCurThread)->tcbBoundNotification) == NtfnState_Active) {
117            completeSignal(NODE_STATE(ksCurThread)->tcbBoundNotification, NODE_STATE(ksCurThread));
118            setRegister(NODE_STATE(ksCurThread), msgInfoRegister, SEL4_VMENTER_RESULT_NOTIF);
119            /* Any guest state that we should return is in the same
120             * register position as sent to us, so we can just return
121             * and let the user pick up the values they put in */
122            restore_user_context();
123        } else {
124            setThreadState(NODE_STATE(ksCurThread), ThreadState_RunningVM);
125            restore_user_context();
126        }
127    }
128#endif
129    /* check for undefined syscall */
130    if (unlikely(syscall < SYSCALL_MIN || syscall > SYSCALL_MAX)) {
131#ifdef TRACK_KERNEL_ENTRIES
132        ksKernelEntry.path = Entry_UnknownSyscall;
133        /* ksKernelEntry.word word is already set to syscall */
134#endif /* TRACK_KERNEL_ENTRIES */
135        handleUnknownSyscall(syscall);
136    } else {
137#ifdef TRACK_KERNEL_ENTRIES
138        ksKernelEntry.is_fastpath = 0;
139#endif /* TRACK KERNEL ENTRIES */
140        handleSyscall(syscall);
141    }
142
143    restore_user_context();
144    UNREACHABLE();
145}
146
147void VISIBLE NORETURN
148c_handle_syscall(word_t cptr, word_t msgInfo, syscall_t syscall, word_t reply)
149{
150    /* need to run this first as the NODE_LOCK code might end up as a function call
151     * with a return, and we need to make sure returns are not exploitable yet */
152    if (config_set(CONFIG_KERNEL_X86_IBRS_BASIC)) {
153        x86_enable_ibrs();
154    }
155
156    NODE_LOCK_SYS;
157
158    c_entry_hook();
159
160#ifdef TRACK_KERNEL_ENTRIES
161    benchmark_debug_syscall_start(cptr, msgInfo, syscall);
162    ksKernelEntry.is_fastpath = 1;
163#endif /* TRACK_KERNEL_ENTRIES */
164
165    if (config_set(CONFIG_SYSENTER)) {
166        /* increment NextIP to skip sysenter */
167        NODE_STATE(ksCurThread)->tcbArch.tcbContext.registers[NextIP] += 2;
168    } else {
169        /* set FaultIP */
170        setRegister(NODE_STATE(ksCurThread), FaultIP, getRegister(NODE_STATE(ksCurThread), NextIP) - 2);
171    }
172
173#ifdef CONFIG_FASTPATH
174    if (syscall == (syscall_t)SysCall) {
175        fastpath_call(cptr, msgInfo);
176        UNREACHABLE();
177    } else if (syscall == (syscall_t)SysReplyRecv) {
178        fastpath_reply_recv(cptr, msgInfo, reply);
179        UNREACHABLE();
180    }
181#endif /* CONFIG_FASTPATH */
182    slowpath(syscall);
183    UNREACHABLE();
184}
185
186#ifdef CONFIG_VTX
/*
 * C entry point after a VT-x VM exit. Flushes the RSB, runs the entry
 * hook, and hands off to handleVmexit(); afterwards it restores FS/GS
 * cache coherency if we are returning to ordinary user mode. Never
 * returns.
 */
void VISIBLE NORETURN c_handle_vmexit(void)
{
#ifdef TRACK_KERNEL_ENTRIES
    ksKernelEntry.path = Entry_VMExit;
#endif

    /* We *always* need to flush the rsb as a guest may have been able to train the rsb with kernel addresses */
    x86_flush_rsb();

    c_entry_hook();
    /* NODE_LOCK will get called in handleVmexit */
    handleVmexit();
    /* When we switched out of VMX mode the FS and GS registers were clobbered
     * and set to potentially undefined values. If we are going to switch back
     * to VMX mode then this is fine, but if we are switching to user mode we
     * need to make sure we reload the correct values of FS and GS. Unfortunately
     * our cached values in x86KSCurrent[FG]SBase now mismatch what is in the
     * hardware. To force a reload to happen we set the cached value to something
     * that is guaranteed to not be the target threads value, ensuring both
     * the cache and the hardware get updated */
    tcb_t *cur_thread = NODE_STATE(ksCurThread);
    if (thread_state_ptr_get_tsType(&cur_thread->tcbState) != ThreadState_RunningVM) {
        /* -1 can never be a legitimate segment base, so any thread's real
         * value will differ from the cache and force a reload. */
        ARCH_NODE_STATE(x86KSCurrentGSBase) = -(word_t)1;
        ARCH_NODE_STATE(x86KSCurrentFSBase) = -(word_t)1;
    }
    restore_user_context();
    UNREACHABLE();
}
215#endif
216