1/*
2 * Copyright 2014, General Dynamics C4 Systems
3 *
4 * SPDX-License-Identifier: GPL-2.0-only
5 */
6
7#include <config.h>
8#include <model/statedata.h>
9#include <machine/fpu.h>
10#include <arch/fastpath/fastpath.h>
11#include <arch/kernel/traps.h>
12#include <machine/debug.h>
13#include <arch/object/vcpu.h>
14#include <api/syscall.h>
15#include <sel4/arch/vmenter.h>
16
17#include <benchmark/benchmark_track.h>
18#include <benchmark/benchmark_utilisation.h>
19
void VISIBLE c_nested_interrupt(int irq)
{
    /* Record an interrupt that arrived while we were already executing
     * inside the kernel. This is not a real entry point, so we do not grab
     * locks or run c_entry/exit_hooks, since this occurs only if we're
     * already running inside the kernel. Just record the irq and return;
     * it is presumably picked up later via receivePendingIRQ() — see
     * c_handle_interrupt. At most one interrupt may be pending at a time,
     * hence the assertion that the slot is currently unused. */
    assert(ARCH_NODE_STATE(x86KSPendingInterrupt) == int_invalid);
    ARCH_NODE_STATE(x86KSPendingInterrupt) = irq;
}
28
/* C-level entry point for interrupts, exceptions and software traps.
 * 'irq' is the vector number; 'syscall' carries the value of EAX, used only
 * when the vector is interpreted as an unknown syscall. Never returns:
 * always exits via restore_user_context(). */
void VISIBLE NORETURN c_handle_interrupt(int irq, int syscall)
{
    /* need to run this first as the NODE_LOCK code might end up as a function call
     * with a return, and we need to make sure returns are not exploitable yet
     * (IBRS mitigates speculative abuse of return addresses);
     * on x64 this code ran already in the assembly entry stub */
    if (config_set(CONFIG_ARCH_IA32) && config_set(CONFIG_KERNEL_X86_IBRS_BASIC)) {
        x86_enable_ibrs();
    }

    /* Only grab the lock if we are not handling the 'int_remote_call_ipi'
     * interrupt; also flag this lock as an IRQ lock if we are handling a
     * vector in the hardware-interrupt range (as opposed to an exception
     * or software trap). */
    NODE_LOCK_IF(irq != int_remote_call_ipi,
                 irq >= int_irq_min && irq <= int_irq_max);

    c_entry_hook();

    if (irq == int_unimpl_dev) {
        /* Device-not-available: the current thread used the FPU while it was
         * disabled; restore its FPU state lazily. */
        handleFPUFault();
#ifdef TRACK_KERNEL_ENTRIES
        ksKernelEntry.path = Entry_UnimplementedDevice;
        ksKernelEntry.word = irq;
#endif
    } else if (irq == int_page_fault) {
        /* Page fault: the error code is in the Error register. Pull out
         * bit 4 (zero-indexed; presumably the I/D flag — instruction fetch
         * vs. data access) to get the fault type. */
        vm_fault_type_t type = (NODE_STATE(ksCurThread)->tcbArch.tcbContext.registers[Error] >> 4u) & 1u;
#ifdef TRACK_KERNEL_ENTRIES
        ksKernelEntry.path = Entry_VMFault;
        ksKernelEntry.word = type;
#endif
        handleVMFaultEvent(type);
#ifdef CONFIG_HARDWARE_DEBUG_API
    } else if (irq == int_debug || irq == int_software_break_request) {
        /* Debug exception (hardware debug or software breakpoint request) */
#ifdef TRACK_KERNEL_ENTRIES
        ksKernelEntry.path = Entry_DebugFault;
        ksKernelEntry.word = NODE_STATE(ksCurThread)->tcbArch.tcbContext.registers[FaultIP];
#endif
        handleUserLevelDebugException(irq);
#endif /* CONFIG_HARDWARE_DEBUG_API */
    } else if (irq < int_irq_min) {
        /* Any other vector below the IRQ range is a CPU exception raised by
         * the current user-level thread. */
#ifdef TRACK_KERNEL_ENTRIES
        ksKernelEntry.path = Entry_UserLevelFault;
        ksKernelEntry.word = irq;
#endif
        handleUserLevelFault(irq, NODE_STATE(ksCurThread)->tcbArch.tcbContext.registers[Error]);
    } else if (likely(irq < int_trap_min)) {
        /* Hardware interrupt: record the vector for the handling code, then
         * process it. */
        ARCH_NODE_STATE(x86KScurInterrupt) = irq;
#ifdef TRACK_KERNEL_ENTRIES
        ksKernelEntry.path = Entry_Interrupt;
        ksKernelEntry.word = irq;
#endif
        handleInterruptEntry();
        /* check for other pending interrupts (e.g. ones recorded by
         * c_nested_interrupt while we were already inside the kernel) */
        receivePendingIRQ();
    } else if (irq == int_spurious) {
        /* fall through to restore_user_context and do nothing */
    } else {
        /* Interpret a trap as an unknown syscall */
        /* Adjust FaultIP to point to the trapping INT
         * instruction by subtracting its 2-byte length */
        int sys_num;
        NODE_STATE(ksCurThread)->tcbArch.tcbContext.registers[FaultIP] -= 2;
        /* trap number is the MSBs of the syscall number; the LSBs come
         * from EAX */
        sys_num = (irq << 24) | (syscall & 0x00ffffff);
#ifdef TRACK_KERNEL_ENTRIES
        ksKernelEntry.path = Entry_UnknownSyscall;
        ksKernelEntry.word = sys_num;
#endif
        handleUnknownSyscall(sys_num);
    }
    restore_user_context();
    UNREACHABLE();
}
102
/* Full (non-fastpath) syscall dispatch. Validates the syscall number and
 * routes to the generic handler; with VTX enabled, SysVMEnter on a thread
 * with a VCPU resumes guest execution instead. Never returns: always exits
 * via restore_user_context(). */
void NORETURN slowpath(syscall_t syscall)
{

#ifdef CONFIG_VTX
    if (syscall == SysVMEnter && NODE_STATE(ksCurThread)->tcbArch.tcbVCPU) {
        vcpu_update_state_sysvmenter(NODE_STATE(ksCurThread)->tcbArch.tcbVCPU);
        if (NODE_STATE(ksCurThread)->tcbBoundNotification
            && notification_ptr_get_state(NODE_STATE(ksCurThread)->tcbBoundNotification) == NtfnState_Active) {
            /* A signal is pending on the bound notification: deliver it to
             * the thread instead of entering the guest, and report this via
             * the message-info register. */
            completeSignal(NODE_STATE(ksCurThread)->tcbBoundNotification, NODE_STATE(ksCurThread));
            setRegister(NODE_STATE(ksCurThread), msgInfoRegister, SEL4_VMENTER_RESULT_NOTIF);
            /* Any guest state that we should return is in the same
             * register position as sent to us, so we can just return
             * and let the user pick up the values they put in */
            restore_user_context();
        } else {
            /* No pending signal: mark the thread as running in the guest
             * and enter the VM. */
            setThreadState(NODE_STATE(ksCurThread), ThreadState_RunningVM);
            restore_user_context();
        }
    }
#endif
    /* check for undefined syscall */
    if (unlikely(syscall < SYSCALL_MIN || syscall > SYSCALL_MAX)) {
#ifdef TRACK_KERNEL_ENTRIES
        ksKernelEntry.path = Entry_UnknownSyscall;
        /* ksKernelEntry.word is already set to the syscall number */
#endif /* TRACK_KERNEL_ENTRIES */
        handleUnknownSyscall(syscall);
    } else {
#ifdef TRACK_KERNEL_ENTRIES
        /* We reached here from c_handle_syscall, which optimistically set
         * is_fastpath = 1; clear it since we are taking the slowpath. */
        ksKernelEntry.is_fastpath = 0;
#endif /* TRACK KERNEL ENTRIES */
        handleSyscall(syscall);
    }

    restore_user_context();
    UNREACHABLE();
}
140
/* C-level entry point for syscalls. 'cptr' is the invoked capability
 * pointer, 'msgInfo' the message info word, 'syscall' the syscall number
 * (and, under MCS, 'reply' the reply object). Tries the fastpath for
 * SysCall/SysReplyRecv, otherwise falls through to slowpath(). Never
 * returns. */
#ifdef CONFIG_KERNEL_MCS
void VISIBLE NORETURN c_handle_syscall(word_t cptr, word_t msgInfo, syscall_t syscall, word_t reply)
#else
void VISIBLE NORETURN c_handle_syscall(word_t cptr, word_t msgInfo, syscall_t syscall)
#endif
{
    /* need to run this first as the NODE_LOCK code might end up as a function call
     * with a return, and we need to make sure returns are not exploitable yet
     * (IBRS mitigates speculative abuse of return addresses) */
    if (config_set(CONFIG_KERNEL_X86_IBRS_BASIC)) {
        x86_enable_ibrs();
    }

    NODE_LOCK_SYS;

    c_entry_hook();

#ifdef TRACK_KERNEL_ENTRIES
    benchmark_debug_syscall_start(cptr, msgInfo, syscall);
    /* Optimistically assume the fastpath; slowpath() clears this flag. */
    ksKernelEntry.is_fastpath = 1;
#endif /* TRACK_KERNEL_ENTRIES */

    if (config_set(CONFIG_SYSENTER)) {
        /* increment NextIP to skip over the 2-byte sysenter instruction */
        NODE_STATE(ksCurThread)->tcbArch.tcbContext.registers[NextIP] += 2;
    } else {
        /* set FaultIP to the address of the trapping instruction (2 bytes
         * before NextIP) */
        setRegister(NODE_STATE(ksCurThread), FaultIP, getRegister(NODE_STATE(ksCurThread), NextIP) - 2);
    }

#ifdef CONFIG_FASTPATH
    if (syscall == (syscall_t)SysCall) {
        fastpath_call(cptr, msgInfo);
        UNREACHABLE();
    } else if (syscall == (syscall_t)SysReplyRecv) {
#ifdef CONFIG_KERNEL_MCS
        fastpath_reply_recv(cptr, msgInfo, reply);
#else
        fastpath_reply_recv(cptr, msgInfo);
#endif
        UNREACHABLE();
    }
#endif /* CONFIG_FASTPATH */
    slowpath(syscall);
    UNREACHABLE();
}
186
#ifdef CONFIG_VTX
/* C-level entry point for a VM exit from a VTX guest. Flushes the RSB,
 * forces a reload of FS/GS base, then hands off to handleVmexit(). Never
 * returns: exits via restore_user_context(). */
void VISIBLE NORETURN c_handle_vmexit(void)
{
#ifdef TRACK_KERNEL_ENTRIES
    ksKernelEntry.path = Entry_VMExit;
#endif

    /* We *always* need to flush the rsb as a guest may have been able to train the rsb with kernel addresses */
    x86_flush_rsb();

    /* When we switched out of VMX mode the FS and GS registers were
     * clobbered and set to potentially undefined values. we need to
     * make sure we reload the correct values of FS and GS.
     * Unfortunately our cached values in x86KSCurrent[FG]SBase now
     * mismatch what is in the hardware. To force a reload to happen we
     * set the cached value to something that is guaranteed to not be
     * the target threads value (all-ones), ensuring both the cache and
     * the hardware get updated.
     *
     * This needs to happen before the entry hook which will try to
     * restore the registers without having a means to determine whether
     * they may have been dirtied by a VM exit. */
    tcb_t *cur_thread = NODE_STATE(ksCurThread);
    ARCH_NODE_STATE(x86KSCurrentGSBase) = -(word_t)1;
    ARCH_NODE_STATE(x86KSCurrentFSBase) = -(word_t)1;
    x86_load_fsgs_base(cur_thread, SMP_TERNARY(getCurrentCPUIndex(), 0));

    c_entry_hook();
    /* NODE_LOCK will get called in handleVmexit */
    handleVmexit();
    restore_user_context();
    UNREACHABLE();
}
#endif
221