/*
 * Copyright 2014, General Dynamics C4 Systems
 *
 * SPDX-License-Identifier: GPL-2.0-only
 */

#include <config.h>
#include <model/statedata.h>
#include <machine/fpu.h>
#include <arch/fastpath/fastpath.h>
#include <arch/kernel/traps.h>
#include <machine/debug.h>
#include <arch/object/vcpu.h>
#include <api/syscall.h>
#include <sel4/arch/vmenter.h>

#include <benchmark/benchmark_track.h>
#include <benchmark/benchmark_utilisation.h>

void VISIBLE c_nested_interrupt(int irq)
{
    /* This is not a real entry point, so we do not grab locks or
     * run c_entry/exit_hooks, since this occurs only if we're already
     * running inside the kernel. Just record the irq and return. */
    assert(ARCH_NODE_STATE(x86KSPendingInterrupt) == int_invalid);
    ARCH_NODE_STATE(x86KSPendingInterrupt) = irq;
}

void VISIBLE NORETURN c_handle_interrupt(int irq, int syscall)
{
    /* This needs to run first, as the NODE_LOCK code might end up as a function
     * call with a return, and we need to make sure returns are not exploitable
     * yet. On x64 this code has already run. */
    if (config_set(CONFIG_ARCH_IA32) && config_set(CONFIG_KERNEL_X86_IBRS_BASIC)) {
        x86_enable_ibrs();
    }

    /* Only grab the lock if we are not handling the 'int_remote_call_ipi'
     * interrupt; also flag this lock as an IRQ lock if we are handling an
     * IRQ interrupt. */
    NODE_LOCK_IF(irq != int_remote_call_ipi,
                 irq >= int_irq_min && irq <= int_irq_max);

    c_entry_hook();

    if (irq == int_unimpl_dev) {
        handleFPUFault();
#ifdef TRACK_KERNEL_ENTRIES
        ksKernelEntry.path = Entry_UnimplementedDevice;
        ksKernelEntry.word = irq;
#endif
    } else if (irq == int_page_fault) {
        /* The page-fault error code is in Error. Pull out the I/D flag (bit 4),
         * which says whether the fault was an instruction fetch or a data access. */
        vm_fault_type_t type = (NODE_STATE(ksCurThread)->tcbArch.tcbContext.registers[Error] >> 4u) & 1u;
#ifdef TRACK_KERNEL_ENTRIES
        ksKernelEntry.path = Entry_VMFault;
        ksKernelEntry.word = type;
#endif
        handleVMFaultEvent(type);
#ifdef CONFIG_HARDWARE_DEBUG_API
    } else if (irq == int_debug || irq == int_software_break_request) {
        /* Debug exception */
#ifdef TRACK_KERNEL_ENTRIES
        ksKernelEntry.path = Entry_DebugFault;
        ksKernelEntry.word = NODE_STATE(ksCurThread)->tcbArch.tcbContext.registers[FaultIP];
#endif
        handleUserLevelDebugException(irq);
#endif /* CONFIG_HARDWARE_DEBUG_API */
    } else if (irq < int_irq_min) {
#ifdef TRACK_KERNEL_ENTRIES
        ksKernelEntry.path = Entry_UserLevelFault;
        ksKernelEntry.word = irq;
#endif
        handleUserLevelFault(irq, NODE_STATE(ksCurThread)->tcbArch.tcbContext.registers[Error]);
    } else if (likely(irq < int_trap_min)) {
        ARCH_NODE_STATE(x86KScurInterrupt) = irq;
#ifdef TRACK_KERNEL_ENTRIES
        ksKernelEntry.path = Entry_Interrupt;
        ksKernelEntry.word = irq;
#endif
        handleInterruptEntry();
        /* check for other pending interrupts */
        receivePendingIRQ();
    } else if (irq == int_spurious) {
        /* fall through to restore_user_context and do nothing */
    } else {
        /* Interpret a trap as an unknown syscall. */
        /* Adjust FaultIP to point to the trapping INT
         * instruction by subtracting 2. */
        int sys_num;
        NODE_STATE(ksCurThread)->tcbArch.tcbContext.registers[FaultIP] -= 2;
        /* The trap number forms the MSBs of the syscall number; the LSBs come from EAX. */
        sys_num = (irq << 24) | (syscall & 0x00ffffff);
#ifdef TRACK_KERNEL_ENTRIES
        ksKernelEntry.path = Entry_UnknownSyscall;
        ksKernelEntry.word = sys_num;
#endif
        handleUnknownSyscall(sys_num);
    }

    restore_user_context();
    UNREACHABLE();
}

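/* Slow path for syscall handling, used when the fastpath does not apply.
 * With CONFIG_VTX, SysVMEnter is handled here directly; out-of-range syscall
 * numbers are treated as unknown syscalls, and everything else is dispatched
 * to the generic handleSyscall(). */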
void NORETURN slowpath(syscall_t syscall)
{

#ifdef CONFIG_VTX
    if (syscall == SysVMEnter && NODE_STATE(ksCurThread)->tcbArch.tcbVCPU) {
        vcpu_update_state_sysvmenter(NODE_STATE(ksCurThread)->tcbArch.tcbVCPU);
        if (NODE_STATE(ksCurThread)->tcbBoundNotification
            && notification_ptr_get_state(NODE_STATE(ksCurThread)->tcbBoundNotification) == NtfnState_Active) {
            completeSignal(NODE_STATE(ksCurThread)->tcbBoundNotification, NODE_STATE(ksCurThread));
            setRegister(NODE_STATE(ksCurThread), msgInfoRegister, SEL4_VMENTER_RESULT_NOTIF);
            /* Any guest state that we should return is in the same
             * register position as sent to us, so we can just return
             * and let the user pick up the values they put in. */
            restore_user_context();
        } else {
            setThreadState(NODE_STATE(ksCurThread), ThreadState_RunningVM);
            restore_user_context();
        }
    }
#endif
    /* check for undefined syscall */
    if (unlikely(syscall < SYSCALL_MIN || syscall > SYSCALL_MAX)) {
#ifdef TRACK_KERNEL_ENTRIES
        ksKernelEntry.path = Entry_UnknownSyscall;
        /* ksKernelEntry.word is already set to the syscall number */
#endif /* TRACK_KERNEL_ENTRIES */
        handleUnknownSyscall(syscall);
    } else {
#ifdef TRACK_KERNEL_ENTRIES
        ksKernelEntry.is_fastpath = 0;
#endif /* TRACK_KERNEL_ENTRIES */
        handleSyscall(syscall);
    }

    restore_user_context();
    UNREACHABLE();
}

#ifdef CONFIG_KERNEL_MCS
void VISIBLE NORETURN c_handle_syscall(word_t cptr, word_t msgInfo, syscall_t syscall, word_t reply)
#else
void VISIBLE NORETURN c_handle_syscall(word_t cptr, word_t msgInfo, syscall_t syscall)
#endif
{
    /* This needs to run first, as the NODE_LOCK code might end up as a function
     * call with a return, and we need to make sure returns are not exploitable yet. */
    if (config_set(CONFIG_KERNEL_X86_IBRS_BASIC)) {
        x86_enable_ibrs();
    }

    NODE_LOCK_SYS;

    c_entry_hook();

#ifdef TRACK_KERNEL_ENTRIES
    benchmark_debug_syscall_start(cptr, msgInfo, syscall);
    ksKernelEntry.is_fastpath = 1;
#endif /* TRACK_KERNEL_ENTRIES */

    if (config_set(CONFIG_SYSENTER)) {
        /* increment NextIP to skip over the sysenter instruction */
        NODE_STATE(ksCurThread)->tcbArch.tcbContext.registers[NextIP] += 2;
    } else {
        /* set FaultIP */
        setRegister(NODE_STATE(ksCurThread), FaultIP, getRegister(NODE_STATE(ksCurThread), NextIP) - 2);
    }

#ifdef CONFIG_FASTPATH
    if (syscall == (syscall_t)SysCall) {
        fastpath_call(cptr, msgInfo);
        UNREACHABLE();
    } else if (syscall == (syscall_t)SysReplyRecv) {
#ifdef CONFIG_KERNEL_MCS
        fastpath_reply_recv(cptr, msgInfo, reply);
#else
        fastpath_reply_recv(cptr, msgInfo);
#endif
        UNREACHABLE();
    }
#endif /* CONFIG_FASTPATH */
    slowpath(syscall);
    UNREACHABLE();
}

#ifdef CONFIG_VTX
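/* C-level entry point for VM exits. Flush the RSB, force the FS/GS base
 * registers to be reloaded (leaving VMX non-root mode clobbers them), then
 * hand over to handleVmexit(). */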
void VISIBLE NORETURN c_handle_vmexit(void)
{
#ifdef TRACK_KERNEL_ENTRIES
    ksKernelEntry.path = Entry_VMExit;
#endif

    /* We *always* need to flush the RSB, as a guest may have been able to
     * train the RSB with kernel addresses. */
    x86_flush_rsb();

    /* When we switched out of VMX mode the FS and GS registers were
     * clobbered and set to potentially undefined values. We need to
     * make sure we reload the correct values of FS and GS.
     * Unfortunately our cached values in x86KSCurrent[FG]SBase no
     * longer match what is in the hardware. To force a reload to happen we
     * set the cached value to something that is guaranteed to not be
     * the target thread's value, ensuring both the cache and the
     * hardware get updated.
     *
     * This needs to happen before the entry hook, which will try to
     * restore the registers without having a means to determine whether
     * they may have been dirtied by a VM exit. */
    tcb_t *cur_thread = NODE_STATE(ksCurThread);
    ARCH_NODE_STATE(x86KSCurrentGSBase) = -(word_t)1;
    ARCH_NODE_STATE(x86KSCurrentFSBase) = -(word_t)1;
    x86_load_fsgs_base(cur_thread, SMP_TERNARY(getCurrentCPUIndex(), 0));

    c_entry_hook();
    /* NODE_LOCK will get called in handleVmexit */
    handleVmexit();
    restore_user_context();
    UNREACHABLE();
}
#endif