/**
 * \file
 * \brief System call entry point to the kernel and LRPC fast-path
 */

/*
 * Copyright (c) 2007, 2008, 2009, 2010, 2016, ETH Zurich.
 * All rights reserved.
 *
 * This file is distributed under the terms in the attached LICENSE file.
 * If you do not find this file, copies can be found by writing to:
 * ETH Zurich D-INFK, Universitaetstrasse 6, CH-8092 Zurich. Attn: Systems Group.
 */

#include <barrelfish_kpi/syscalls.h>
#include <barrelfish_kpi/capabilities.h>
#include <barrelfish_kpi/lmp.h>
#include <x86.h>
#include <asmoffsets.h>

#ifdef __k1om__
#include <target/k1om/offsets_target.h>
#define KERNEL_STACK_SIZE K1OM_KERNEL_STACK_SIZE
#define KERNEL_STACK k1om_kernel_stack
#define PHYS_TO_MEM_OFFSET 0xffffff0000000000
#else
#include <target/x86_64/offsets_target.h>
#define KERNEL_STACK_SIZE X86_64_KERNEL_STACK_SIZE
#define KERNEL_STACK x86_64_kernel_stack
#define PHYS_TO_MEM_OFFSET 0xfffffe0000000000
#endif

/*
 * Syntax: GNU as, AT&T operand order (src, dst), run through the C
 * preprocessor.
 *
 * syscall_entry
 *
 * In (as used by the code below):
 *   rdi = syscall number (SYSCALL_LRPC selects the fast path)
 *   rsi = LRPC: capability address of the endpoint; otherwise arg0
 *   rcx = user RIP, r11 = user RFLAGS (saved as such on both paths)
 *   rsp = user stack pointer
 *
 * Register roles on the LRPC fast path:
 *   rdi  scratch pointer (save area, root CNode cap), then epoffset
 *   rsi  cap address -> L2 slot index -> listener DCB
 *   rcx  scratch (slot count, CNode base, endpoint cap, endpoint pointer),
 *        finally the RIP handed to sysretq
 *   r11  L1 slot index -> newpos -> LDT base -> RFLAGS for sysretq
 *   r12d "delivered" before the reservation (passed to receiver in esi)
 *   r13d epbuflen
 *   r14d "consumed", later LDT npages
 *   r15  L2 index mask, later the receiver's LRPC entry point
 *   rax  vspace root / dispatcher pointer, finally the user-level
 *        dispatcher pointer
 *   rdx  preserved for the receiver (temporarily parked in r11 while the
 *        timestamp is read)
 *
 * The fast path never switches away from the user stack unless it has to
 * call into C (lrpc_make_runnable, err_disabled, err_buflen).
 */

        .text
        .globl syscall_entry

syscall_entry:
        /* is this an LRPC or a normal syscall? */
        cmp     $SYSCALL_LRPC, %rdi
        jne     syscall_path            /* normal syscall, branch off */

        /* Load pointer to current DCB */
        mov     dcb_current(%rip), %rdi

        /* TODO: Check that caller is not disabled */

        /* dcb_current->disabled=false */
        movb    $0, OFFSETOF_DCB_DISABLED(%rdi)

        /* Save caller's registers into its enabled save area */
        mov     OFFSETOF_DCB_DISP(%rdi), %rdi
        lea     OFFSETOF_DISP_X86_64_ENABLED_AREA(%rdi), %rdi
        movq    $SYS_ERR_OK, OFFSETOF_RAX_REG(%rdi)
        mov     %rcx, OFFSETOF_RIP_REG(%rdi)
        mov     %r11, OFFSETOF_EFLAGS_REG(%rdi)
        mov     %rsp, OFFSETOF_RSP_REG(%rdi)
        mov     %fs, OFFSETOF_FS_REG(%rdi)
        mov     %gs, OFFSETOF_GS_REG(%rdi)
        fxsave  OFFSETOF_FXSAVE_AREA(%rdi)

        /* Load pointer to root CNode cap into %rdi */
        mov     dcb_current(%rip), %rdi
        lea     OFFSETOF_DCB_CSPACE_CAP(%rdi), %rdi

        /* NOTE(review): only the low byte of the type field is compared
         * here and in the two type checks below -- confirm all object type
         * values fit in 8 bits. */
        cmpb    $OBJTYPE_L1CNODE, OFFSETOF_CAP_TYPE(%rdi)
        jne     err_cspace

        /* Deconstruct cap address in %rsi into L1/L2 indices */
        /* Store L1 index in r11 */
        mov     %rsi, %r11
        shr     $L2_CNODE_BITS, %r11
        /* Store L2 index in rsi: rsi &= (1 << L2_CNODE_BITS) - 1 */
        mov     $1, %r15
        shl     $L2_CNODE_BITS, %r15
        sub     $1, %r15
        and     %r15, %rsi

        /* Check that slot number is within CNode */
        movq    OFFSETOF_CAP_L1CNODE_ALLOCATED_BYTES(%rdi), %rcx
        shr     $OBJBITS_CTE, %rcx
        /* rcx: #slots in L1 CNode */
        cmp     %rcx, %r11
        jae     err_slot

        /* Load pointer to endpoint cap: do two-level lookup */
        /* deref slot in L1 cnode */
        /* scale index for array lookup */
        shl     $OBJBITS_CTE, %r11
        /* get cnode base address into rcx */
        mov     OFFSETOF_CAP_L1CNODE_CNODE(%rdi), %rcx
        /* phys_to_mem() */
        mov     $PHYS_TO_MEM_OFFSET, %rdi
        add     %rdi, %rcx
        /* add offset into L1 CNode */
        add     %r11, %rcx

        /* Check that we found a L2 CNode */
        cmpb    $OBJTYPE_L2CNODE, OFFSETOF_CAP_TYPE(%rcx)
        jne     err_l2cnode

        /* L2 CNode cap pointer in %rcx */
        /* Load pointer to EP from L2 CNode; L2 slot index in %rsi */
        shl     $OBJBITS_CTE, %rsi
        mov     OFFSETOF_CAP_L2CNODE_CNODE(%rcx), %rcx
        /* phys_to_mem() */
        mov     $PHYS_TO_MEM_OFFSET, %rdi
        add     %rdi, %rcx
        /* add offset into L2 CNode */
        add     %rsi, %rcx

        /* Check that it's an endpoint */
        cmpb    $OBJTYPE_ENDPOINT, OFFSETOF_CAP_TYPE(%rcx)
        jne     err_endpoint

        /* TODO: Check rights on the endpoint */

        /* Set epoffset for receiver, load epbuflen */
        mov     OFFSETOF_CAP_ENDPOINT_EPOFFSET(%rcx), %rdi
        mov     OFFSETOF_CAP_ENDPOINT_EPBUFLEN(%rcx), %r13d /* r13d = epbuflen */

        /* Load pointer to listener's DCB */
        mov     OFFSETOF_CAP_ENDPOINT_LISTENER(%rcx), %rsi

        /* Check whether listener is runnable */
        /* NOTE(review): these are 32-bit compares; if the queue link fields
         * are 64-bit pointers only their low half is tested -- confirm the
         * field width before widening to cmpq. */
#if defined(CONFIG_SCHEDULER_RR)
        cmpl    $0, OFFSETOF_DCB_RR_PREV(%rsi)
        je      lrpc_make_runnable
#elif defined(CONFIG_SCHEDULER_RBED)
        cmpl    $0, OFFSETOF_DCB_RBED_NEXT(%rsi)
        je      lrpc_rbed_check_runnable
#else
# error Must define a kernel scheduling policy!
#endif

lrpc_check_runnable_continue:
        /* Check whether listener is disabled */
        cmpb    $0, OFFSETOF_DCB_DISABLED(%rsi)
        jne     err_disabled

        /* RCX = target dispatcher */
        mov     OFFSETOF_DCB_DISP(%rsi), %rcx

        /* Remember LRPC entry point on target (R15) */
        mov     OFFSETOF_DISP_LRPC(%rcx), %r15

        /* check that the receiver has space in their buffer */
        add     %rdi, %rcx      /* add epoffset to dispatcher: rcx = endpoint pointer */
        mov     OFFSETOF_LMP_ENDPOINT_DELIVERED(%rcx), %r11d /* r11d = delivered */
        mov     %r11d, %r12d                                 /* r12d = delivered */
        mov     OFFSETOF_LMP_ENDPOINT_CONSUMED(%rcx), %r14d  /* r14d = consumed */

        /*
         *  newpos = delivered + len;
         *  if (newpos >= consumed && consumed > delivered)
         *    goto err_buflen;
         *  if (newpos >= epbuflen) {
         *    newpos -= epbuflen;
         *    if (newpos >= consumed)
         *      goto err_buflen;
         *  }
         *  delivered = newpos
         */

        add     $(LRPC_MSG_LENGTH + LMP_RECV_HEADER_LENGTH), %r11d /* r11d (newpos) = delivered + len */

        cmp     %r14d, %r11d
        jb      1f              /* if newpos < consumed */
        cmp     %r12d, %r14d
        ja      err_buflen      /* if consumed > delivered */

1:
        cmp     %r13d, %r11d
        jb      2f              /* if newpos < epbuflen */

        /* newpos >= epbuflen */
        sub     %r13d, %r11d    /* newpos (r11d) -= epbuflen (r13d) */
        cmp     %r14d, %r11d    /* if newpos >= consumed */
        jae     err_buflen

2:      /* there's enough space, reserve it by updating delivered = newpos */
        mov     %r11d, OFFSETOF_LMP_ENDPOINT_DELIVERED(%rcx)

lrpc_switch_domains:
        /* Set current domain to receiver */
        mov     %rsi, dcb_current(%rip)

        /* Switch to listener address space */
        mov     OFFSETOF_DCB_VSPACE(%rsi), %rax
        mov     %rax, %cr3

        /* Zero registers to avoid the receiver getting hold of them
         * FIXME: should zero all non-payload registers */
        xor     %eax, %eax
        mov     %eax, %fs
        mov     %eax, %gs

        /* Get systime.
         * rdx must survive for the receiver, so park it in r11 (newpos is
         * no longer needed). rdtsc returns the counter split across
         * edx:eax with the upper halves of rax/rdx cleared, so the two
         * halves are merged with OR. A 32-bit "mov %eax, %edx" must not be
         * used here: it zero-extends into rdx and throws away the high
         * half that was just shifted into place. */
        xchg    %rdx, %r11
        rdtsc
        shl     $32, %rdx
        or      %rax, %rdx      /* rdx = full 64-bit timestamp */

        /* Get new dispatcher pointer */
        mov     OFFSETOF_DCB_DISP(%rsi), %rax
        /* Disable target dispatcher -- gotta do it here for TLB hit reasons */
        movl    $1, OFFSETOF_DISP_DISABLED(%rax)
        /* update dispatcher's global delivered count */
        addl    $(LRPC_MSG_LENGTH + LMP_RECV_HEADER_LENGTH), OFFSETOF_DISP_LMP_DELIVERED(%rax)
        /* update systime field in dispatcher */
        movq    %rdx, OFFSETOF_DISP_SYSTIME(%rax)
        xchg    %rdx, %r11      /* restore the caller's rdx */

        /* Check if it's necessary to load a new LDT */
        mov     OFFSETOF_DISP_X86_64_LDT_BASE(%rax), %r11
        mov     OFFSETOF_DISP_X86_64_LDT_NPAGES(%rax), %r14
        cmp     current_ldt_base(%rip), %r11
        jne     load_ldt

        cmp     current_ldt_npages(%rip), %r14
        jne     load_ldt

load_ldt_continue:
        /* Enter at LRPC entry point */
        mov     %r12d, %esi             /* bufpos of reserved space in EP buffer */
        mov     %r15, %rcx              /* saved LRPC EP */
        movq    OFFSETOF_DISP_UDISP(%rax), %rax /* user-level dispatcher pointer */
        mov     $USER_RFLAGS, %r11      /* eflags */
        sysretq

load_ldt: /* Load a new LDT: r11 = base, r14 = npages, rcx = temp for descriptor */

        /* If either base or npages is zero, load an invalid LDT */
        test    %r11, %r11
        jz      load_ldt_invalid
        test    %r14, %r14
        jz      load_ldt_invalid

        /* Update segment descriptor for LDT */

        movq    %r11, current_ldt_base(%rip)
        movq    %r14, current_ldt_npages(%rip)

        /* Format of high word of descriptor is (most significant first):
         *   32 bits of zero/reserved
         *   Base bits 63-32
         * i.e. base[63:32] belongs in bits 31:0 of this quadword, so a
         * plain right shift is all that is needed; shifting it back up
         * would place the base in the reserved half instead. */
        mov     %r11, %rcx
        shr     $32, %rcx

        // Store new descriptor (high half) to GDT
        mov     %rcx, (gdt + 8*LDT_HI_SEL)(%rip)

        /* Format of low word of descriptor is:
         * Base bits 31-24 (top 8 bits of 32-bit addr)
         * 16 bits of flags/miscellany: 0x80e2
         *   granularity = 1
         *   operation_size = irrelevant
         *   long_mode = irrelevant
         *   available = irrelevant
         *   4 high bits of limit address = 0 (assuming LDT is < 2**16 * 4k)
         *   present = 1
         *   privilege_level (2 bits wide) = 3 (user privilege)
         *   system descriptor = 0
         *   type (4 bits wide) = 2
         * low 24 bits of base addr
         * low 16 bits of limit
         */

        // bits 24:31 of base (higher base bits are shifted out of the
        // register by the 16+24+16 = 56 bits of left shifts below)
        mov     %r11, %rcx
        shr     $24, %rcx

        // flags/misc
        shl     $16, %rcx
        or      $0x80e2, %rcx

        // low 24 bits of base
        shl     $24, %rcx
        shl     $40, %r11
        shr     $40, %r11
        or      %r11, %rcx

        // low 16 bits of limit
        // NOTE(review): the limit field is set to npages itself, not
        // npages - 1 -- confirm this is the intended limit.
        shl     $16, %rcx
        shl     $48, %r14
        shr     $48, %r14
        or      %r14, %rcx

        // Store new descriptor (low half) to GDT
        mov     %rcx, (gdt + 8*LDT_LO_SEL)(%rip)

        // Construct segment selector and load it
        mov     $LDT_SELECTOR, %cx
        lldt    %cx
        jmp     load_ldt_continue

load_ldt_invalid:  /* Load an invalid LDT */
        mov     $0, %cx                 /* null selector */
        lldt    %cx
        movq    $0, current_ldt_base(%rip)
        movq    $0, current_ldt_npages(%rip)
        jmp     load_ldt_continue

err_l2index:
err_slot:       // Wrong slot
        mov     $SYS_ERR_LRPC_SLOT_INVALID, %rax
        jmp     err

err_cspace:
        mov     $SYS_ERR_LRPC_NOT_L1, %rax
        jmp     err

err_l2cnode:    // Encountered non-CNode
        /* Report the error like the other lookup failures. No debug trap
         * here: this path is selected by the caller-supplied cap address
         * and still runs on the user's stack pointer. */
        mov     $SYS_ERR_LRPC_NOT_L2, %rax
        jmp     err

err_endpoint:   // Not an endpoint
        mov     $SYS_ERR_LRPC_NOT_ENDPOINT, %rax
        /* jmp  err  - fall through */

        /* An error occurred: rax holds the error code for the caller */
err:
        /* Restore user's state */
        mov     dcb_current(%rip), %rdi
        mov     OFFSETOF_DCB_DISP(%rdi), %rdi
        lea     OFFSETOF_DISP_X86_64_ENABLED_AREA(%rdi), %rdi
        mov     OFFSETOF_RIP_REG(%rdi), %rcx
        mov     OFFSETOF_EFLAGS_REG(%rdi), %r11
        mov     OFFSETOF_RSP_REG(%rdi), %rsp
        sysretq

err_disabled:   // Target disabled
        /* Return error to caller in their enabled save area */
        mov     dcb_current(%rip), %rdi
        mov     OFFSETOF_DCB_DISP(%rdi), %rdi
        lea     OFFSETOF_DISP_X86_64_ENABLED_AREA(%rdi), %rdi
        movq    $SYS_ERR_LMP_TARGET_DISABLED, OFFSETOF_RAX_REG(%rdi)

        /* Yield to target (call dispatch(target) in C) */
        mov     %rsi, %rdi              /* rdi = target DCB */
        lea     (KERNEL_STACK + KERNEL_STACK_SIZE)(%rip), %rsp
        jmp     dispatch                /* no return */

err_buflen:     /* Target's endpoint buffer is full */
        /* Return error to caller in their enabled save area */
        mov     dcb_current(%rip), %rdi
        mov     OFFSETOF_DCB_DISP(%rdi), %rdi
        lea     OFFSETOF_DISP_X86_64_ENABLED_AREA(%rdi), %rdi
        movq    $SYS_ERR_LMP_BUF_OVERFLOW, OFFSETOF_RAX_REG(%rdi)

        /* Yield to target (call dispatch(target) in C) */
        mov     %rsi, %rdi              /* rdi = target DCB */
        lea     (KERNEL_STACK + KERNEL_STACK_SIZE)(%rip), %rsp
        jmp     dispatch                /* no return */

#ifdef CONFIG_SCHEDULER_RBED
lrpc_rbed_check_runnable:
        /* The link field tested zero; a listener that is the queue tail
         * skips make_runnable, any other listener is made runnable. */
        cmp     queue_tail(%rip), %rsi
        jne     lrpc_make_runnable
        jmp     lrpc_check_runnable_continue
#endif

lrpc_make_runnable:
        /* Calls make_runnable(listener) in C and then resumes the fast
         * path with every general-purpose register unchanged. */

        /* Save user stack */
        movq    %rsp, user_stack_save(%rip)

        /* Get kernel stack */
        lea     (KERNEL_STACK + KERNEL_STACK_SIZE)(%rip), %rsp

        // Save complete register state
        // NOTE(review): 15 pushes (120 bytes) precede the call; if the
        // stack top is 16-byte aligned, rsp is 8 off a 16-byte boundary at
        // the call site -- confirm the C code does not rely on alignment.
        pushq   %rdx
        pushq   %rcx
        pushq   %rbx
        pushq   %rax
        pushq   %r15
        pushq   %r14
        pushq   %r13
        pushq   %r12
        pushq   %r11
        pushq   %r10
        pushq   %r9
        pushq   %r8
        pushq   %rbp
        pushq   %rdi
        pushq   %rsi

        // Call make runnable in C
        movq    %rsi, %rdi              /* 1st argument = listener DCB */
        callq   make_runnable

        // Restore complete register state (reverse order of the pushes)
        popq    %rsi
        popq    %rdi
        popq    %rbp
        popq    %r8
        popq    %r9
        popq    %r10
        popq    %r11
        popq    %r12
        popq    %r13
        popq    %r14
        popq    %r15
        popq    %rax
        popq    %rbx
        popq    %rcx
        popq    %rdx

        /* Restore user stack */
        movq    user_stack_save(%rip), %rsp

        // Jump back
        jmp     lrpc_check_runnable_continue


/* regular syscall path */
syscall_path:
        /* Save user stack */
        movq    %rsp, user_stack_save(%rip)

        /* Get kernel stack */
        lea     (KERNEL_STACK + KERNEL_STACK_SIZE)(%rip), %rsp

        pushq   %rcx            /* Save user-space RIP */
        pushq   %r11            /* Save user-space RFLAGS */

        /* Build the argument buffer: ten 8-byte slots (0x50 bytes), with
         * arg2 at the lowest address */
        pushq   %rbx            /* arg11 */
        pushq   %rbp            /* arg10 */
        pushq   %rax            /* arg9 */
        pushq   %r15            /* arg8 */
        pushq   %r14            /* arg7 */
        pushq   %r13            /* arg6 */
        pushq   %r12            /* arg5 */
        pushq   %r9             /* arg4 */
        pushq   %r8             /* arg3 */
        pushq   %r10            /* arg2 in r10, NOT rcx from syscall */

        /* syscall number is in rdi (1st function argument) */
        /* arg0 is in rsi (2nd function argument) */
        /* arg1 is in rdx (3rd function argument) */
        movq    %r11, %r8       /* 5th function argument is user's flags */
        movq    %rcx, %r9       /* 6th function argument is user's IP */
        movq    %rsp, %rcx      /* 4th function argument is pointer to arg buffer */

        callq   sys_syscall     /* Process system call in C */

        addq    $0x50, %rsp     /* Remove buffer from stack */
        popq    %r11            /* Restore RFLAGS */
        popq    %rcx            /* Restore RIP */
        movq    user_stack_save(%rip), %rsp /* Restore user stack */
        sysretq                 /* Return to user-space */

        .bss
        .comm   user_stack_save, 8