1/**
2 * \file
3 * \brief System call entry point to the kernel and LRPC fast-path
4 */
5
6/*
7 * Copyright (c) 2007, 2008, 2009, 2010, 2016, ETH Zurich.
8 * All rights reserved.
9 *
10 * This file is distributed under the terms in the attached LICENSE file.
11 * If you do not find this file, copies can be found by writing to:
12 * ETH Zurich D-INFK, Universitaetstrasse 6, CH-8092 Zurich. Attn: Systems Group.
13 */
14
15#include <barrelfish_kpi/syscalls.h>
16#include <barrelfish_kpi/capabilities.h>
17#include <barrelfish_kpi/lmp.h>
18#include <x86.h>
19#include <asmoffsets.h>
20
21#ifdef __k1om__
22#include <target/k1om/offsets_target.h>
23#define KERNEL_STACK_SIZE K1OM_KERNEL_STACK_SIZE
24#define KERNEL_STACK k1om_kernel_stack
25#define PHYS_TO_MEM_OFFSET 0xffffff0000000000
26#else
27#include <target/x86_64/offsets_target.h>
28#define KERNEL_STACK_SIZE X86_64_KERNEL_STACK_SIZE
29#define KERNEL_STACK x86_64_kernel_stack
30#define PHYS_TO_MEM_OFFSET 0xfffffe0000000000
31#endif
32
    .text
    .globl syscall_entry

/*
 * Kernel entry point reached via the SYSCALL instruction.
 *
 * Register state on entry (SYSCALL semantics + Barrelfish syscall ABI):
 *   %rdi = syscall number (SYSCALL_LRPC selects the fast path)
 *   %rsi = arg0 (for LRPC: capability address of the endpoint)
 *   %rcx = user-space RIP   (placed there by the SYSCALL instruction)
 *   %r11 = user-space RFLAGS (placed there by the SYSCALL instruction)
 *   %rsp = still the *user* stack -- nothing may be pushed yet
 */
syscall_entry:
    /* is this an LRPC or a normal syscall? */
    cmp $SYSCALL_LRPC, %rdi
    jne  syscall_path   /* normal syscall, branch off */

    /* Load pointer to current DCB */
    mov     dcb_current(%rip), %rdi

    /* TODO: Check that caller is not disabled */

    /* dcb_current->disabled=false */
    movb $0, OFFSETOF_DCB_DISABLED(%rdi)

    /* Save caller's registers into the *enabled* save area of its
     * dispatcher so it can be resumed later.  SYS_ERR_OK is stored as
     * the return value the caller will see on resumption. */
    mov     OFFSETOF_DCB_DISP(%rdi), %rdi
    lea     OFFSETOF_DISP_X86_64_ENABLED_AREA(%rdi), %rdi
    movq    $SYS_ERR_OK, OFFSETOF_RAX_REG(%rdi)
    mov     %rcx, OFFSETOF_RIP_REG(%rdi)    /* user RIP from SYSCALL */
    mov     %r11, OFFSETOF_EFLAGS_REG(%rdi) /* user RFLAGS from SYSCALL */
    mov     %rsp, OFFSETOF_RSP_REG(%rdi)
    mov     %fs, OFFSETOF_FS_REG(%rdi)
    mov     %gs, OFFSETOF_GS_REG(%rdi)
    fxsave  OFFSETOF_FXSAVE_AREA(%rdi)      /* FPU/SSE state */
59
    /* Load pointer to root CNode cap into %rdi */
    mov     dcb_current(%rip), %rdi
    lea     OFFSETOF_DCB_CSPACE_CAP(%rdi), %rdi

    /* The cspace root must be an L1 CNode */
    cmpb    $OBJTYPE_L1CNODE, OFFSETOF_CAP_TYPE(%rdi)
    jne     err_cspace

    /* Deconstruct cap address in %rsi into L1/L2 indices:
     * the low L2_CNODE_BITS select the slot in the L2 CNode,
     * the remaining high bits select the slot in the L1 CNode */
    /* Store L1 index in r11 */
    mov     %rsi, %r11
    shr     $L2_CNODE_BITS, %r11
    /* Store L2 index in rsi: mask with (1 << L2_CNODE_BITS) - 1 */
    mov     $1, %r15
    shl     $L2_CNODE_BITS, %r15
    sub     $1, %r15
    and     %r15, %rsi

    /* Check that slot number is within CNode:
     * allocated bytes >> OBJBITS_CTE = number of slots */
    movq    OFFSETOF_CAP_L1CNODE_ALLOCATED_BYTES(%rdi), %rcx
    shr     $OBJBITS_CTE, %rcx
    /* rcx: #slots in L1 CNode */
    cmp     %rcx, %r11
    jae     err_slot        /* unsigned compare: index >= #slots */

    /* Load pointer to endpoint cap: do two-level lookup */
    /* deref slot in L1 cnode */
    /*   scale index for array lookup (each cte is 1 << OBJBITS_CTE bytes) */
    shl     $OBJBITS_CTE, %r11
    /* get cnode base address into rcx */
    mov     OFFSETOF_CAP_L1CNODE_CNODE(%rdi), %rcx
    /*   phys_to_mem(): translate physical to kernel-virtual address */
    mov     $PHYS_TO_MEM_OFFSET, %rdi       // phys_to_mem()
    add     %rdi, %rcx
    /* add offset into L1 CNode */
    add     %r11, %rcx

    /* Check that we found a L2 CNode */
    cmpb    $OBJTYPE_L2CNODE, OFFSETOF_CAP_TYPE(%rcx)
    jne     err_l2cnode

    /* L2 CNode cap pointer in %rcx */
    /* Load pointer to EP from L2 CNode; L2 slot index in %rsi */
    shl     $OBJBITS_CTE, %rsi
    mov     OFFSETOF_CAP_L2CNODE_CNODE(%rcx), %rcx
    /* phys_to_mem() */
    mov     $PHYS_TO_MEM_OFFSET, %rdi
    add     %rdi, %rcx
    /* add offset into L2 CNode */
    add     %rsi, %rcx

    /* Check that it's an endpoint */
    cmpb    $OBJTYPE_ENDPOINT, OFFSETOF_CAP_TYPE(%rcx)
    jne     err_endpoint
113
114    /* TODO: Check rights on the endpoint */
115
116    /* Set epoffset for receiver, load epbuflen */
117    mov     OFFSETOF_CAP_ENDPOINT_EPOFFSET(%rcx), %rdi
118    mov     OFFSETOF_CAP_ENDPOINT_EPBUFLEN(%rcx), %r13d /* r13d = epbuflen */
119
120    /* Load pointer to listener's DCB */
121    mov     OFFSETOF_CAP_ENDPOINT_LISTENER(%rcx), %rsi
122
123    /* Check whether listener is runnable */
124#if defined(CONFIG_SCHEDULER_RR)
125    cmpl    $0, OFFSETOF_DCB_RR_PREV(%rsi)
126    je      lrpc_make_runnable
127#elif defined(CONFIG_SCHEDULER_RBED)
128    cmpl    $0, OFFSETOF_DCB_RBED_NEXT(%rsi)
129    je      lrpc_rbed_check_runnable
130#else
131# error Must define a kernel scheduling policy!
132#endif
133
134lrpc_check_runnable_continue:
135    /* Check whether listener is disabled */
136    cmpb    $0, OFFSETOF_DCB_DISABLED(%rsi)
137    jne     err_disabled
138
    /* RCX = target dispatcher */
    mov OFFSETOF_DCB_DISP(%rsi), %rcx

    /* Remember LRPC entry point on target (R15) */
    mov OFFSETOF_DISP_LRPC(%rcx), %r15

    /* check that the receiver has space in their buffer
     * (endpoint message buffer is a ring within the dispatcher frame) */
    add %rdi, %rcx /* add epoffset to dispatcher: rcx = endpoint pointer */
    mov OFFSETOF_LMP_ENDPOINT_DELIVERED(%rcx), %r11d /* r11d = delivered */
    mov %r11d, %r12d /* r12d = delivered */
    mov OFFSETOF_LMP_ENDPOINT_CONSUMED(%rcx), %r14d /* r14d = consumed */

    /* Ring-buffer space check, pseudo-code (all unsigned 32-bit):
     *
     *  newpos = delivered + len;
     *  if (newpos >= consumed && consumed > delivered)
     *    goto err_buflen;
     *  if (newpos >= epbuflen) {
     *    newpos -= epbuflen;      // wrap around end of ring
     *    if (newpos >= consumed)
     *      goto err_buflen;
     *  }
     *  delivered = newpos
     */

    add $(LRPC_MSG_LENGTH + LMP_RECV_HEADER_LENGTH), %r11d /* r11d (newpos) = delivered + len */

    cmp %r14d, %r11d
    jb 1f /* if newpos < consumed */
    cmp %r12d, %r14d
    ja err_buflen /* if consumed > delivered */

1:
    cmp %r13d, %r11d
    jb 2f /* if newpos < epbuflen */

    /* newpos >= epbuflen: wrap and re-check against consumed */
    sub %r13d, %r11d /* newpos (r11d) -= epbuflen (r13d) */
    cmp %r14d, %r11d /* if newpos >= consumed */
    jae err_buflen

2:      /* there's enough space, reserve it by updating delivered = newpos */
    mov %r11d, OFFSETOF_LMP_ENDPOINT_DELIVERED(%rcx)
181
lrpc_switch_domains:
    /* Commit to the LRPC: make the listener the current domain */
    mov     %rsi, dcb_current(%rip)

    /* Switch to listener address space */
    mov     OFFSETOF_DCB_VSPACE(%rsi), %rax
    mov     %rax, %cr3

    /* Zero registers to avoid the receiver getting hold of them
     * FIXME: should zero all non-payload registers */
    xor     %eax, %eax
    mov     %eax, %fs
    mov     %eax, %gs

    /* Get systime.  RDTSC returns the 64-bit timestamp split across
     * EDX:EAX; combine both halves into %rdx.  %rdx still carries LRPC
     * message payload, so park it in %r11 (dead here) around the read. */
    xchg    %rdx, %r11
    rdtsc
    shl     $32, %rdx
    or      %rax, %rdx  /* rdx = (hi << 32) | lo.  RDTSC zeroes the
                         * upper bits of %rax, so OR merges cleanly.
                         * (bugfix: was 'mov %eax, %edx', which zeroed
                         * bits 63:32 and discarded the high half) */

    /* Get new dispatcher pointer */
    mov     OFFSETOF_DCB_DISP(%rsi), %rax
    /* Disable target dispatcher -- gotta do it here for TLB hit reasons */
    movl    $1, OFFSETOF_DISP_DISABLED(%rax)
    /* update dispatcher's global delivered count */
    addl    $(LRPC_MSG_LENGTH + LMP_RECV_HEADER_LENGTH), OFFSETOF_DISP_LMP_DELIVERED(%rax)
    /* update systime field in dispatcher */
    movq    %rdx, OFFSETOF_DISP_SYSTIME(%rax)
    xchg    %rdx, %r11      /* restore LRPC payload word to %rdx */

    /* Check if it's necessary to load a new LDT */
    mov     OFFSETOF_DISP_X86_64_LDT_BASE(%rax), %r11
    mov     OFFSETOF_DISP_X86_64_LDT_NPAGES(%rax), %r14
    cmp     current_ldt_base(%rip), %r11
    jne load_ldt

    cmp     current_ldt_npages(%rip), %r14
    jne load_ldt

load_ldt_continue:
    /* Enter at LRPC entry point: SYSRET loads RIP from %rcx and
     * RFLAGS from %r11 */
    mov     %r12d, %esi            /* bufpos of reserved space in EP buffer */
    mov     %r15, %rcx             /* saved LRPC EP */
    movq    OFFSETOF_DISP_UDISP(%rax), %rax /* user-level dispatcher pointer */
    mov     $USER_RFLAGS, %r11  /* eflags */
    sysretq
228
load_ldt: /* Load a new LDT: r11 = base, r14 = npages, rcx = temp for descriptor */

    /* If either base or npages is zero, load an invalid LDT */
    cmpq    $0, %r11
    je load_ldt_invalid
    cmpq    $0, %r14
    je load_ldt_invalid

    /* Update segment descriptor for LDT */

    /* Cache the new base/npages so the fast path above can skip the
     * reload next time */
    movq    %r11, current_ldt_base(%rip)
    movq    %r14, current_ldt_npages(%rip)

    /* Format of high word of descriptor is:
     * 32 bits of zero/reserved
     * Base bits 63-32 */
    mov %r11, %rcx
    shr $32, %rcx
    shl $32, %rcx

    // Store new descriptor (high half) to GDT
    mov %rcx, (gdt + 8*LDT_HI_SEL)(%rip)

    /* Format of low word of descriptor is:
     * Base bits 31-24 (top 8 bits of 32-bit addr)
     * 16 bits of flags/miscellany: 0x80e2
     *   granularity = 1
     *   operation_size = irrelevant
     *   long_mode = irrelevant
     *   available = irrelevant
     *   4 high bits of limit address = 0 (assuming LDT is < 2**16 * 4k)
     *   present = 1
     *   privilege_level (2 bits wide) = 3 (user privilege)
     *   system descriptor = 0
     *   type (4 bits wide) = 2
     * low 24 bits of base addr
     * low 16 bits of limit
     */

    // bits 24:31 of base
    mov %r11, %rcx
    shr $24, %rcx

    // flags/misc
    shl $16, %rcx
    or  $0x80e2, %rcx

    // low 24 bits of base (shl/shr pair masks %r11 to its low 24 bits)
    shl $24, %rcx
    shl $40, %r11
    shr $40, %r11
    or  %r11, %rcx

    // low 16 bits of limit (shl/shr pair masks %r14 to its low 16 bits)
    shl $16, %rcx
    shl $48, %r14
    shr $48, %r14
    or  %r14, %rcx

    // Store new descriptor (low half) to GDT
    mov %rcx, (gdt + 8*LDT_LO_SEL)(%rip)

    // Construct segment selector and load it
    mov     $LDT_SELECTOR, %cx
    lldt    %cx
    jmp     load_ldt_continue
295
load_ldt_invalid:
    /* Disable the LDT by loading the null selector, then clear the
     * cached base/npages so a future non-null LDT is always reloaded. */
    xor     %ecx, %ecx                      /* null LDT selector */
    lldt    %cx
    movq    $0, current_ldt_base(%rip)
    movq    $0, current_ldt_npages(%rip)
    jmp     load_ldt_continue
302
err_l2index:
err_slot:       // Wrong slot
    mov     $SYS_ERR_LRPC_SLOT_INVALID, %rax
    jmp     err

err_cspace:     // cspace root was not an L1 CNode
    mov     $SYS_ERR_LRPC_NOT_L1, %rax
    jmp     err

err_l2cnode:    // Encountered non-CNode
    /* NOTE(review): this breakpoint trap fires in the kernel before the
     * error is returned -- it looks like a leftover debugging aid.
     * Confirm whether it should be removed for production builds. */
    int     $3 // hw breakpoint
    mov     $SYS_ERR_LRPC_NOT_L2, %rax
    jmp     err

err_endpoint:   // Not an endpoint
    mov     $SYS_ERR_LRPC_NOT_ENDPOINT, %rax
    /* jmp  err  - fall through */

    /* An error occurred: return straight back to the caller with the
     * error code in %rax, restoring only RIP/RFLAGS/RSP (SYSRET
     * consumes %rcx and %r11) */
err:
    /* Restore user's state */
    mov dcb_current(%rip), %rdi
    mov OFFSETOF_DCB_DISP(%rdi), %rdi
    lea OFFSETOF_DISP_X86_64_ENABLED_AREA(%rdi), %rdi
    mov OFFSETOF_RIP_REG(%rdi), %rcx
    mov OFFSETOF_EFLAGS_REG(%rdi), %r11
    mov OFFSETOF_RSP_REG(%rdi), %rsp
    sysretq
331
err_disabled:   // Target disabled
    /* Return error to caller in their enabled save area; the caller's
     * full state was already saved there on entry, so it resumes
     * normally later with this error in %rax */
    mov dcb_current(%rip), %rdi
    mov OFFSETOF_DCB_DISP(%rdi), %rdi
    lea OFFSETOF_DISP_X86_64_ENABLED_AREA(%rdi), %rdi
    movq $SYS_ERR_LMP_TARGET_DISABLED, OFFSETOF_RAX_REG(%rdi)

    /* Yield to target (call dispatch(target) in C) */
    mov %rsi, %rdi /* rdi = target DCB */
    lea (KERNEL_STACK + KERNEL_STACK_SIZE)(%rip), %rsp /* fresh kernel stack */
    jmp dispatch /* no return */

err_buflen:     /* Target's endpoint buffer is full */
    /* Return error to caller in their enabled save area */
    mov dcb_current(%rip), %rdi
    mov OFFSETOF_DCB_DISP(%rdi), %rdi
    lea OFFSETOF_DISP_X86_64_ENABLED_AREA(%rdi), %rdi
    movq $SYS_ERR_LMP_BUF_OVERFLOW, OFFSETOF_RAX_REG(%rdi)

    /* Yield to target (call dispatch(target) in C) */
    mov %rsi, %rdi /* rdi = target DCB */
    lea (KERNEL_STACK + KERNEL_STACK_SIZE)(%rip), %rsp /* fresh kernel stack */
    jmp dispatch /* no return */
355
#ifdef CONFIG_SCHEDULER_RBED
lrpc_rbed_check_runnable:
    /* RBED: a DCB with a zero next-link may still be queued if it is
     * the queue tail; only enqueue it when it is not the tail */
    cmp     queue_tail(%rip), %rsi
    jne     lrpc_make_runnable
    jmp     lrpc_check_runnable_continue
#endif
362
lrpc_make_runnable:
    /* Listener is not on the run queue: drop into C to enqueue it.
     * The LRPC fast path keeps live state in many caller-saved
     * registers, so save and restore the complete set around the call. */

    /* Save user stack */
    movq    %rsp, user_stack_save(%rip)

    /* Get kernel stack */
    lea     (KERNEL_STACK + KERNEL_STACK_SIZE)(%rip), %rsp

    // Save complete register state
    pushq   %rdx
    pushq   %rcx
    pushq   %rbx
    pushq   %rax
    pushq   %r15
    pushq   %r14
    pushq   %r13
    pushq   %r12
    pushq   %r11
    pushq   %r10
    pushq   %r9
    pushq   %r8
    pushq   %rbp
    pushq   %rdi
    pushq   %rsi

    // Call make runnable in C: make_runnable(target DCB)
    movq    %rsi, %rdi
    callq   make_runnable

    // Restore complete register state (exact reverse of pushes above)
    popq    %rsi
    popq    %rdi
    popq    %rbp
    popq    %r8
    popq    %r9
    popq    %r10
    popq    %r11
    popq    %r12
    popq    %r13
    popq    %r14
    popq    %r15
    popq    %rax
    popq    %rbx
    popq    %rcx
    popq    %rdx

    /* Restore user stack */
    movq    user_stack_save(%rip), %rsp

    // Jump back into the fast path
    jmp     lrpc_check_runnable_continue
413
414
/* regular syscall path: marshal all arguments into a stack buffer and
 * hand off to sys_syscall() in C */
syscall_path:
    /* Save user stack */
    movq    %rsp, user_stack_save(%rip)

    /* Get kernel stack */
    lea (KERNEL_STACK + KERNEL_STACK_SIZE)(%rip), %rsp

    pushq   %rcx            /* Save user-space RIP */
    pushq   %r11            /* Save user-space RFLAGS */

    /* Build argument buffer: 10 words (0x50 bytes, removed below),
     * pushed in reverse so the buffer reads arg2..arg11 upwards */
    pushq   %rbx            /* arg11 */
    pushq   %rbp            /* arg10 */
    pushq   %rax            /* arg9 */
    pushq   %r15            /* arg8 */
    pushq   %r14            /* arg7 */
    pushq   %r13            /* arg6 */
    pushq   %r12            /* arg5 */
    pushq   %r9             /* arg4 */
    pushq   %r8             /* arg3 */
    pushq   %r10            /* arg2 in r10, NOT rcx from syscall */

    /* syscall number is in rdi (1st function argument) */
    /* arg0 is in rsi (2nd function argument) */
    /* arg1 is in rdx (3rd function argument) */
    movq    %r11, %r8   /* 5th function argument is user's flags */
    movq    %rcx, %r9   /* 6th function argument is user's IP */
    movq    %rsp, %rcx  /* 4th function argument is pointer to arg buffer */

    callq   sys_syscall     /* Process system call in C */

    addq    $0x50, %rsp     /* Remove buffer from stack (10 * 8 bytes) */
    popq    %r11            /* Restore RFLAGS */
    popq    %rcx            /* Restore RIP */
    movq    user_stack_save(%rip), %rsp /* Restore user stack */
    sysretq             /* Return to user-space */

    .bss
    /* scratch slot for the user %rsp while on the kernel stack */
    .comm   user_stack_save, 8
454