1/**
2 * \file
3 * \brief System calls implementation.
4 */
5
6/*
7 * Copyright (c) 2007, 2008, 2009, 2010, 2012, ETH Zurich.
8 * All rights reserved.
9 *
10 * This file is distributed under the terms in the attached LICENSE file.
11 * If you do not find this file, copies can be found by writing to:
12 * ETH Zurich D-INFK, Universitaetstrasse 6, CH-8092 Zurich. Attn: Systems Group.
13 */
14
15#include <kernel.h>
16#include <kcb.h>
17#include <sys_debug.h>
18#include <syscall.h>
19#include <barrelfish_kpi/syscalls.h>
20#include <mdb/mdb.h>
21#include <mdb/mdb_tree.h>
22#include <dispatch.h>
23#include <paging_kernel_arch.h>
24#include <paging_generic.h>
25#include <exec.h>
26#include <systime.h>
27#include <arch/x86/x86.h>
28#include <arch/x86/apic.h>
29#include <arch/x86/global.h>
30#include <arch/x86/perfmon.h>
31#include <arch/x86/debugregs.h>
32#include <arch/x86/syscall.h>
33#include <arch/x86/timing.h>
34#include <arch/x86/ipi_notify.h>
35#include <barrelfish_kpi/sys_debug.h>
36#include <barrelfish_kpi/lmp.h>
37#include <barrelfish_kpi/dispatcher_shared_target.h>
38#include <barrelfish_kpi/platform.h>
39#include <trace/trace.h>
40#include <useraccess.h>
41#ifndef __k1om__
42#include <vmkit.h>
43#include <dev/amd_vmcb_dev.h>
44#endif
45
46extern uint64_t user_stack_save;
47
48/**
49 * For benchmarking only
50 */
static struct sysret handle_noop(struct capability *to, int cmd, uintptr_t *args)
{
    // Does nothing and returns success: used to measure raw syscall overhead.
    return SYSRET(SYS_ERR_OK);
}
55
56/* FIXME: lots of missing argument checks in this function */
static struct sysret handle_dispatcher_setup(struct capability *to,
                                             int cmd, uintptr_t *args)
{
    // Unpack the setup arguments for the dispatcher referenced by 'to'.
    capaddr_t cptr      = args[0];                // cspace root cap address
    uint8_t   level     = args[1];                // lookup level of cptr
    capaddr_t vptr      = args[2] & 0xffffffff;   // vspace root cap address
    capaddr_t dptr      = args[3] & 0xffffffff;   // dispatcher frame cap address
    bool      run       = args[4];                // make runnable immediately?
    capaddr_t odptr     = args[5] & 0xffffffff;   // other-dispatcher cap address

    TRACE(KERNEL, SC_DISP_SETUP, 0);
    struct sysret sr = sys_dispatcher_setup(to, cptr, level, vptr, dptr, run, odptr);
    TRACE(KERNEL, SC_DISP_SETUP, 1);
    return sr;
}
72
/**
 * \brief Set scheduling properties of the dispatcher 'to'.
 */
static struct sysret handle_dispatcher_properties(struct capability *to,
                                                  int cmd, uintptr_t *args)
{
    enum task_type type = args[0];       // scheduling class of the task
    unsigned long deadline = args[1];
    unsigned long wcet = args[2];        // worst-case execution time
    unsigned long period = args[3];
    unsigned long release = args[4];     // release time
    unsigned short weight = args[5];
    // The trace pair brackets the actual property update for profiling.
    TRACE(KERNEL, SC_DISP_PROPS, 0);
    struct sysret sr = sys_dispatcher_properties(to, type, deadline, wcet, period,
                                                 release, weight);
    TRACE(KERNEL, SC_DISP_PROPS, 1);
    return sr;
}
89
/**
 * \brief Common retype implementation for user and monitor invocations.
 *
 * args[0] packs the source cspace root (high 32 bits) and source cap address
 * (low 32 bits); args[5] packs the destination cspace root and destination
 * CNode address the same way.
 */
static struct sysret handle_retype_common(struct capability *root,
                                          uintptr_t *args,
                                          bool from_monitor)
{
    capaddr_t source_croot    = args[0] >> 32;
    capaddr_t source_cptr     = args[0] & 0xffffffff;
    uint64_t offset           = args[1];    // offset into the source object
    uint64_t type             = args[2];    // requested destination object type
    uint64_t objsize          = args[3];    // size of each new object
    uint64_t objcount         = args[4];    // number of objects to create
    capaddr_t dest_cspace_cptr= args[5] >> 32;
    capaddr_t dest_cnode_cptr = args[5] & 0xffffffff;
    uint64_t dest_slot        = args[6];    // first destination slot
    uint64_t dest_cnode_level = args[7];

    TRACE(KERNEL, SC_RETYPE, 0);
    struct sysret sr = sys_retype(root, source_croot, source_cptr, offset, type,
                                  objsize, objcount, dest_cspace_cptr,
                                  dest_cnode_cptr, dest_cnode_level,
                                  dest_slot, from_monitor);
    TRACE(KERNEL, SC_RETYPE, 1);
    return sr;
}
113
114static struct sysret handle_retype(struct capability *root,
115                                   int cmd, uintptr_t *args)
116{
117        return handle_retype_common(root, args, false);
118}
119
/**
 * \brief Create a new capability of the given type directly (no source cap).
 */
static struct sysret handle_create(struct capability *root,
                                   int cmd, uintptr_t *args)
{
    /* Retrieve arguments */
    enum objtype type         = args[0];   // type of object to create
    size_t objsize            = args[1];   // size of the object
    capaddr_t dest_cnode_cptr = args[2];   // CNode to place the new cap in
    uint8_t dest_level        = args[3];   // lookup level of the CNode
    cslot_t dest_slot         = args[4];   // destination slot in that CNode

    TRACE(KERNEL, SC_CREATE, 0);
    struct sysret sr = sys_create(root, type, objsize, dest_cnode_cptr,
                                  dest_level, dest_slot);
    TRACE(KERNEL, SC_CREATE, 1);
    return sr;
}
136
137
/**
 * \brief Map a capability into the page table 'ptable'.
 *
 * args[1] packs the source cspace root (high 32 bits) and source cap address
 * (low 32 bits); args[6] packs the mapping-cap cspace root and CNode address
 * the same way.
 */
static struct sysret handle_map(struct capability *ptable,
                                int cmd, uintptr_t *args)
{
    /* Retrieve arguments */
    uint64_t  slot            = args[0];          // page-table entry to map into
    capaddr_t source_root_cptr= args[1] >> 32;
    capaddr_t source_cptr     = args[1] & 0xffffffff;
    uint8_t   source_level    = args[2];
    uint64_t  flags           = args[3];          // paging flags
    uint64_t  offset          = args[4];          // offset into source object
    uint64_t  pte_count       = args[5];          // number of entries to map
    capaddr_t mapping_croot   = args[6] >> 32;
    capaddr_t mapping_cnptr   = args[6] & 0xffffffff;
    uint8_t   mapping_cn_level= args[7];
    cslot_t   mapping_slot    = args[8];          // slot to store the mapping cap

    TRACE(KERNEL, SC_MAP, 0);
    struct sysret sr = sys_map(ptable, slot, source_root_cptr, source_cptr,
                               source_level, flags, offset, pte_count,
                               mapping_croot, mapping_cnptr,
                               mapping_cn_level, mapping_slot);
    TRACE(KERNEL, SC_MAP, 1);
    return sr;
}
162
163/**
164 * Common code for copying and minting except the mint flag and param passing
165 */
static struct sysret copy_or_mint(struct capability *root,
                                  uintptr_t *args, bool mint)
{
    /* Retrieve arguments */
    capaddr_t dest_cspace_cptr = args[0];   // cspace root of the destination
    capaddr_t destcn_cptr      = args[1];   // destination CNode address
    uint64_t  dest_slot        = args[2];   // slot within the destination CNode
    capaddr_t source_croot_ptr = args[3];   // cspace root of the source
    capaddr_t source_cptr      = args[4];   // source capability address
    uint8_t destcn_level       = args[5];
    uint8_t source_level       = args[6];
    uint64_t param1, param2;
    // params only sent if mint operation
    if (mint) {
        param1 = args[7];
        param2 = args[8];
    } else {
        param1 = param2 = 0;
    }

    TRACE(KERNEL, SC_COPY_OR_MINT, 0);
    struct sysret sr = sys_copy_or_mint(root, dest_cspace_cptr, destcn_cptr, dest_slot,
                                        source_croot_ptr, source_cptr,
                                        destcn_level, source_level,
                                        param1, param2, mint);
    TRACE(KERNEL, SC_COPY_OR_MINT, 1);
    return sr;
}
194
static struct sysret handle_mint(struct capability *root,
                                 int cmd, uintptr_t *args)
{
    // Mint = copy with two type-specific parameters (args[7], args[8]) applied.
    return copy_or_mint(root, args, true);
}
200
static struct sysret handle_copy(struct capability *root,
                                 int cmd, uintptr_t *args)
{
    // Plain copy: no mint parameters are read from the argument buffer.
    return copy_or_mint(root, args, false);
}
206
207static struct sysret handle_delete(struct capability *root,
208                                   int cmd, uintptr_t *args)
209{
210    capaddr_t cptr = args[0];
211    uint8_t level  = args[1];
212    return sys_delete(root, cptr, level);
213}
214
215static struct sysret handle_revoke(struct capability *root,
216                                   int cmd, uintptr_t *args)
217{
218    capaddr_t cptr = args[0];
219    uint8_t level  = args[1];
220    return sys_revoke(root, cptr, level);
221}
222
223static struct sysret handle_get_state(struct capability *root,
224                                      int cmd, uintptr_t *args)
225{
226    capaddr_t cptr = args[0];
227    uint8_t level  = args[1];
228    return sys_get_state(root, cptr, level);
229}
// Modify the paging flags of 'pages' consecutive entries of page table 'to',
// starting at index 'entry'.
static struct sysret handle_vnode_modify_flags(struct capability *to,
                                               int cmd, uintptr_t *args)
{
    size_t entry   = args[0];   // first page-table entry to modify
    size_t pages   = args[1];   // number of entries to modify
    size_t flags   = args[2];   // new flags to apply

    return SYSRET(ptable_modify_flags(to, entry, pages, flags));
}
239
static struct sysret handle_get_size(struct capability *root,
                                      int cmd, uintptr_t *args)
{
    // Return the allocated size of the L1 CNode 'root'; takes no arguments.
    return sys_get_size_l1cnode(root);
}
245
246static struct sysret handle_resize(struct capability *root,
247                                   int cmd, uintptr_t *args)
248{
249    capaddr_t newroot_ptr = args[0];
250    capaddr_t retcn_ptr   = args[1];
251    cslot_t   retslot     = args[2];
252    return sys_resize_l1cnode(root, newroot_ptr, retcn_ptr, retslot);
253}
254
// Copy the capability representation of the cap at 'cptr' into a caller
// supplied buffer.
static struct sysret handle_cap_identify(struct capability *root,
                                         int cmd, uintptr_t *args)
{
    capaddr_t cptr = args[0];
    uint8_t  level = args[1];
    // NOTE(review): args[2] is a user-supplied buffer pointer used without a
    // visible access_ok() check here -- presumably validated inside
    // sys_identify_cap; confirm.
    struct capability *cap = (struct capability*)args[2];

    return sys_identify_cap(root, cptr, level, cap);
}
264
// Remove the mapping described by the mapping cap at args[0] from the
// page table 'pgtable'.
static struct sysret handle_unmap(struct capability *pgtable,
                                  int cmd, uintptr_t *args)
{
    capaddr_t cptr = args[0];   // address of the mapping capability
    uint8_t level  = args[1];   // lookup depth of cptr

    errval_t err;
    struct cte *mapping;
    // The mapping cap is looked up in the calling dispatcher's cspace.
    err = caps_lookup_slot(&dcb_current->cspace.cap, cptr, level,
                           &mapping, CAPRIGHTS_READ_WRITE);
    if (err_is_fail(err)) {
        return SYSRET(err_push(err, SYS_ERR_CAP_NOT_FOUND));
    }

    TRACE(KERNEL, SC_UNMAP, 0);
    err = page_mappings_unmap(pgtable, mapping);
    TRACE(KERNEL, SC_UNMAP, 1);
    return SYSRET(err);
}
284
static struct sysret handle_mapping_destroy(struct capability *mapping,
                                            int cmd, uintptr_t *args)
{
    // Not yet implemented: unconditionally panics. The return statement below
    // is unreachable and only present to satisfy the function signature.
    panic("NYI!");
    return SYSRET(SYS_ERR_OK);
}
291
292static struct sysret handle_mapping_modify(struct capability *mapping,
293                                           int cmd, uintptr_t *args)
294{
295    // Modify flags of (part of) mapped region of frame
296    assert(type_is_mapping(mapping->type));
297
298    // unpack arguments
299    size_t offset = args[0]; // in pages; of first page to modify from first
300                             // page in mapped region
301    size_t pages  = args[1]; // #pages to modify
302    size_t flags  = args[2]; // new flags
303    genvaddr_t va = args[3]; // virtual addr hint
304
305    errval_t err = page_mappings_modify_flags(mapping, offset, pages, flags, va);
306
307    return (struct sysret) {
308        .error = err,
309        .value = 0,
310    };
311}
312
// Re-map an already-mapped capability into another slot of page table
// 'ptable', creating a fresh mapping cap for the copy.
static struct sysret handle_vnode_copy_remap(struct capability *ptable,
                                             int cmd, uintptr_t *args)
{
    /* Retrieve arguments */
    uint64_t  slot          = args[0];   // destination page-table entry
    capaddr_t source_cptr   = args[1];   // cap to re-map
    int       source_level  = args[2];
    uint64_t  flags         = args[3];   // paging flags for the new mapping
    uint64_t  offset        = args[4];   // offset into the source object
    uint64_t  pte_count     = args[5];   // number of entries to map
    capaddr_t mapping_cnptr = args[6];   // CNode for the new mapping cap
    cslot_t   mapping_slot  = args[7];
    uint8_t   mapping_cnlevel=args[8];

    struct sysret sr = sys_copy_remap(ptable, slot, source_cptr, source_level, flags,
                                      offset, pte_count, mapping_cnptr,
                                      mapping_cnlevel, mapping_slot);
    return sr;
}
332
333/*
334 *  MVAS Extension
335 */
/**
 * \brief Clone page-table entries [start, end) from a source vnode into the
 *        destination vnode 'dest', duplicating the associated mapping caps.
 *
 * args[5] and args[6] each pack two mapping-CNode cap addresses (high/low
 * CPTR_BITS halves) for the source and destination vnodes respectively.
 * If 'flags' is non-zero, the copied range additionally has its paging flags
 * rewritten to 'flags'.
 */
static struct sysret handle_inherit(struct capability *dest,
                                  int cmd, uintptr_t *args)
{
    errval_t err;

    capaddr_t source_cptr   = args[0];   // source vnode capability
    int       source_level  = args[1];
    uint64_t  start         = args[2];   // first entry to copy (inclusive)
    uint64_t  end           = args[3];   // last entry to copy (exclusive)
    uint64_t  flags         = args[4];   // 0 = keep source flags
    capaddr_t src_mcn0_cptr = (args[5] >> CPTR_BITS) & MASK(CPTR_BITS);
    capaddr_t src_mcn1_cptr = args[5] & MASK(CPTR_BITS);
    capaddr_t dst_mcn0_cptr = (args[6] >> CPTR_BITS) & MASK(CPTR_BITS);
    capaddr_t dst_mcn1_cptr = args[6] & MASK(CPTR_BITS);

    // end == PTABLE_SIZE is legal because the copy loop below is exclusive
    // of 'end'.
    if (start > PTABLE_SIZE || end > PTABLE_SIZE) {
        return SYSRET(SYS_ERR_SLOTS_INVALID);
    }

    // Empty (or inverted) range: nothing to do.
    if (start > end) {
        return SYSRET(SYS_ERR_OK);
    }

    struct capability *root = &dcb_current->cspace.cap;
    struct cte *src_cte;
    err = caps_lookup_slot(root, source_cptr, source_level, &src_cte,
                           CAPRIGHTS_READ);
    if (err_is_fail(err)) {
        return SYSRET(err_push(err, SYS_ERR_SOURCE_CAP_LOOKUP));
    }
    struct capability *src  = &src_cte->cap;

    // Source and destination must be the same kind of vnode.
    if (dest->type != src->type) {
        return SYSRET(SYS_ERR_CNODE_TYPE);
    }

    /* lookup src mapping cnodes */
    // NOTE(review): CNODE_TYPE_OTHER is passed where the other lookups in
    // this file pass a level -- confirm it equals the intended lookup level.
    struct cte *src_mcn0;
    err = caps_lookup_slot(root, src_mcn0_cptr, CNODE_TYPE_OTHER, &src_mcn0,
                           CAPRIGHTS_READ_WRITE);
    if (err_is_fail(err)) {
        printk(LOG_NOTE, "%s: looking up cte for src mcn0 (%#"PRIxCADDR"): %d\n",
                __FUNCTION__, src_mcn0_cptr, err_no(err));
        return SYSRET(err_push(err, SYS_ERR_DEST_CNODE_LOOKUP));
    }
    if (src_mcn0->cap.type != ObjType_L2CNode) {
        return SYSRET(SYS_ERR_DEST_TYPE_INVALID);
    }
    struct cte *src_mcn1;
    err = caps_lookup_slot(root, src_mcn1_cptr, CNODE_TYPE_OTHER, &src_mcn1,
                           CAPRIGHTS_READ_WRITE);
    if (err_is_fail(err)) {
        printk(LOG_NOTE, "%s: looking up cte for src mcn1 (%#"PRIxCADDR"): %d\n",
                __FUNCTION__, src_mcn1_cptr, err_no(err));
        return SYSRET(err_push(err, SYS_ERR_DEST_CNODE_LOOKUP));
    }
    if (src_mcn1->cap.type != ObjType_L2CNode) {
        return SYSRET(SYS_ERR_DEST_TYPE_INVALID);
    }
    /* lookup dst mapping cnodes */
    struct cte *dst_mcn0;
    err = caps_lookup_slot(root, dst_mcn0_cptr, CNODE_TYPE_OTHER, &dst_mcn0,
                           CAPRIGHTS_READ_WRITE);
    if (err_is_fail(err)) {
        printk(LOG_NOTE, "%s: looking up cte for dest mcn0 (%#"PRIxCADDR"): %d\n",
                __FUNCTION__, dst_mcn0_cptr, err_no(err));
        return SYSRET(err_push(err, SYS_ERR_DEST_CNODE_LOOKUP));
    }
    if (dst_mcn0->cap.type != ObjType_L2CNode) {
        return SYSRET(SYS_ERR_DEST_TYPE_INVALID);
    }
    struct cte *dst_mcn1;
    err = caps_lookup_slot(root, dst_mcn1_cptr, CNODE_TYPE_OTHER, &dst_mcn1,
                           CAPRIGHTS_READ_WRITE);
    if (err_is_fail(err)) {
        printk(LOG_NOTE, "%s: looking up cte for dest mcn1 (%#"PRIxCADDR"): %d\n",
                __FUNCTION__, dst_mcn1_cptr, err_no(err));
        return SYSRET(err_push(err, SYS_ERR_DEST_CNODE_LOOKUP));
    }
    if (dst_mcn1->cap.type != ObjType_L2CNode) {
        return SYSRET(SYS_ERR_DEST_TYPE_INVALID);
    }

    genpaddr_t dst_addr = get_address(dest);
    genpaddr_t src_addr = get_address(src);
    if (!type_is_vnode(dest->type)) {
        return SYSRET(SYS_ERR_VNODE_TYPE);
    }

    // Direct kernel-virtual views of both page tables.
    uint64_t *dst_entry = (uint64_t *)local_phys_to_mem(dst_addr);
    uint64_t *src_entry = (uint64_t *)local_phys_to_mem(src_addr);

    debug(SUBSYS_PAGING, "vnode_inherit: copying entries %"PRIu64"--%"PRIu64
            " from %p to %p, new flags = %"PRIx64"\n",
            start, end, src_entry, dst_entry, flags);

    struct cte *src_mapping = NULL, *dst_mapping = NULL;
    // Two L2 CNodes together cover all PTABLE_SIZE mapping-cap slots.
    struct capability *src_mcn[PTABLE_SIZE / L2_CNODE_SLOTS] = { &src_mcn0->cap, &src_mcn1->cap };
    struct capability *dst_mcn[PTABLE_SIZE / L2_CNODE_SLOTS] = { &dst_mcn0->cap, &dst_mcn1->cap };
    for (uint64_t i = start; i < end; ++i) {
        // Select which of the two mapping CNodes holds slot i.
        int mcn_idx = i / L2_CNODE_SLOTS;
        assert(mcn_idx == 0 || mcn_idx == 1);
        cslot_t mslot = i % L2_CNODE_SLOTS;
        src_mapping = caps_locate_slot(get_address(src_mcn[mcn_idx]), mslot);
        dst_mapping = caps_locate_slot(get_address(dst_mcn[mcn_idx]), mslot);
        //printf("kernel: cpy: %p -> %p\n", src_entry+i, dst_entry+i);
        //printf("kernel: cpy: [%016lx] -> [%016lx]\n", src_entry[i], dst_entry[i]);
        // Destination entries must be empty before cloning into them.
        assert(dst_entry[i] == 0);
        // clone entry
        dst_entry[i] = src_entry[i];
        // create mapping cap for cloned entry if src mapping cap not null
        if (src_mapping->cap.type != ObjType_Null) {
            struct Frame_Mapping *sm = &src_mapping->cap.u.frame_mapping;
            create_mapping_cap(dst_mapping, sm->cap, cte_for_cap(dest), i, sm->pte_count);
            err = mdb_insert(dst_mapping);
            assert(err_is_ok(err));
        }
    }

    // Optionally rewrite the paging flags of the freshly copied range.
    if (flags) {
        return SYSRET(ptable_modify_flags(dest, start, end - start, flags));
    }
    return SYSRET(SYS_ERR_OK);
}
460
461/// Different handler for cap operations performed by the monitor
static struct sysret monitor_handle_retype(struct capability *kernel_cap,
                                           int cmd, uintptr_t *args)
{
    // Monitor-initiated retype: runs against the current dispatcher's cspace
    // with from_monitor = true (relaxes checks only the monitor may bypass).
    return handle_retype_common(&dcb_current->cspace.cap, args, true);
}
467
// Test whether the capability (passed by value in the argument buffer) has
// descendants in the local MDB.
static struct sysret monitor_handle_has_descendants(struct capability *kernel_cap,
                                                    int cmd, uintptr_t *args)
{
    struct capability *src = (struct capability *)args;

    // Checking only the next-greater MDB entry suffices -- presumably the MDB
    // orders descendants directly after their ancestor; confirm against
    // mdb_tree semantics.
    struct cte *next = mdb_find_greater(src, false);

    return (struct sysret) {
        .error = SYS_ERR_OK,
        .value = (next && is_ancestor(&next->cap, src)),
    };
}
480
// Check whether the given retype (offset/objsize/count) of 'src' would be
// legal, without performing it.
static struct sysret monitor_handle_is_retypeable(struct capability *kernel_cap,
                                                  int cmd, uintptr_t *args)
{
    // The capability itself is passed by value at the start of the argument
    // buffer; scalar arguments follow it, 8-byte aligned.
    struct capability *src = (struct capability*)args;
    int pos = ROUND_UP(sizeof(struct capability), sizeof(uint64_t)) / sizeof(uint64_t);

    uintptr_t offset  = args[pos];
    uintptr_t objsize = args[pos + 1];
    uintptr_t count   = args[pos + 2];

    return sys_monitor_is_retypeable(src, offset, objsize, count);
}
493
// Delete the last copy of a capability; any cap that must be handed back to
// the monitor is placed in retcn[ret_slot].
static struct sysret monitor_handle_delete_last(struct capability *kernel_cap,
                                                int cmd, uintptr_t *args)
{
    capaddr_t root_caddr   = args[0];   // cspace root to resolve target in
    uint8_t root_level     = args[1];
    capaddr_t target_caddr = args[2];   // capability to delete
    uint8_t target_level   = args[3];
    capaddr_t retcn_caddr  = args[4];   // CNode receiving a returned cap
    uint8_t retcn_level    = args[5];
    cslot_t ret_slot       = args[6];

    return sys_monitor_delete_last(root_caddr, root_level, target_caddr,
                                   target_level, retcn_caddr, retcn_level,
                                   ret_slot);
}
509
510static struct sysret monitor_handle_delete_foreigns(struct capability *kernel_cap,
511                                                    int cmd, uintptr_t *args)
512{
513    capaddr_t caddr = args[0];
514    uint8_t level   = args[1];
515    return sys_monitor_delete_foreigns(caddr, level);
516}
517
518static struct sysret monitor_handle_revoke_mark_tgt(struct capability *kernel_cap,
519                                                    int cmd, uintptr_t *args)
520{
521    capaddr_t root_caddr   = args[0];
522    uint8_t root_level     = args[1];
523    capaddr_t target_caddr = args[2];
524    uint8_t target_level   = args[3];
525
526    return sys_monitor_revoke_mark_tgt(root_caddr, root_level,
527                                       target_caddr, target_level);
528}
529
static struct sysret monitor_handle_revoke_mark_rels(struct capability *kernel_cap,
                                                     int cmd, uintptr_t *args)
{
    // The base capability is passed by value in the argument buffer.
    struct capability *base = (struct capability*)args;

    return sys_monitor_revoke_mark_rels(base);
}
537
538static struct sysret monitor_handle_delete_step(struct capability *kernel_cap,
539                                                int cmd, uintptr_t *args)
540{
541    capaddr_t ret_cn_addr  = args[0];
542    capaddr_t ret_cn_level = args[1];
543    capaddr_t ret_slot     = args[2];
544
545    return sys_monitor_delete_step(ret_cn_addr, ret_cn_level, ret_slot);
546}
547
548static struct sysret monitor_handle_clear_step(struct capability *kernel_cap,
549                                               int cmd, uintptr_t *args)
550{
551    capaddr_t ret_cn_addr  = args[0];
552    capaddr_t ret_cn_level = args[1];
553    capaddr_t ret_slot     = args[2];
554
555    return sys_monitor_clear_step(ret_cn_addr, ret_cn_level, ret_slot);
556}
557
// Register the LMP endpoint at ep_caddr as the monitor's notification
// endpoint for this kernel.
static struct sysret monitor_handle_register(struct capability *kernel_cap,
                                             int cmd, uintptr_t *args)
{
    capaddr_t ep_caddr = args[0];

    TRACE(KERNEL, SC_MONITOR_REGISTER, 0);
    struct sysret sr = sys_monitor_register(ep_caddr);
    TRACE(KERNEL, SC_MONITOR_REGISTER, 1);
    return sr;
}
568
569static struct sysret monitor_get_core_id(struct capability *kernel_cap,
570                                         int cmd, uintptr_t *args)
571{
572    return (struct sysret){.error = SYS_ERR_OK, .value = my_core_id};
573}
574
575static struct sysret monitor_get_arch_id(struct capability *kernel_cap,
576                                         int cmd, uintptr_t *args)
577{
578    return (struct sysret){.error = SYS_ERR_OK, .value = apic_id};
579}
580
// Shared helper: identify the cap at cptr (resolved in 'root') and write its
// representation into the caller-supplied buffer.
static struct sysret monitor_identify_cap_common(struct capability *kernel_cap,
                                                 struct capability *root,
                                                 uintptr_t *args)
{
    capaddr_t cptr = args[0];
    uint8_t level  = args[1];

    // NOTE(review): retbuf is a user pointer used without a visible
    // access_ok() check here -- presumably validated inside
    // sys_monitor_identify_cap; confirm.
    struct capability *retbuf = (void *)args[2];

    return sys_monitor_identify_cap(root, cptr, level, retbuf);
}
592
static struct sysret monitor_identify_cap(struct capability *kernel_cap,
                                          int cmd, uintptr_t *args)
{
    // Identify a cap in the calling dispatcher's own cspace.
    return monitor_identify_cap_common(kernel_cap, &dcb_current->cspace.cap, args);
}
598
// Identify a cap in another domain's cspace: args[0]/args[1] name that
// domain's cspace root, the remaining arguments are as for
// monitor_identify_cap_common.
static struct sysret monitor_identify_domains_cap(struct capability *kernel_cap,
                                                  int cmd, uintptr_t *args)
{
    errval_t err;

    capaddr_t root_caddr = args[0];
    capaddr_t root_level = args[1];

    struct capability *root;
    err = caps_lookup_cap(&dcb_current->cspace.cap, root_caddr, root_level,
                          &root, CAPRIGHTS_READ);

    if (err_is_fail(err)) {
        return SYSRET(err_push(err, SYS_ERR_ROOT_CAP_LOOKUP));
    }

    /* XXX: this hides the first two arguments */
    return monitor_identify_cap_common(kernel_cap, root, &args[2]);
}
618
619static struct sysret monitor_cap_has_relations(struct capability *kernel_cap,
620                                               int cmd, uintptr_t *args)
621{
622    capaddr_t caddr = args[0];
623    uint8_t level   = args[1];
624    uint8_t mask    = args[2];
625
626    return sys_cap_has_relations(caddr, level, mask);
627}
628
// Get/set the remote-relations bits of a capability; args[4] packs the new
// relation bits (low byte) and the mask of bits to change (next byte).
static struct sysret monitor_remote_relations(struct capability *kernel_cap,
                                              int cmd, uintptr_t *args)
{
    capaddr_t root_addr = args[0];
    int root_level      = args[1];
    capaddr_t cptr      = args[2];
    int level           = args[3];
    uint8_t relations   = args[4]        & 0xFF;   // new relation bits
    uint8_t mask        = (args[4] >> 8) & 0xFF;   // which bits to update

    return sys_monitor_remote_relations(root_addr, root_level, cptr, level,
                                        relations, mask);
}
642
643
// Create a capability from a raw representation supplied by the monitor.
// The cap struct is passed by value at the start of the argument buffer,
// followed by destination CNode address, level, slot and owning core.
static struct sysret monitor_create_cap(struct capability *kernel_cap,
                                        int cmd, uintptr_t *args)
{
    /* XXX: Get the raw metadata of the capability to create */
    struct capability *src = (struct capability *)args;
    // Scalar arguments follow the cap struct, 8-byte aligned.
    int pos = ROUND_UP(sizeof(struct capability), sizeof(uint64_t)) / sizeof(uint64_t);

    /* Cannot create null caps */
    if (src->type == ObjType_Null) {
        return SYSRET(SYS_ERR_ILLEGAL_DEST_TYPE);
    }

    coreid_t owner = args[pos + 3];

    /* For certain types, only foreign copies can be created here */
    if ((src->type == ObjType_EndPointLMP || src->type == ObjType_Dispatcher
         || src->type == ObjType_Kernel || src->type == ObjType_IRQTable)
        && owner == my_core_id)
    {
        return SYSRET(SYS_ERR_ILLEGAL_DEST_TYPE);
    }

    /* Create the cap in the destination */
    capaddr_t cnode_cptr = args[pos];
    int cnode_level      = args[pos + 1];
    size_t slot          = args[pos + 2];
    assert(cnode_level <= 2);

    return SYSRET(caps_create_from_existing(&dcb_current->cspace.cap,
                                            cnode_cptr, cnode_level,
                                            slot, owner, src));
}
676
// Copy an existing capability (identified by its raw representation, passed
// by value in the argument buffer) into the given destination slot.
static struct sysret monitor_copy_existing(struct capability *kernel_cap,
                                        int cmd, uintptr_t *args)
{
    /* XXX: Get the raw metadata of the capability to create */
    struct capability *src = (struct capability *)args;
    // Scalar arguments follow the cap struct, 8-byte aligned.
    int pos = ROUND_UP(sizeof(struct capability), sizeof(uint64_t)) / sizeof(uint64_t);

    capaddr_t croot_cptr = args[pos];       // destination cspace root
    capaddr_t cnode_cptr = args[pos + 1];   // destination CNode
    int cnode_level      = args[pos + 2];
    size_t slot          = args[pos + 3];   // destination slot

    return sys_monitor_copy_existing(src, croot_cptr, cnode_cptr, cnode_level, slot);
}
691
692static struct sysret monitor_nullify_cap(struct capability *kernel_cap,
693                                         int cmd, uintptr_t *args)
694{
695    capaddr_t cptr = args[0];
696    uint8_t level  = args[1];
697
698    return sys_monitor_nullify_cap(cptr, level);
699}
700
701static struct sysret monitor_handle_sync_timer(struct capability *kern_cap,
702                                               int cmd, uintptr_t *args)
703{
704    uint64_t synctime = args[0];
705    return sys_monitor_handle_sync_timer(synctime);
706}
707
// Fill the caller-supplied platform_info buffer (args[0]) with the platform
// identification of this machine.
static struct sysret monitor_get_platform(struct capability *kern_cap,
                                          int cmd, uintptr_t *args)
{
    // Validate the user buffer before writing to it.
    if (!access_ok(ACCESS_WRITE, args[0], sizeof(struct platform_info))) {
        return SYSRET(SYS_ERR_INVALID_USER_BUFFER);
    }
    struct platform_info *pi = (struct platform_info *)args[0];
    // x86: only have PC as platform
    pi->arch = PI_ARCH_X86;
    pi->platform = PI_PLATFORM_PC;
    return SYSRET(SYS_ERR_OK);
}
720
721static struct sysret monitor_reclaim_ram(struct capability *kern_cap,
722                                         int cmd, uintptr_t *args)
723{
724    capaddr_t retcn_addr  = (capaddr_t)args[0];
725    uint8_t   retcn_level = (uint8_t)  args[1];
726    cslot_t   ret_slot    = (cslot_t)  args[2];
727    return sys_monitor_reclaim_ram(retcn_addr, retcn_level, ret_slot);
728}
729
// Clear the hardware dirty bits of every entry in the leaf page table 'to'
// and return how many entries had the dirty bit set.
static struct sysret handle_clean_dirty_bits(struct capability *to,
                                             int cmd, uintptr_t *args)
{
    assert(to->type == ObjType_VNode_x86_64_ptable);
    size_t cleared = 0;   // number of entries whose dirty bit was set

    genpaddr_t dest_gp   = get_address(to);
    lpaddr_t dest_lp     = gen_phys_to_local_phys(dest_gp);
    lvaddr_t dest_lv     = local_phys_to_mem(dest_lp);
    //printf("%s:%s:%d: dest_gp = %"PRIxGENVADDR" dest_lp = %"PRIxLVADDR" dest_lv = %"PRIxLVADDR" \n",
    //       __FILE__, __FUNCTION__, __LINE__, dest_gp, dest_lp, dest_lv);

    lvaddr_t* addr = (lvaddr_t*)dest_lv;

    if (addr == NULL) {
        printf("%s:%s:%d: Page table has invalid base address\n",
               __FILE__, __FUNCTION__, __LINE__);
        goto out;
    }

    // NOTE(review): no TLB invalidation happens here, so the TLB may still
    // hold entries with the old dirty state -- confirm callers flush.
    for (int i=0; i < X86_64_PTABLE_SIZE; i++) {
        if (addr[i] & X86_64_PTABLE_DIRTY) {
            cleared++;
        }
        addr[i] &= ~X86_64_PTABLE_DIRTY;
    }

out:
    return (struct sysret) {
        .error = SYS_ERR_OK,
        .value = cleared,
    };
}
763
764static struct sysret handle_io(struct capability *to, int cmd, uintptr_t *args)
765{
766    uint64_t    port = args[0];
767    uint64_t    data = args[1]; // ignored for input
768
769    return sys_io(to, cmd, port, data);
770}
771
// Read VMCS field args[0] of the dispatcher's guest into the location at
// args[1]. VMX only: unavailable on Xeon Phi (__k1om__) and SVM builds.
static struct sysret handle_vmread(struct capability *to,
				   int cmd, uintptr_t *args)
{
#if defined(__k1om__) || defined(CONFIG_SVM)
    return SYSRET(SYS_ERR_VMKIT_UNAVAIL);
#else
    errval_t err;
    struct dcb *dcb = to->u.dispatcher.dcb;
    lpaddr_t vmcs_base = dcb->guest_desc.vmcb.cap.u.frame.base;
    // vmread only works on the currently loaded VMCS; refuse otherwise.
    if (vmcs_base != vmptrst()) {
        err = SYS_ERR_VMKIT_VMX_VMFAIL_INVALID;
    } else {
        // NOTE(review): args[1] is a user pointer written without a visible
        // access_ok() check -- confirm vmread (or the cap invocation path)
        // validates it.
        err = vmread(args[0], (lvaddr_t *)args[1]);
    }
    return SYSRET(err);
#endif
}
789
// Write value args[1] into VMCS field args[0] of the dispatcher's guest.
// VMX only: unavailable on Xeon Phi (__k1om__) and SVM builds.
static struct sysret handle_vmwrite(struct capability *to,
				    int cmd, uintptr_t *args)
{
#if defined(__k1om__) || defined(CONFIG_SVM)
    return SYSRET(SYS_ERR_VMKIT_UNAVAIL);
#else
    errval_t err;
    struct dcb *dcb = to->u.dispatcher.dcb;
    lpaddr_t vmcs_base = dcb->guest_desc.vmcb.cap.u.frame.base;
    // vmwrite only works on the currently loaded VMCS; refuse otherwise.
    if (vmcs_base != vmptrst()) {
        err = SYS_ERR_VMKIT_VMX_VMFAIL_INVALID;
    } else {
        err = vmwrite(args[0], args[1]);
    }
    return SYSRET(err);
#endif
}
807
// Make the dispatcher's VMCS the current one on this core (VMPTRLD).
// VMX only: unavailable on Xeon Phi (__k1om__) and SVM builds.
static struct sysret handle_vmptrld(struct capability *to,
				    int cmd, uintptr_t *args)
{
#if defined(__k1om__) || defined(CONFIG_SVM)
    return SYSRET(SYS_ERR_VMKIT_UNAVAIL);
#else
    errval_t err;
    struct dcb *dcb = to->u.dispatcher.dcb;
    lpaddr_t vmcs_base = dcb->guest_desc.vmcb.cap.u.frame.base;
    err = vmptrld(vmcs_base);
    return SYSRET(err);
#endif
}
821
// Clear the dispatcher's VMCS, making it inactive on this core (VMCLEAR).
// VMX only: unavailable on Xeon Phi (__k1om__) and SVM builds.
static struct sysret handle_vmclear(struct capability *to,
				    int cmd, uintptr_t *args)
{
#if defined(__k1om__) || defined(CONFIG_SVM)
    return SYSRET(SYS_ERR_VMKIT_UNAVAIL);
#else
    errval_t err;
    struct dcb *dcb = to->u.dispatcher.dcb;
    lpaddr_t vmcs_base = dcb->guest_desc.vmcb.cap.u.frame.base;
    err = vmclear(vmcs_base);
    return SYSRET(err);
#endif
}
835
836#ifndef __k1om__
837static struct sysret
838handle_dispatcher_setup_guest (struct capability *to, int cmd, uintptr_t *args)
839{
840    errval_t err;
841    struct dcb *dcb = to->u.dispatcher.dcb;
842
843    capaddr_t epp = args[0];
844    capaddr_t vnodep = args[1];
845    capaddr_t vmcbp = args[2];
846    capaddr_t ctrlp = args[3];
847
848    // 0. Enable VM extensions
849    err = vmkit_enable_virtualization();
850    if (err != SYS_ERR_OK) {
851        return SYSRET(err);
852    }
853
854    // 1. Check arguments
    // Monitor endpoint for exits of this guest
856    struct cte *ep_cte;
857
858    err = caps_lookup_slot(&dcb_current->cspace.cap, epp, 2,
859                           &ep_cte, CAPRIGHTS_READ_WRITE);
860    if (err_is_fail(err)) {
861        return SYSRET(err);
862    }
863    if (ep_cte->cap.type != ObjType_EndPointLMP) {
864        return SYSRET(SYS_ERR_VMKIT_ENDPOINT_INVALID);
865    }
866    err = caps_copy_to_cte(&dcb->guest_desc.monitor_ep, ep_cte, false, 0, 0);
867    if (err_is_fail(err)) {
868        return SYSRET(err_push(err, SYS_ERR_VMKIT_ENDPOINT));
869    }
870
871    // Domain vspace
872    struct capability *vnode_cap;
873    err = caps_lookup_cap(&dcb_current->cspace.cap, vnodep, 2,
874                          &vnode_cap, CAPRIGHTS_WRITE);
875    if (err_is_fail(err)) {
876        return SYSRET(err);
877    }
878    if (vnode_cap->type != ObjType_VNode_x86_64_pml4) {
879        return SYSRET(SYS_ERR_DISP_VSPACE_INVALID);
880    }
881
882    assert(vnode_cap->type == ObjType_VNode_x86_64_pml4);
883
884    // VMCB
885    struct cte *vmcb_cte;
886    err = caps_lookup_slot(&dcb_current->cspace.cap, vmcbp, 2,
887                           &vmcb_cte, CAPRIGHTS_READ_WRITE);
888    if (err_is_fail(err)) {
889        return SYSRET(err);
890    }
891    if (vmcb_cte->cap.type != ObjType_Frame ||
892        vmcb_cte->cap.u.frame.bytes < BASE_PAGE_SIZE) {
893        return SYSRET(SYS_ERR_VMKIT_VMCB_INVALID);
894    }
895    err = caps_copy_to_cte(&dcb->guest_desc.vmcb, vmcb_cte, false, 0, 0);
896    if (err_is_fail(err)) {
897        return SYSRET(err_push(err, SYS_ERR_VMKIT_VMCB));
898    }
899
900    // guest control
901    struct cte *ctrl_cte;
902    err = caps_lookup_slot(&dcb_current->cspace.cap, ctrlp, 2,
903                           &ctrl_cte, CAPRIGHTS_READ_WRITE);
904    if (err_is_fail(err)) {
905        return SYSRET(err);
906    }
907    if (ctrl_cte->cap.type != ObjType_Frame ||
908        ctrl_cte->cap.u.frame.bytes < BASE_PAGE_SIZE) {
909        return SYSRET(SYS_ERR_VMKIT_CTRL_INVALID);
910    }
911    err = caps_copy_to_cte(&dcb->guest_desc.ctrl, ctrl_cte, false, 0, 0);
912    if (err_is_fail(err)) {
913        return SYSRET(err_push(err, SYS_ERR_VMKIT_CTRL));
914    }
915
916#ifndef CONFIG_SVM
917    // Initialize VMCS for the single virtual-CPU here instead of in
918    // userspace, where the privilege level is not 0.
919    err = initialize_vmcs(vmcb_cte->cap.u.frame.base);
920    assert(err_is_ok(err));
921#endif
922
923    // 2. Set up the target DCB
924/*     dcb->guest_desc.monitor_ep = ep_cap; */
925    // set dcb->guest_desc.vspace for VMX (Intel) vmkit.
926    dcb->guest_desc.vspace = get_address(vnode_cap);
927    // set dcb->vspace for SVM (AMD) vmkit.
928    dcb->vspace = get_address(vnode_cap);
929    dcb->is_vm_guest = true;
930/*     dcb->guest_desc.vmcb = vmcb_cap->u.frame.base; */
931/*     dcb->guest_desc.ctrl = (void *)x86_64_phys_to_mem(ctrl_cap->u.frame.base); */
932
933    return SYSRET(SYS_ERR_OK);
934}
935#endif
936
937static struct sysret monitor_handle_domain_id(struct capability *monitor_cap,
938                                              int cmd, uintptr_t *args)
939{
940    capaddr_t cptr = args[0];
941    domainid_t domain_id = args[1];
942
943    return sys_monitor_domain_id(cptr, domain_id);
944}
945
946static struct sysret monitor_get_cap_owner(struct capability *monitor_cap,
947                                           int cmd, uintptr_t *args)
948{
949    capaddr_t root_addr = args[0];
950    uint8_t root_level = args[1];
951    capaddr_t cptr = args[2];
952    uint8_t level = args[3];
953
954    return sys_get_cap_owner(root_addr, root_level, cptr, level);
955}
956
957static struct sysret monitor_set_cap_owner(struct capability *monitor_cap,
958                                           int cmd, uintptr_t *args)
959{
960    capaddr_t root_addr = args[0];
961    uint8_t root_level = args[1];
962    capaddr_t cptr = args[2];
963    uint8_t level = args[3];
964    coreid_t owner = args[4];
965
966    return sys_set_cap_owner(root_addr, root_level, cptr, level, owner);
967}
968
969static struct sysret monitor_lock_cap(struct capability *monitor_cap,
970                                      int cmd, uintptr_t *args)
971{
972    capaddr_t root_addr = args[0];
973    uint8_t root_level = args[1];
974    capaddr_t cptr = args[2];
975    uint8_t level = args[3];
976
977    return sys_lock_cap(root_addr, root_level, cptr, level);
978}
979
980static struct sysret monitor_unlock_cap(struct capability *monitor_cap,
981                                        int cmd, uintptr_t *args)
982{
983    capaddr_t root_addr = args[0];
984    uint8_t root_level = args[1];
985    capaddr_t cptr = args[2];
986    uint8_t level = args[3];
987
988    return sys_unlock_cap(root_addr, root_level, cptr, level);
989}
990
991/**
992 * \brief Set up tracing in the kernel
993 */
994static struct sysret handle_trace_setup(struct capability *cap,
995                                        int cmd, uintptr_t *args)
996{
997    struct capability *frame;
998    errval_t err;
999
1000    /* lookup passed cap */
1001    capaddr_t cptr = args[0];
1002    err = caps_lookup_cap(&dcb_current->cspace.cap, cptr, 2, &frame,
1003                          CAPRIGHTS_READ_WRITE);
1004    if (err_is_fail(err)) {
1005        return SYSRET(err);
1006    }
1007
1008    lpaddr_t lpaddr = gen_phys_to_local_phys(frame->u.frame.base);
1009    kernel_trace_buf = local_phys_to_mem(lpaddr);
1010    //printf("kernel.%u: handle_trace_setup at %lx\n", apic_id, kernel_trace_buf);
1011
1012    // Copy boot applications.
1013    trace_copy_boot_applications();
1014
1015    return SYSRET(SYS_ERR_OK);
1016}
1017
1018static struct sysret handle_irqsrc_get_vec_start(struct capability * to, int cmd,
1019        uintptr_t *args)
1020{
1021    struct sysret ret;
1022    ret.error = SYS_ERR_OK;
1023    ret.value = to->u.irqsrc.vec_start;
1024    return ret;
1025
1026}
1027
1028static struct sysret handle_irqsrc_get_vec_end(struct capability * to, int cmd,
1029        uintptr_t *args)
1030{
1031    struct sysret ret;
1032    ret.error = SYS_ERR_OK;
1033    ret.value = to->u.irqsrc.vec_end;
1034    return ret;
1035
1036}
1037
1038
1039static struct sysret handle_irqdest_get_vector(struct capability *to, int cmd,
1040                                            uintptr_t *args)
1041{
1042    struct sysret ret;
1043    ret.error = SYS_ERR_OK;
1044    ret.value = to->u.irqdest.vector;
1045    return ret;
1046}
1047
1048static struct sysret handle_irqdest_get_cpu(struct capability *to, int cmd,
1049                                            uintptr_t *args)
1050{
1051    struct sysret ret;
1052    ret.error = SYS_ERR_OK;
1053    ret.value = to->u.irqdest.cpu;
1054    return ret;
1055}
1056
1057static struct sysret handle_irqdest_connect(struct capability *to, int cmd,
1058                                            uintptr_t *args)
1059{
1060    return SYSRET(irq_connect(to, args[0]));
1061}
1062
1063static struct sysret handle_irq_table_alloc(struct capability *to, int cmd,
1064                                            uintptr_t *args)
1065{
1066    struct sysret ret;
1067    int outvec;
1068    ret.error = irq_table_alloc(&outvec);
1069    ret.value = outvec;
1070    return ret;
1071}
1072
1073static struct sysret handle_irq_table_alloc_dest_cap(struct capability *to, int cmd,
1074                                            uintptr_t *args)
1075{
1076    return SYSRET(irq_table_alloc_dest_cap(args[0],args[1],args[2], args[3]));
1077}
1078
1079
1080static struct sysret handle_irq_table_set(struct capability *to, int cmd,
1081                                          uintptr_t *args)
1082{
1083    return SYSRET(irq_table_set(args[0], args[1]));
1084}
1085
1086static struct sysret handle_irq_table_delete(struct capability *to, int cmd,
1087                                             uintptr_t *args)
1088{
1089    return SYSRET(irq_table_delete(args[0]));
1090}
1091
1092static struct sysret handle_ipi_notify_send(struct capability *cap,
1093                                            int cmd, uintptr_t *args)
1094{
1095    assert(cap->type == ObjType_Notify_IPI);
1096    return ipi_raise_notify(cap->u.notify_ipi.coreid, cap->u.notify_ipi.chanid);
1097}
1098
1099static struct sysret kernel_ipi_register(struct capability *cap,
1100                                         int cmd, uintptr_t *args)
1101{
1102    assert(cap->type == ObjType_Kernel);
1103    capaddr_t ep = args[0];
1104    int chanid = args[1];
1105    return SYSRET(ipi_register_notification(ep, chanid));
1106}
1107
static struct sysret kernel_ipi_delete(struct capability *cap,
                                       int cmd, uintptr_t *args)
{
    assert(cap->type == ObjType_Kernel);
    // Deleting an IPI notification registration is not yet implemented;
    // this always aborts in debug builds.
    assert(!"NYI");
    return SYSRET(SYS_ERR_OK);
}
1115
1116static struct sysret dispatcher_dump_ptables(struct capability *cap,
1117                                             int cmd, uintptr_t *args)
1118{
1119    assert(cap->type == ObjType_Dispatcher);
1120
1121    lvaddr_t vaddr = args[0];
1122
1123    printf("kernel_dump_ptables, vaddr=%#"PRIxLVADDR"\n", vaddr);
1124
1125    struct dcb *dispatcher = cap->u.dispatcher.dcb;
1126
1127    paging_dump_tables_around(dispatcher, vaddr);
1128
1129    return SYSRET(SYS_ERR_OK);
1130}
1131
1132static struct sysret dispatcher_dump_capabilities(struct capability *cap,
1133                                             int cmd, uintptr_t *args)
1134{
1135    assert(cap->type == ObjType_Dispatcher);
1136
1137    printf("dispatcher_dump_capabilities\n");
1138
1139    struct dcb *dispatcher = cap->u.dispatcher.dcb;
1140
1141    errval_t err = debug_print_cababilities(dispatcher);
1142
1143    return SYSRET(err);
1144}
1145
1146/*
1147 * \brief Activate performance monitoring
1148 *
1149 * Activates performance monitoring.
1150 * \param xargs Expected parameters in args:
1151 * - performance monitoring type
1152 * - mask for given type
1153 * - Counter id
1154 * - Also count in privileged mode
1155 * - Number of counts before overflow. This parameter may be used to
1156 *   set tradeoff between accuracy and overhead. Set the counter to 0
1157 *   to deactivate the usage of APIC.
1158 * - Endpoint capability to be invoked when the counter overflows.
1159 *   The buffer associated with the endpoint needs to be large enough
1160 *   to hold several overflow notifications depending on the overflow
1161 *   frequency.
1162 */
1163static struct sysret performance_counter_activate(struct capability *cap,
1164                                                  int cmd, uintptr_t *args)
1165{
1166    uint8_t event = args[0];
1167    uint8_t umask = args[1];
1168    uint8_t counter_id = args[2];
1169    bool kernel = args[3];
1170    uint64_t counter_value = args[4];
1171    capaddr_t ep_addr = args[5];
1172
1173    errval_t err;
1174    struct capability *ep;
1175    extern struct capability perfmon_callback_ep;
1176
1177    // Make sure that
1178    assert(ep_addr!=0 || counter_value==0);
1179
1180    perfmon_init();
1181    perfmon_measure_start(event, umask, counter_id, kernel, counter_value);
1182
1183    if(ep_addr!=0) {
1184
1185        err = caps_lookup_cap(&dcb_current->cspace.cap, ep_addr, 2, &ep,
1186                              CAPRIGHTS_READ);
1187        if(err_is_fail(err)) {
1188            return SYSRET(err);
1189        }
1190
1191        perfmon_callback_ep = *ep;
1192    }
1193
1194    return SYSRET(SYS_ERR_OK);
1195}
1196
1197/*
1198 * \brief Write counter values.
1199 */
1200static struct sysret performance_counter_write(struct capability *cap,
1201                                               int cmd, uintptr_t *args)
1202{
1203    uint8_t counter_id = args[0];
1204    uint64_t counter_value = args[1];
1205
1206    perfmon_measure_write(counter_id, counter_value);
1207    return SYSRET(SYS_ERR_OK);
1208}
1209
1210/*
1211 * \brief Deactivate performance counters again.
1212 */
1213static struct sysret performance_counter_deactivate(struct capability *cap,
1214                                                  int cmd, uintptr_t *args)
1215{
1216    perfmon_measure_stop();
1217    return SYSRET(SYS_ERR_OK);
1218}
1219
1220/*
1221 * \brief Return system-wide unique ID of this ID cap.
1222 */
1223static struct sysret handle_idcap_identify(struct capability *cap, int cmd,
1224                                           uintptr_t *args)
1225{
1226    idcap_id_t id;
1227    struct sysret sysret = sys_idcap_identify(cap, &id);
1228    sysret.value = id;
1229
1230    return sysret;
1231}
1232
1233static struct sysret handle_devid_create(struct capability *cap, int cmd,
1234                                           uintptr_t *args)
1235{
1236    assert(cap->type == ObjType_DeviceIDManager);
1237
1238    capaddr_t cnode_cptr = args[0];
1239    capaddr_t cnode_level = args[1];
1240    cslot_t slot = args[2];
1241
1242    uint32_t address = args[3];
1243    uint32_t segflags = args[4];
1244
1245    struct capability devid;
1246    devid.type = ObjType_DeviceID;
1247    devid.u.deviceid.bus      = (uint8_t)(address >> 16);
1248    devid.u.deviceid.device   = (uint8_t)(address >> 8);
1249    devid.u.deviceid.function = (uint8_t)(address);
1250    devid.u.deviceid.type     = (uint8_t)(address >> 24);
1251    devid.u.deviceid.segment  = (uint16_t)(segflags >> 16);
1252    devid.u.deviceid.flags    = (uint16_t)(segflags);
1253
1254    return SYSRET(caps_create_from_existing(&dcb_current->cspace.cap,
1255                                            cnode_cptr, cnode_level,
1256                                            slot, my_core_id, &devid));
1257}
1258
1259static struct sysret kernel_send_init_ipi(struct capability *cap, int cmd,
1260                                          uintptr_t *args)
1261{
1262    coreid_t destination = args[0];
1263//    printk(LOG_DEBUG, "%s:%s:%d: destination=%"PRIuCOREID"\n",
1264//           __FILE__, __FUNCTION__, __LINE__, destination);
1265
1266    apic_send_init_assert(destination, xapic_none);
1267    apic_send_init_deassert();
1268
1269    return SYSRET(SYS_ERR_OK);
1270}
1271
1272static struct sysret kernel_send_start_ipi(struct capability *cap,
1273                                           int cmd,
1274                                           uintptr_t *args)
1275{
1276    coreid_t destination = args[0];
1277    genvaddr_t start_vector = X86_64_REAL_MODE_SEGMENT_TO_REAL_MODE_PAGE(X86_64_REAL_MODE_SEGMENT);
1278//    printk(LOG_DEBUG, "%s:%d: destination=%"PRIuCOREID" start_vector=%"PRIxGENVADDR"\n",
1279//           __FILE__, __LINE__, destination, start_vector);
1280
1281    apic_send_start_up(destination, xapic_none, start_vector);
1282
1283    return SYSRET(SYS_ERR_OK);
1284}
1285
1286static struct sysret kernel_get_global_phys(struct capability *cap,
1287                                           int cmd,
1288                                           uintptr_t *args)
1289{
1290
1291    struct sysret sysret;
1292    sysret.value = mem_to_local_phys((lvaddr_t)global);
1293    sysret.error = SYS_ERR_OK;
1294
1295    return sysret;
1296}
1297
1298static struct sysret kernel_add_kcb(struct capability *kern_cap,
1299                                    int cmd, uintptr_t *args)
1300{
1301    uintptr_t kcb_addr = args[0];
1302    struct kcb *new_kcb = (struct kcb *)kcb_addr;
1303
1304    return sys_kernel_add_kcb(new_kcb);
1305}
1306
1307static struct sysret kernel_remove_kcb(struct capability *kern_cap,
1308                                       int cmd, uintptr_t *args)
1309{
1310    printk(LOG_NOTE, "in kernel_remove_kcb invocation!\n");
1311    uintptr_t kcb_addr = args[0];
1312    struct kcb *to_remove = (struct kcb *)kcb_addr;
1313
1314    return sys_kernel_remove_kcb(to_remove);
1315}
1316
1317static struct sysret kernel_suspend_kcb_sched(struct capability *kern_cap,
1318                                              int cmd, uintptr_t *args)
1319{
1320    printk(LOG_NOTE, "in kernel_suspend_kcb_sched invocation!\n");
1321    return sys_kernel_suspend_kcb_sched((bool)args[0]);
1322}
1323
1324static struct sysret handle_kcb_identify(struct capability *to,
1325                                         int cmd, uintptr_t *args)
1326{
1327    return sys_handle_kcb_identify(to, (struct frame_identity *)args[0]);
1328}
1329
1330
/// Signature shared by all capability invocation handlers in this file.
typedef struct sysret (*invocation_handler_t)(struct capability *to,
                                              int cmd, uintptr_t *args);
1333
1334static invocation_handler_t invocations[ObjType_Num][CAP_MAX_CMD] = {
1335    [ObjType_Dispatcher] = {
1336        [DispatcherCmd_Setup] = handle_dispatcher_setup,
1337        [DispatcherCmd_Properties] = handle_dispatcher_properties,
1338#ifndef __k1om__
1339        [DispatcherCmd_SetupGuest] = handle_dispatcher_setup_guest,
1340#endif
1341        [DispatcherCmd_DumpPTables]  = dispatcher_dump_ptables,
1342        [DispatcherCmd_DumpCapabilities] = dispatcher_dump_capabilities,
1343	[DispatcherCmd_Vmread] = handle_vmread,
1344	[DispatcherCmd_Vmwrite] = handle_vmwrite,
1345	[DispatcherCmd_Vmptrld] = handle_vmptrld,
1346	[DispatcherCmd_Vmclear] = handle_vmclear,
1347    },
1348    [ObjType_KernelControlBlock] = {
1349        [KCBCmd_Identify] = handle_kcb_identify,
1350    },
1351        [ObjType_RAM] = {
1352        [RAMCmd_Noop] = handle_noop,
1353    },
1354    [ObjType_L1CNode] = {
1355        [CNodeCmd_Copy]   = handle_copy,
1356        [CNodeCmd_Mint]   = handle_mint,
1357        [CNodeCmd_Retype] = handle_retype,
1358        [CNodeCmd_Create] = handle_create,
1359        [CNodeCmd_Delete] = handle_delete,
1360        [CNodeCmd_Revoke] = handle_revoke,
1361        [CNodeCmd_GetState] = handle_get_state,
1362        [CNodeCmd_GetSize] = handle_get_size,
1363        [CNodeCmd_Resize] = handle_resize,
1364        [CNodeCmd_CapIdentify] = handle_cap_identify,
1365    },
1366    [ObjType_L2CNode] = {
1367        [CNodeCmd_Copy]   = handle_copy,
1368        [CNodeCmd_Mint]   = handle_mint,
1369        [CNodeCmd_Retype] = handle_retype,
1370        [CNodeCmd_Create] = handle_create,
1371        [CNodeCmd_Delete] = handle_delete,
1372        [CNodeCmd_Revoke] = handle_revoke,
1373        [CNodeCmd_GetState] = handle_get_state,
1374        [CNodeCmd_Resize] = handle_resize,
1375        [CNodeCmd_CapIdentify] = handle_cap_identify,
1376    },
1377    [ObjType_VNode_VTd_root_table] = {
1378        [VNodeCmd_Map]         = handle_map,
1379        [VNodeCmd_Unmap]       = handle_unmap,
1380        [VNodeCmd_ModifyFlags] = handle_vnode_modify_flags,
1381    },
1382    [ObjType_VNode_VTd_ctxt_table] = {
1383        [VNodeCmd_Map]         = handle_map,
1384        [VNodeCmd_Unmap]       = handle_unmap,
1385        [VNodeCmd_ModifyFlags] = handle_vnode_modify_flags,
1386    },
1387    [ObjType_VNode_x86_64_pml5] = {
1388        [VNodeCmd_Map]   = handle_map,
1389        [VNodeCmd_Unmap] = handle_unmap,
1390        [VNodeCmd_ModifyFlags] = handle_vnode_modify_flags,
1391    },
1392    [ObjType_VNode_x86_64_pml4] = {
1393        [VNodeCmd_Map]   = handle_map,
1394        [VNodeCmd_Unmap] = handle_unmap,
1395        [VNodeCmd_ModifyFlags] = handle_vnode_modify_flags,
1396        [VNodeCmd_CopyRemap] = handle_vnode_copy_remap,
1397        [VNodeCmd_Inherit] = handle_inherit,
1398    },
1399    [ObjType_VNode_x86_64_pdpt] = {
1400        [VNodeCmd_Map]   = handle_map,
1401        [VNodeCmd_Unmap] = handle_unmap,
1402        [VNodeCmd_ModifyFlags] = handle_vnode_modify_flags,
1403        [VNodeCmd_CopyRemap] = handle_vnode_copy_remap,
1404        [VNodeCmd_Inherit] = handle_inherit,
1405    },
1406    [ObjType_VNode_x86_64_pdir] = {
1407        [VNodeCmd_Map]   = handle_map,
1408        [VNodeCmd_Unmap] = handle_unmap,
1409        [VNodeCmd_ModifyFlags] = handle_vnode_modify_flags,
1410        [VNodeCmd_CopyRemap] = handle_vnode_copy_remap,
1411        [VNodeCmd_Inherit] = handle_inherit,
1412    },
1413    [ObjType_VNode_x86_64_ptable] = {
1414        [VNodeCmd_CleanDirtyBits] = handle_clean_dirty_bits,
1415        [VNodeCmd_Map]   = handle_map,
1416        [VNodeCmd_Unmap] = handle_unmap,
1417        [VNodeCmd_ModifyFlags] = handle_vnode_modify_flags,
1418        [VNodeCmd_CopyRemap] = handle_vnode_copy_remap,
1419        [VNodeCmd_Inherit] = handle_inherit,
1420    },
1421    [ObjType_Frame_Mapping] = {
1422        [MappingCmd_Destroy] = handle_mapping_destroy,
1423        [MappingCmd_Modify] = handle_mapping_modify,
1424    },
1425    [ObjType_DevFrame_Mapping] = {
1426        [MappingCmd_Destroy] = handle_mapping_destroy,
1427        [MappingCmd_Modify] = handle_mapping_modify,
1428    },
1429    [ObjType_VNode_VTd_root_table_Mapping] = {
1430        [MappingCmd_Destroy] = handle_mapping_destroy,
1431        [MappingCmd_Modify]  = handle_mapping_modify,
1432    },
1433    [ObjType_VNode_VTd_ctxt_table_Mapping] = {
1434        [MappingCmd_Destroy] = handle_mapping_destroy,
1435        [MappingCmd_Modify]  = handle_mapping_modify,
1436    },
1437    [ObjType_VNode_x86_64_pml5_Mapping] = {
1438        [MappingCmd_Destroy] = handle_mapping_destroy,
1439        [MappingCmd_Modify] = handle_mapping_modify,
1440    },
1441    [ObjType_VNode_x86_64_pml4_Mapping] = {
1442        [MappingCmd_Destroy] = handle_mapping_destroy,
1443        [MappingCmd_Modify] = handle_mapping_modify,
1444    },
1445    [ObjType_VNode_x86_64_pdpt_Mapping] = {
1446        [MappingCmd_Destroy] = handle_mapping_destroy,
1447        [MappingCmd_Modify] = handle_mapping_modify,
1448    },
1449    [ObjType_VNode_x86_64_pdir_Mapping] = {
1450        [MappingCmd_Destroy] = handle_mapping_destroy,
1451        [MappingCmd_Modify] = handle_mapping_modify,
1452    },
1453    [ObjType_VNode_x86_64_ptable_Mapping] = {
1454        [MappingCmd_Destroy] = handle_mapping_destroy,
1455        [MappingCmd_Modify] = handle_mapping_modify,
1456    },
1457    [ObjType_VNode_x86_64_ept_pml4] = {
1458        [VNodeCmd_Map]   = handle_map,
1459        [VNodeCmd_Unmap] = handle_unmap,
1460    },
1461    [ObjType_VNode_x86_64_ept_pdpt] = {
1462        [VNodeCmd_Map]   = handle_map,
1463        [VNodeCmd_Unmap] = handle_unmap,
1464    },
1465    [ObjType_VNode_x86_64_ept_pdir] = {
1466        [VNodeCmd_Map]   = handle_map,
1467        [VNodeCmd_Unmap] = handle_unmap,
1468    },
1469    [ObjType_VNode_x86_64_ept_ptable] = {
1470        [VNodeCmd_CleanDirtyBits] = handle_clean_dirty_bits,
1471        [VNodeCmd_Map]   = handle_map,
1472        [VNodeCmd_Unmap] = handle_unmap,
1473    },
1474    [ObjType_Kernel] = {
1475        [KernelCmd_Get_core_id]  = monitor_get_core_id,
1476        [KernelCmd_Get_arch_id]  = monitor_get_arch_id,
1477        [KernelCmd_Identify_cap] = monitor_identify_cap,
1478        [KernelCmd_Identify_domains_cap] = monitor_identify_domains_cap,
1479        [KernelCmd_Remote_relations] = monitor_remote_relations,
1480        [KernelCmd_Cap_has_relations] = monitor_cap_has_relations,
1481        [KernelCmd_Create_cap]   = monitor_create_cap,
1482        [KernelCmd_Copy_existing] = monitor_copy_existing,
1483        [KernelCmd_Nullify_cap]  = monitor_nullify_cap,
1484        [KernelCmd_Setup_trace]  = handle_trace_setup,
1485        [KernelCmd_Register]     = monitor_handle_register,
1486        [KernelCmd_Domain_Id]    = monitor_handle_domain_id,
1487        [KernelCmd_Get_cap_owner] = monitor_get_cap_owner,
1488        [KernelCmd_Set_cap_owner] = monitor_set_cap_owner,
1489        [KernelCmd_Lock_cap]     = monitor_lock_cap,
1490        [KernelCmd_Unlock_cap]   = monitor_unlock_cap,
1491        [KernelCmd_Retype]       = monitor_handle_retype,
1492        [KernelCmd_Has_descendants] = monitor_handle_has_descendants,
1493        [KernelCmd_Is_retypeable] = monitor_handle_is_retypeable,
1494        [KernelCmd_Delete_last]  = monitor_handle_delete_last,
1495        [KernelCmd_Delete_foreigns] = monitor_handle_delete_foreigns,
1496        [KernelCmd_Revoke_mark_target] = monitor_handle_revoke_mark_tgt,
1497        [KernelCmd_Revoke_mark_relations] = monitor_handle_revoke_mark_rels,
1498        [KernelCmd_Delete_step] = monitor_handle_delete_step,
1499        [KernelCmd_Clear_step] = monitor_handle_clear_step,
1500        [KernelCmd_Sync_timer]   = monitor_handle_sync_timer,
1501        [KernelCmd_IPI_Register] = kernel_ipi_register,
1502        [KernelCmd_IPI_Delete]   = kernel_ipi_delete,
1503        [KernelCmd_GetGlobalPhys] = kernel_get_global_phys,
1504        [KernelCmd_Add_kcb]      = kernel_add_kcb,
1505        [KernelCmd_Remove_kcb]   = kernel_remove_kcb,
1506        [KernelCmd_Suspend_kcb_sched]   = kernel_suspend_kcb_sched,
1507        [KernelCmd_Get_platform] = monitor_get_platform,
1508        [KernelCmd_ReclaimRAM] = monitor_reclaim_ram,
1509    },
1510    [ObjType_IPI] = {
1511        [IPICmd_Send_Start] = kernel_send_start_ipi,
1512        [IPICmd_Send_Init] = kernel_send_init_ipi,
1513    },
1514	[ObjType_IRQDest] = {
1515        [IRQDestCmd_Connect] = handle_irqdest_connect,
1516        [IRQDestCmd_GetVector] = handle_irqdest_get_vector,
1517        [IRQDestCmd_GetCpu] = handle_irqdest_get_cpu
1518	},
1519	[ObjType_IRQSrc] = {
1520        [IRQSrcCmd_GetVecStart] = handle_irqsrc_get_vec_start,
1521        [IRQSrcCmd_GetVecEnd] = handle_irqsrc_get_vec_end
1522	},
1523    [ObjType_IRQTable] = {
1524        [IRQTableCmd_Alloc] = handle_irq_table_alloc,
1525        [IRQTableCmd_AllocDestCap] = handle_irq_table_alloc_dest_cap,
1526        [IRQTableCmd_Set] = handle_irq_table_set,
1527        [IRQTableCmd_Delete] = handle_irq_table_delete
1528    },
1529    [ObjType_IO] = {
1530        [IOCmd_Outb] = handle_io,
1531        [IOCmd_Outw] = handle_io,
1532        [IOCmd_Outd] = handle_io,
1533        [IOCmd_Inb] = handle_io,
1534        [IOCmd_Inw] = handle_io,
1535        [IOCmd_Ind] = handle_io
1536    },
1537    [ObjType_Notify_IPI] = {
1538        [NotifyCmd_Send] = handle_ipi_notify_send
1539    },
1540    [ObjType_PerfMon] = {
1541        [PerfmonCmd_Activate] = performance_counter_activate,
1542        [PerfmonCmd_Deactivate] = performance_counter_deactivate,
1543        [PerfmonCmd_Write] = performance_counter_write,
1544    },
1545    [ObjType_ID] = {
1546        [IDCmd_Identify] = handle_idcap_identify,
1547    },
1548    [ObjType_DeviceIDManager] = {
1549        [DeviceIDManager_CreateID] = handle_devid_create,
1550    },
1551};
1552
/// Forward declaration (silences missing-prototype warnings); defined below.
struct sysret sys_vmcall(uint64_t syscall, uint64_t arg0, uint64_t arg1,
                         uint64_t *args, uint64_t rflags, uint64_t rip,
                         struct capability *root);
1556struct sysret sys_vmcall(uint64_t syscall, uint64_t arg0, uint64_t arg1,
1557                         uint64_t *args, uint64_t rflags, uint64_t rip,
1558                         struct capability *root)
1559{
1560    struct sysret retval = { .error = SYS_ERR_OK, .value = 0 };
1561
1562    // XXX
1563    // Set dcb_current->disabled correctly.  This should really be
1564    // done in entry.S
1565    // XXX
1566    assert(dcb_current != NULL);
1567    if (dispatcher_is_disabled_ip(dcb_current->disp, rip)) {
1568	dcb_current->disabled = true;
1569    } else {
1570	dcb_current->disabled = false;
1571    }
1572    assert(get_dispatcher_shared_generic(dcb_current->disp)->disabled ==
1573            dcb_current->disabled);
1574
1575    switch(syscall) {
1576    case SYSCALL_INVOKE: /* Handle capability invocation */
1577    {
1578        // unpack "header" word
1579        capaddr_t invoke_cptr = arg0 >> 32;
1580        uint8_t send_level = arg0 >> 24;
1581        uint8_t invoke_level = arg0 >> 16;
1582        uint8_t length_words = arg0 >> 8;
1583        uint8_t flags = arg0;
1584
1585        debug(SUBSYS_SYSCALL, "sys_invoke(0x%x(%d), 0x%lx)\n",
1586              invoke_cptr, invoke_level, arg1);
1587        //printk(LOG_NOTE, "sys_invoke(0x%x(%d), 0x%lx)\n",
1588        //      invoke_cptr, invoke_level, arg1);
1589
1590        // Capability to invoke
1591        struct capability *to = NULL;
1592        retval.error = caps_lookup_cap(root, invoke_cptr, invoke_level,
1593                                       &to, CAPRIGHTS_READ);
1594        if (err_is_fail(retval.error)) {
1595            break;
1596        }
1597
1598        assert(to != NULL);
1599        assert(to->type < ObjType_Num);
1600
1601        // Endpoint cap, do LMP
1602        if (to->type == ObjType_EndPointLMP && !(flags & LMP_FLAG_IDENTIFY)) {
1603
1604            struct dcb *listener = to->u.endpointlmp.listener;
1605            assert(listener != NULL);
1606
1607            if (listener->disp == 0) {
1608                retval.error = SYS_ERR_LMP_NO_TARGET;
1609                break;
1610            }
1611
1612            /* limit length of message from buggy/malicious sender */
1613            length_words = MIN(length_words, LMP_MSG_LENGTH);
1614
1615            // does the sender want to yield their timeslice on success?
1616            bool sync = flags & LMP_FLAG_SYNC;
1617            // does the sender want to yield to the target if undeliverable?
1618            bool yield = flags & LMP_FLAG_YIELD;
1619            // is the cap (if present) to be deleted on send?
1620            bool give_away = flags & LMP_FLAG_GIVEAWAY;
1621
1622            // try to deliver message
1623            retval.error = lmp_deliver(to, dcb_current, args, length_words,
1624                                       arg1, send_level, give_away);
1625
1626            /* Switch to receiver upon successful delivery with sync flag,
1627             * or (some cases of) unsuccessful delivery with yield flag */
1628            enum err_code err_code = err_no(retval.error);
1629            if ((sync && err_is_ok(retval.error)) ||
1630                (yield && (err_code == SYS_ERR_LMP_BUF_OVERFLOW
1631                           || err_code == SYS_ERR_LMP_CAPTRANSFER_DST_CNODE_LOOKUP
1632                           || err_code == SYS_ERR_LMP_CAPTRANSFER_DST_CNODE_INVALID
1633                           || err_code == SYS_ERR_LMP_CAPTRANSFER_DST_SLOT_OCCUPIED))
1634                    ) {
1635                if (err_is_fail(retval.error)) {
1636                    struct dispatcher_shared_generic *current_disp =
1637                        get_dispatcher_shared_generic(dcb_current->disp);
1638                    struct dispatcher_shared_generic *listener_disp =
1639                        get_dispatcher_shared_generic(listener->disp);
1640                    debug(SUBSYS_DISPATCH, "LMP failed; %.*s yields to %.*s: %u\n",
1641                          DISP_NAME_LEN, current_disp->name,
1642                          DISP_NAME_LEN, listener_disp->name, err_code);
1643                }
1644
1645                // special-case context switch: ensure correct state in current DCB
1646                dispatcher_handle_t handle = dcb_current->disp;
1647                struct dispatcher_shared_x86_64 *disp =
1648                    get_dispatcher_shared_x86_64(handle);
1649                dcb_current->disabled = dispatcher_is_disabled_ip(handle, rip);
1650                struct registers_x86_64 *save_area;
1651                if (dcb_current->disabled) {
1652                    save_area = &disp->disabled_save_area;
1653                } else {
1654                    save_area = &disp->enabled_save_area;
1655                }
1656
1657                // Should be enabled. Else, how do we do an invocation??
1658                if (dcb_current->disabled) {
1659                    panic("Dispatcher needs to be enabled for this invocation");
1660                }
1661
1662                // save calling dispatcher's registers, so that when the dispatcher
1663                // next runs, it has a valid state in the relevant save area.
1664                // Save RIP, RFLAGS, RSP and set RAX (return value) for later resume
1665                save_area->rax = retval.error; // XXX: x86 1st return register
1666                save_area->rip = rip;
1667                save_area->eflags = rflags;
1668                save_area->rsp = user_stack_save;
1669                __asm ("fxsave     %[fxsave_area]\n"
1670                    :
1671                    : [fxsave_area] "m" (save_area->fxsave_area));
1672
1673                if (!dcb_current->is_vm_guest) {
1674                    /* save and zero FS/GS selectors (they're unmodified by the syscall path) */
1675                    __asm ("mov     %%fs, %[fs]     \n\t"
1676                    "mov     %%gs, %[gs]     \n\t"
1677                    "mov     %[zero], %%fs   \n\t"
1678                    "mov     %[zero], %%gs   \n\t"
1679                    : /* No output */
1680                    :
1681                    [fs] "m" (save_area->fs),
1682                    [gs] "m" (save_area->gs),
1683                    [zero] "r" (0)
1684                    );
1685                } else {
1686#ifndef __k1om__
1687#ifdef CONFIG_SVM
1688                    lpaddr_t lpaddr = gen_phys_to_local_phys(dcb_current->guest_desc.vmcb.cap.u.frame.base);
1689                    amd_vmcb_t vmcb;
1690                    amd_vmcb_initialize(&vmcb, (void *)local_phys_to_mem(lpaddr));
1691                    save_area->fs = amd_vmcb_fs_selector_rd(&vmcb);
1692                    save_area->gs = amd_vmcb_gs_selector_rd(&vmcb);
1693#else
1694                    errval_t err;
1695                    err = vmread(VMX_GUEST_FS_SEL, (uint64_t *)&save_area->fs);
1696                    err += vmread(VMX_GUEST_GS_SEL, (uint64_t *)&save_area->gs);
1697                    assert(err_is_ok(err));
1698#endif
1699#else
1700                    panic("VM Guests not supported on Xeon Phi");
1701#endif
1702		        }
1703                dispatch(to->u.endpointlmp.listener);
1704                panic("dispatch returned");
1705            } else {
1706                struct dcb *dcb = to->u.endpointlmp.listener;
1707
1708                schedule_now(dcb);
1709            }
1710        } else { // not endpoint cap, call kernel handler through dispatch table
1711            // printk(LOG_NOTE, "sys_invoke: to->type = %d, cmd = %"PRIu64"\n",
1712            //         to->type, args[0]);
1713
1714            uint64_t cmd = args[0];
1715            if (cmd >= CAP_MAX_CMD) {
1716                printk(LOG_NOTE, "illegal invocation: cmd %lu > MAX_CMD %d\n",
1717                        cmd, CAP_MAX_CMD);
1718                retval.error = SYS_ERR_ILLEGAL_INVOCATION;
1719                break;
1720            }
1721
1722            // Call the invocation
1723            invocation_handler_t invocation = invocations[to->type][cmd];
1724            if(invocation == NULL) {
1725                printk(LOG_WARN, "invocation not found. type: %"PRIu32", cmd: %"PRIu64"\n",
1726                              to->type, cmd);
1727                retval.error = SYS_ERR_ILLEGAL_INVOCATION;
1728            } else {
1729                retval = invocation(to, cmd, &args[1]);
1730            }
1731        }
1732        break;
1733    }
1734
1735        // Yield the CPU to the next dispatcher
1736    case SYSCALL_YIELD:
1737        TRACE(KERNEL, SC_YIELD, 0);
1738        retval = sys_yield((capaddr_t)arg0);
1739        TRACE(KERNEL, SC_YIELD, 1);
1740        break;
1741
1742        // NOP system call for benchmarking purposes
1743    case SYSCALL_NOP:
1744        break;
1745
1746        // Debug print system call
1747    case SYSCALL_PRINT:
1748        TRACE(KERNEL, SC_PRINT, 0);
1749        retval.error = sys_print((char *)arg0, arg1);
1750        TRACE(KERNEL, SC_PRINT, 1);
1751        break;
1752
1753        // Reboot!
1754        // FIXME: this should be a kernel cap invocation or similarly restricted
1755    case SYSCALL_REBOOT:
1756        reboot();
1757        break;
1758
1759    case SYSCALL_X86_RELOAD_LDT:
1760        maybe_reload_ldt(dcb_current, true);
1761        break;
1762
1763        // Temporarily suspend the CPU
1764    case SYSCALL_SUSPEND:
1765        TRACE(KERNEL, SC_SUSPEND, 0);
1766        retval = sys_suspend((bool)arg0);
1767        TRACE(KERNEL, SC_SUSPEND, 1);
1768        break;
1769
1770    case SYSCALL_DEBUG:
1771        switch(arg0) {
1772        case DEBUG_CONTEXT_COUNTER_RESET:
1773            dispatch_csc_reset();
1774            break;
1775
1776        case DEBUG_CONTEXT_COUNTER_READ:
1777            retval.value = dispatch_get_csc();
1778            break;
1779
1780        case DEBUG_TIMESLICE_COUNTER_READ:
1781            retval.value = systime_now();
1782            break;
1783
1784        case DEBUG_FLUSH_CACHE:
1785            wbinvd();
1786            break;
1787
1788        case DEBUG_FLUSH_TLB:
1789            do_full_tlb_flush();
1790            break;
1791
1792        case DEBUG_SEND_IPI:
1793            apic_send_std_ipi(arg1, args[0], args[1]);
1794            break;
1795
1796        case DEBUG_SET_BREAKPOINT:
1797            debugregs_set_breakpoint(arg1, args[0], args[1]);
1798            break;
1799
1800        case DEBUG_GET_TSC_PER_MS:
1801            retval.value = timing_get_tsc_per_ms();
1802            break;
1803
1804        case DEBUG_GET_APIC_TIMER:
1805            retval.value = apic_timer_get_count();
1806            break;
1807
1808        case DEBUG_GET_APIC_TICKS_PER_SEC:
1809            retval.value = timing_get_apic_ticks_per_sec();
1810            break;
1811
1812        case DEBUG_TRACE_PMEM_CTRL:
1813#ifdef TRACE_PMEM_CAPS
1814            if (arg1) {
1815                caps_trace_ctrl(arg1, args[0], args[1]);
1816            } else {
1817                caps_trace_ctrl(arg1, 0, 0);
1818            }
1819#endif
1820            retval.value = 0;
1821            retval.error = SYS_ERR_OK;
1822            break;
1823
1824
1825        case DEBUG_GET_APIC_ID:
1826            retval.value = apic_get_id();
1827            break;
1828
1829        case DEBUG_CREATE_IRQ_SRC_CAP:
1830            retval.error = irq_debug_create_src_cap(arg1, args[0], args[1],
1831                    args[2], args[3]);
1832            break;
1833
1834        case DEBUG_GET_MDB_SIZE:
1835            retval.error = debug_get_mdb_size(&retval.value);
1836            break;
1837
1838        case DEBUG_PRINT_MDB_COUNTERS:
1839            retval.error = debug_print_mdb_counters();
1840            break;
1841
1842        default:
1843            printk(LOG_ERR, "invalid sys_debug msg type\n");
1844        }
1845        break;
1846
1847    default:
1848        printk(LOG_ERR, "sys_syscall: Illegal system call! "
1849               "(0x%lx, 0x%lx, 0x%lx)\n", syscall, arg0, arg1);
1850        retval.error = SYS_ERR_ILLEGAL_SYSCALL;
1851        break;
1852    }
1853
1854    // If dcb_current got removed, dispatch someone else
1855    if (dcb_current == NULL) {
1856        assert(err_is_ok(retval.error));
1857        dispatch(schedule());
1858    }
1859
1860    if (syscall == SYSCALL_INVOKE) {
1861        debug(SUBSYS_SYSCALL, "invoke returning 0x%lx 0x%lx\n",
1862              retval.error, retval.value);
1863    }
1864
1865    return retval;
1866}
1867
1868/* syscall C entry point; called only from entry.S so no prototype in header */
1869struct sysret sys_syscall(uint64_t syscall, uint64_t arg0, uint64_t arg1,
1870                          uint64_t *args, uint64_t rflags, uint64_t rip);
1871struct sysret sys_syscall(uint64_t syscall, uint64_t arg0, uint64_t arg1,
1872                          uint64_t *args, uint64_t rflags, uint64_t rip)
1873{
1874    return sys_vmcall(syscall, arg0, arg1, args, rflags, rip, &dcb_current->cspace.cap);
1875}
1876