1/**
2 * \file
3 * \brief Local descriptor table (LDT) management
4 */
5
6/*
7 * Copyright (c) 2011, 2013, ETH Zurich.
8 * All rights reserved.
9 *
10 * This file is distributed under the terms in the attached LICENSE file.
11 * If you do not find this file, copies can be found by writing to:
12 * ETH Zurich D-INFK, Universitaetstrasse 6, CH-8092 Zurich. Attn: Systems Group.
13 */
14
15#include <barrelfish/barrelfish.h>
16#include <barrelfish/dispatch.h>
17#include <barrelfish/dispatcher_arch.h>
18#include <barrelfish/curdispatcher_arch.h>
19#include <barrelfish/syscalls.h>
20#include <barrelfish/ldt.h>
21#include <arch/ldt.h>
22#include <target/x86_64/barrelfish_kpi/cpu_target.h> // segment_descriptor
23#include <barrelfish_kpi/cpu_arch.h> // segment_descriptor
24#include <stdio.h>
25#include <string.h>
26
#define LDT_NENTRIES    512     ///< Number of entries in the statically-sized LDT

/// Local segment descriptor table. Shared by all dispatchers in an address space.
// (XXX: coherence assumption)
static union segment_descriptor ldt[LDT_NENTRIES];

/// Spinlock protecting LDT in spanned domains.
/// Guards the free-slot search/insert in ldt_alloc_segment_disabled().
// (XXX: coherence assumption)
static spinlock_t ldt_spinlock;
36
37#ifdef ARRAKIS
38
/*
 * AMD64 Segmentation Data Structures and definitions
 */

/*
 * Selectors: a selector is (descriptor index << 3) | table-indicator | RPL.
 */

#define SEL_RPL_MASK    3       /* requester priv level (RPL) mask */
#define ISPL(s) ((s)&3)         /* what is the priority level of a selector */
#define SEL_KPL 0               /* kernel priority level */
#define SEL_UPL 3               /* user priority level */
#define ISLDT(s)        ((s)&SEL_LDT)   /* is it local or global */
#define SEL_LDT 4               /* table-indicator bit: local descriptor table */
#define IDXSEL(s)       (((s)>>3) & 0x1fff)             /* index of selector */
#define LSEL(s,r)       (((s)<<3) | SEL_LDT | r)        /* build a local selector */
#define GSEL(s,r)       (((s)<<3) | r)                  /* build a global selector */
56
/**
 * Gate descriptors (e.g. indirect descriptors, trap, interrupt etc. 128 bit)
 * Only interrupt and trap gates have gd_ist.
 * Initialised via setgd() below; loaded into the IDT in setup_default_idt().
 */
struct  gate_descriptor {
    uint64_t gd_looffset:16;       /* gate offset (lsb) */
    uint64_t gd_selector:16;       /* gate segment selector */
    uint64_t gd_ist:3;             /* IST table index */
    uint64_t gd_xx:5;              /* unused */
    uint64_t gd_type:5;            /* segment type */
    uint64_t gd_dpl:2;             /* segment descriptor priority level */
    uint64_t gd_p:1;               /* segment descriptor present */
    uint64_t gd_hioffset:48;       /* gate offset (msb) */
    uint64_t sd_xx1:32;            /* reserved/unused upper dword */
} __attribute__((packed));
72
/* system segments and gate types: values for the 'type' field of
 * system descriptors and gate descriptors */
#define SDT_SYSNULL      0      /* system null */
#define SDT_SYSLDT       2      /* system 64 bit local descriptor table */
#define SDT_SYSTSS       9      /* system available 64 bit TSS */
#define SDT_SYSBSY      11      /* system busy 64 bit TSS */
#define SDT_SYSCGT      12      /* system 64 bit call gate */
#define SDT_SYSIGT      14      /* system 64 bit interrupt gate */
#define SDT_SYSTGT      15      /* system 64 bit trap gate */

/* memory segment types (bit 4 set = code/data descriptor) */
#define SDT_MEMRO       16      /* memory read only */
#define SDT_MEMROA      17      /* memory read only accessed */
#define SDT_MEMRW       18      /* memory read write */
#define SDT_MEMRWA      19      /* memory read write accessed */
#define SDT_MEMROD      20      /* memory read only expand dwn limit */
#define SDT_MEMRODA     21      /* memory read only expand dwn limit accessed */
#define SDT_MEMRWD      22      /* memory read write expand dwn limit */
#define SDT_MEMRWDA     23      /* memory read write expand dwn limit accessed */
#define SDT_MEME        24      /* memory execute only */
#define SDT_MEMEA       25      /* memory execute only accessed */
#define SDT_MEMER       26      /* memory execute read */
#define SDT_MEMERA      27      /* memory execute read accessed */
#define SDT_MEMEC       28      /* memory execute only conforming */
#define SDT_MEMEAC      29      /* memory execute only accessed conforming */
#define SDT_MEMERC      30      /* memory execute read conforming */
#define SDT_MEMERAC     31      /* memory execute read accessed conforming */

/*
 * Size of IDT table
 */
#define NIDT    256             /* 32 reserved, 16 h/w, 0 s/w, linux's 0x80 */
104
/*
 * Entries in the Global Descriptor Table (GDT).
 * These indices must match the layout of the gdt[] initializer below.
 */
#define NULL_SEL        0       /**< Null descriptor */
#define KCODE_SEL       1       /**< Kernel code descriptor */
#define KSTACK_SEL      2       /**< Shared user/kernel stack descriptor */
#define USTACK_SEL      3       /**< User stack descriptor */
#define UCODE_SEL       4       /**< User code descriptor */
#define TSS_LO_SEL      5       /**< Task State Segment (TSS) -- low 64bit */
#define TSS_HI_SEL      6       /**< Task State Segment (TSS) -- high 64bit */
#define LDT_LO_SEL      7       /**< Local descriptor table (LDT) -- low */
#define LDT_HI_SEL      8       /**< Local descriptor table (LDT) -- high */
#define NGDT_MEM        9       /**< Number of descriptors */
118
/**
 * region descriptors, used to load gdt/idt tables before segments yet exist.
 * Operand format for the lgdt/lidt instructions.
 */
struct region_descriptor {
    uint16_t rd_limit;          /**< segment extent (size in bytes minus one) */
    uint64_t rd_base;           /**< base address  */
} __attribute__((packed));
126
/// 64-bit Task State Segment layout
struct task_state_segment {
    uint32_t    reserved;
    uint64_t    rsp[3];         ///< stack pointers per privilege level; rsp[0] set in gdt_reset()
    uint64_t    reserved2;
    uint64_t    ist[7];         ///< interrupt stack table pointers (unused here)
    uint64_t    reserved3;
    uint16_t    reserved4;
    uint16_t    iomap_base;     ///< offset of I/O permission bitmap
} __attribute__ ((packed));
136
/**
 * \brief Global Task State Segment (TSS).
 *
 * This is the global, static and only Task State Segment (TSS). It is used
 * for interrupt and exception handling (stack setup) while in user-space.
 * Its base address is patched into gdt[TSS_LO_SEL/TSS_HI_SEL] by gdt_reset().
 */
static struct task_state_segment tss __attribute__ ((aligned (4)));
144
/// Global Descriptor Table (GDT). Entry order must match the *_SEL indices above.
union segment_descriptor gdt[] __attribute__ ((aligned (4))) = {
    [NULL_SEL] = {   // Null segment
        .raw = 0
    },
    [KCODE_SEL] = {   // Kernel code segment
        .d = {
            .lo_limit = 0xffff,
            .lo_base = 0,
            .type = 0xa,            // execute/read code
            .system_desc = 1,
            .privilege_level = SEL_KPL,
            .present = 1,
            .hi_limit = 0xf,
            .available = 0,
            .long_mode = 1,
            .operation_size = 0,
            .granularity = 1,
            .hi_base = 0
        }
    },
    [KSTACK_SEL] = {   // Kernel stack segment
        .d = {
            .lo_limit = 0xffff,
            .lo_base = 0,
            .type = 2,              // read/write data
            .system_desc = 1,
            .privilege_level = SEL_KPL,
            .present = 1,
            .hi_limit = 0xf,
            .available = 0,
            .long_mode = 1,
            .operation_size = 0,
            .granularity = 1,
            .hi_base = 0
        }
    },
    [USTACK_SEL] = {   // User stack segment
        .d = {
            .lo_limit = 0xffff,
            .lo_base = 0,
            .type = 2,              // read/write data
            .system_desc = 1,
            .privilege_level = SEL_UPL,
            .present = 1,
            .hi_limit = 0xf,
            .available = 0,
            .long_mode = 1,
            .operation_size = 0,
            .granularity = 1,
            .hi_base = 0
        }
    },
    [UCODE_SEL] = {   // User code segment
        .d = {
            .lo_limit = 0xffff,
            .lo_base = 0,
            .type = 0xa,            // execute/read code
            .system_desc = 1,
            .privilege_level = SEL_UPL,
            .present = 1,
            .hi_limit = 0xf,
            .available = 0,
            .long_mode = 1,
            .operation_size = 0,
            .granularity = 1,
            .hi_base = 0
        }
    },
    [TSS_LO_SEL] = {   // Global Task State Segment (TSS), lower 8 bytes
        .sys_lo = {
            .lo_limit = sizeof(tss) & 0xffff,
            .type = SDT_SYSTSS,
            .privilege_level = SEL_KPL,
            .present = 1,
            .hi_limit = (sizeof(tss) >> 16) & 0xf,
            .available = 0,
            .granularity = 0,
        }
    },
    [TSS_HI_SEL] = {   // Global Task State Segment (TSS), upper 8 bytes
        .sys_hi = {
            .base = 0 // patched by gdt_reset()
        }
    },
    [LDT_LO_SEL] = {    // Local descriptor table (LDT), lower 8 bytes
        .sys_lo = {
            .lo_limit = 0, // # 4k pages (since granularity = 1)
            .lo_base = 0, // changed by context switch path when doing lldt
            .type = 2, // LDT
            .privilege_level = SEL_UPL,
            .present = 1,
            .hi_limit = 0,
            .available = 0,
            .granularity = 1,
            .hi_base = 0
        }
    },
    [LDT_HI_SEL] = {    // Local descriptor table (LDT), upper 8 bytes
        .sys_hi = {
            .base = 0 // changed by context switch path when doing lldt
        }
    },
};
248
/// LDT descriptor in the GDT (spans two consecutive GDT slots in long mode)
static union segment_descriptor *ldt_descriptor = &gdt[LDT_LO_SEL];

/// Remember current LDT pointer, so we can avoid reloading it
static lvaddr_t current_ldt_base = -1;
/// Page count matching current_ldt_base (part of the reload-avoidance cache)
static size_t current_ldt_npages;
254
/**
 * \brief Load the LDT published by a dispatcher into the CPU, if it changed.
 *
 * Patches the dispatcher's LDT base/length into the LDT descriptor in the
 * GDT and reloads the LDT register. If the dispatcher publishes no LDT
 * (zero base or page count), a null selector is loaded instead.
 *
 * \param disp          Shared dispatcher frame; ldt_base/ldt_npages are read
 * \param force_reload  Reload even if base/npages match the cached values
 */
static void maybe_reload_ldt(struct dispatcher_shared_x86_64 *disp, bool force_reload)
{
    /* Read fields from user dispatcher once for consistency */
    lvaddr_t ldt_base = disp->ldt_base;
    size_t ldt_npages = disp->ldt_npages;

    /* optimize out if this is the same as the previous LDT */
    if (!force_reload && ldt_base == current_ldt_base
        && ldt_npages == current_ldt_npages) {
        return;
    }

    uint16_t selector = 0;  // null selector: disables the LDT

    if (ldt_base != 0 && ldt_npages != 0) {
        // split the long-mode base address across the two GDT slots
        ldt_descriptor[0].sys_lo.lo_base = ldt_base & ((1ul << 24) - 1);
        ldt_descriptor[0].sys_lo.hi_base = (ldt_base >> 24) & 0xff;
        ldt_descriptor[1].sys_hi.base = ldt_base >> 32;
        assert(ldt_descriptor[0].sys_lo.granularity != 0);
        // limit is in 4K pages since granularity is set.
        // NOTE(review): the limit field is inclusive, so npages (rather than
        // npages - 1) covers one page more than the LDT -- confirm intent.
        ldt_descriptor[0].sys_lo.lo_limit = ldt_npages;

        selector = GSEL(LDT_LO_SEL, SEL_UPL);
    }

    __asm volatile("lldt %%ax"
                   : /* No output */
                   : "a" (selector));

    // update the reload-avoidance cache
    current_ldt_base = ldt_base;
    current_ldt_npages = ldt_npages;
}
286
287/**
288 * \brief Setup default GDT.
289 *
290 * Loads the GDT register with the default GDT and reloads CS and SS
291 * to point to the new entries. Resets all other segment registers to null.
292 * Finally, completes setup of GDT to include TSS base address mapping and
293 * loads TSS into task register.
294 */
295static void gdt_reset(struct dispatcher_generic *disp)
296{
297    lvaddr_t                     ptss = (lvaddr_t)&tss;
298    struct region_descriptor    region = {
299        .rd_limit = sizeof(gdt),
300        .rd_base = (uint64_t)&gdt
301    };
302
303    // Load default GDT
304    __asm volatile("lgdt %[region]" :: [region] "m" (region));
305
306    // Reload segments
307    __asm volatile("mov %[null], %%ds      \n\t"
308                   "mov %[null], %%es      \n\t"
309                   "mov %[ss], %%ss        \n\t"
310                   "mov %[null], %%gs      \n\t"
311                   "mov %[null], %%fs      \n\t"
312                   "pushq %[cs]            \n\t"          // new CS
313                   "lea 1f(%%rip), %%rax   \n\t"          // jumps to after lret
314                   "pushq %%rax            \n\t"          // new IP
315                   "lretq                  \n\t"          // fake return
316                   "1:                     \n\t"          // we'll continue here
317                   : /* No Output */
318                   :
319                   [null] "r" (0),
320                   [ss] "r" (GSEL(KSTACK_SEL, SEL_KPL)),
321                   [cs] "i" (GSEL(KCODE_SEL, SEL_KPL))
322                   : "rax"
323                   );
324
325    // Complete setup of TSS descriptor (by inserting base address of TSS)
326    gdt[TSS_LO_SEL].sys_lo.lo_base = ptss & 0xffffff;
327    gdt[TSS_LO_SEL].sys_lo.hi_base = (ptss >> 24) & 0xff;
328    gdt[TSS_HI_SEL].sys_hi.base = ptss >> 32;
329
330    // Complete setup of TSS
331    tss.rsp[0] = (lvaddr_t)&disp->stack[DISPATCHER_STACK_WORDS];
332
333    // Load task state register
334    __asm volatile("ltr %%ax" :: "a" (GSEL(TSS_LO_SEL, SEL_KPL)));
335}
336
337/* Utility function for code below; initialises a gate_descriptor */
338static void setgd(struct gate_descriptor *gd, void (* handler)(void),
339                  int ist, int type, int dpl, int selector)
340{
341    memset(gd, 0, sizeof(struct gate_descriptor));
342    gd->gd_looffset = (uintptr_t)handler & ((1UL << 16) - 1);
343    gd->gd_hioffset = (uintptr_t)handler >> 16;
344    gd->gd_selector = selector;
345    gd->gd_ist = ist;
346    gd->gd_type = type;
347    gd->gd_dpl = dpl;
348    gd->gd_p = 1;
349}
350
/**
 * \brief Define IRQ handler number 'num'.
 *
 * This defines an interrupt handler for vector #num. The way this is done is
 * quite tricky: A block of assembly is emitted, with a label pointing to
 * the beginning of that block. The label is made known as a symbol by
 * having a C function _declaration_ directly in front of the block. The
 * symbol has to be defined extern, so it is global, but its ELF visibility
 * is set "hidden", so that the symbol does not end up in the GOT. This is
 * very important for keeping the code position-independent.
 *
 * The NOERR/ERR variants depend on whether the hardware delivers an error code.
 * NOERR pushes a dummy 0 so that both variants reach hwexc_common with an
 * identical stack layout: [error code][vector number].
 */
#define HW_EXCEPTION_NOERR(num)                                         \
    void __attribute__ ((visibility ("hidden"))) hwexc_##num(void);     \
    __asm (                                                             \
           "\t.text                                        \n\t"        \
           "\t.type hwexc_"#num",@function                 \n\t"        \
           "hwexc_"#num":                                  \n\t"        \
           "pushq $0                /* dummy error code */ \n\t"        \
           "pushq $"#num"           /* vector number */    \n\t"        \
           "jmp    hwexc_common     /* common stuff */     \n\t"        \
                                                                        )

#define HW_EXCEPTION_ERR(num)                                           \
    void __attribute__ ((visibility ("hidden"))) hwexc_##num(void);     \
    __asm (                                                             \
           "\t.text                                        \n\t"        \
           "\t.type hwexc_"#num",@function                 \n\t"        \
           "hwexc_"#num":                                  \n\t"        \
           "pushq $"#num"           /* vector number */    \n\t"        \
           "jmp    hwexc_common     /* common stuff */     \n\t"        \
                                                                        )
384
/* Common exception tail: stacks all registers in the GDB frame format
 * (see enum gdb_x86_64_register_nums below) and tail-calls
 * generic_handle_exception(vec, error, save_area). */
__asm (
    ".text                                              \n\t"
    "   .type hwexc_common ,@function                   \n\t"
    /* a kernel fault means something bad happened, so we stack
     * everything for the debugger to use, in the GDB frame format */
    "hwexc_common:                                      \n\t"
    "pushq 6*8(%rsp) /* SS */                           \n\t"
    "pushq 4*8(%rsp) /* CS */                           \n\t"
    "pushq 7*8(%rsp) /* EFLAGS */                       \n\t"
    "pushq 5*8(%rsp) /* RIP */                          \n\t"
    /* TODO: extend frame size and save FS/GS so we can resume afterwards */
    "pushq %r15                                         \n\t"
    "pushq %r14                                         \n\t"
    "pushq %r13                                         \n\t"
    "pushq %r12                                         \n\t"
    "pushq %r11                                         \n\t"
    "pushq %r10                                         \n\t"
    "pushq %r9                                          \n\t"
    "pushq %r8                                          \n\t"
    "pushq 17*8(%rsp) /* RSP */                         \n\t"
    "pushq %rbp                                         \n\t"
    "pushq %rdi                                         \n\t"
    "pushq %rsi                                         \n\t"
    "pushq %rdx                                         \n\t"
    "pushq %rcx                                         \n\t"
    "pushq %rbx                                         \n\t"
    "pushq %rax                                         \n\t"
    "movq 20*8(%rsp), %rdi  /* vector number */         \n\t"
    "movq 21*8(%rsp), %rsi  /* error code   */          \n\t"
    "movq %rsp, %rdx       /* save area ptr*/           \n\t"
    "jmp generic_handle_exception                       \n\t"
);
417
// CPU exceptions; the ERR variants are the vectors for which the hardware
// pushes an error code (the NOERR variants push a dummy 0 themselves)
HW_EXCEPTION_NOERR(0);
HW_EXCEPTION_NOERR(1);
HW_EXCEPTION_NOERR(2);
HW_EXCEPTION_NOERR(3);
HW_EXCEPTION_NOERR(4);
HW_EXCEPTION_NOERR(5);
HW_EXCEPTION_NOERR(6);
HW_EXCEPTION_NOERR(7);
HW_EXCEPTION_ERR(8);
HW_EXCEPTION_NOERR(9);
HW_EXCEPTION_ERR(10);
HW_EXCEPTION_ERR(11);
HW_EXCEPTION_ERR(12);
HW_EXCEPTION_ERR(13);
HW_EXCEPTION_ERR(14);
HW_EXCEPTION_NOERR(16);
HW_EXCEPTION_ERR(17);
HW_EXCEPTION_NOERR(18);
HW_EXCEPTION_NOERR(19);

// Reserved as "unhandled exception" handler (installed as default in
// setup_default_idt())
HW_EXCEPTION_NOERR(666);
441
/**
 * \brief X86_64 register set
 *
 * As defined by GDB. The index order matches the save frame built by
 * hwexc_common above (RAX is pushed last, so it sits at the lowest address).
 */
enum gdb_x86_64_register_nums {
    GDB_X86_64_RAX_REG, GDB_X86_64_RBX_REG, GDB_X86_64_RCX_REG, GDB_X86_64_RDX_REG,
    GDB_X86_64_RSI_REG, GDB_X86_64_RDI_REG, GDB_X86_64_RBP_REG, GDB_X86_64_RSP_REG,
    GDB_X86_64_R8_REG, GDB_X86_64_R9_REG, GDB_X86_64_R10_REG, GDB_X86_64_R11_REG,
    GDB_X86_64_R12_REG, GDB_X86_64_R13_REG, GDB_X86_64_R14_REG, GDB_X86_64_R15_REG,
    GDB_X86_64_RIP_REG, GDB_X86_64_EFLAGS_REG, GDB_X86_64_CS_REG, GDB_X86_64_SS_REG,

/* these are not saved/used in 64-bit mode, and currently avoided
    DS_REG, ES_REG, FS_REG, GS_REG,
*/

/* these are not used yet:
    ST0_REG, ST1_REG, ST2_REG, ST3_REG, ST4_REG, ST5_REG, ST6_REG, ST7_REG,

    FCTRL_REG, FSTAT_REG, FTAG_REG, FISEG_REG,
    FIOFF_REG, FOSEG_REG, FOOFF_REG, FOP_REG,

    XMM0_REG, XMM1_REG, XMM2_REG, XMM3_REG, XMM4_REG, XMM5_REG,
    XMM6_REG, XMM7_REG, XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
    XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG,
    MXCSR_REG
*/

    GDB_X86_64_NUM_REGS /* not a real register; must be last! */
};
472
473void disp_pagefault(dispatcher_handle_t handle, lvaddr_t fault_address,
474                    uintptr_t error, lvaddr_t ip);
475
476/**
477 * \brief Handles kernel exceptions
478 *
479 * \param vec   Vector number of exception
480 * \param error Error code from CPU, or 0 for an exception without an error code
481 * \param gdb_save_frame Pointer to save area for registers stacked by trap handler
482 */
483static __attribute__ ((used))
484void generic_handle_exception(uint64_t vec, uint64_t error,
485                              uintptr_t *gdb_save_frame)
486{
487    // XXX: This assumes we're enabled. That's not always the case...
488
489    switch(vec) {
490    case IDT_PF:
491        {
492            // For now, disable the dispatcher and call classic exception handler code
493            dispatcher_handle_t handle = disp_disable();
494            lvaddr_t fault_address;
495            arch_registers_state_t *regs = dispatcher_get_enabled_save_area(handle);
496            __asm volatile("mov %%cr2, %[fault_address]"
497                           : [fault_address] "=r" (fault_address));
498
499            // Write registers to dispatcher save area
500            regs->rsp = gdb_save_frame[GDB_X86_64_RSP_REG];
501            regs->eflags = gdb_save_frame[GDB_X86_64_EFLAGS_REG];
502            regs->rip = gdb_save_frame[GDB_X86_64_RIP_REG];
503
504            disp_pagefault(handle, fault_address, error, regs->rip);
505        }
506        break;
507
508    default:
509        debug_printf("Unhandled exception %d at 0x%" PRIxPTR " (error code 0x%lx)\n",
510                     (int)vec, gdb_save_frame[GDB_X86_64_RIP_REG], error);
511        abort();
512        break;
513    }
514}
515
/**
 * \brief Interrupt Descriptor Table (IDT) for processor this kernel is running
 * on. Populated and loaded by setup_default_idt().
 */
static struct gate_descriptor idt[NIDT] __attribute__ ((aligned (16)));
521
522/**
523 * \brief Sets up the default IDT for current CPU.
524 */
525static void setup_default_idt(void)
526{
527    struct region_descriptor region = {         // set default IDT
528        .rd_limit = NIDT * sizeof(idt[0]) - 1,
529        .rd_base = (uint64_t)&idt
530    };
531    int i;
532
533    // reset IDT
534    memset((void *)&idt, 0, NIDT * sizeof(idt[0]));
535
536    // initialize IDT with default generic handlers
537    for (i = 0; i < NIDT; i++)
538        setgd(&idt[i], hwexc_666, 0, SDT_SYSIGT, SEL_KPL,
539              GSEL(KCODE_SEL, SEL_KPL));
540
541    /* Setup exception handlers */
542    setgd(&idt[0], hwexc_0, 0, SDT_SYSTGT, SEL_KPL, GSEL(KCODE_SEL, SEL_KPL));
543    setgd(&idt[1], hwexc_1, 0, SDT_SYSTGT, SEL_KPL, GSEL(KCODE_SEL, SEL_KPL));
544    setgd(&idt[2], hwexc_2, 0, SDT_SYSTGT, SEL_KPL, GSEL(KCODE_SEL, SEL_KPL));
545    setgd(&idt[3], hwexc_3, 0, SDT_SYSTGT, SEL_UPL, GSEL(KCODE_SEL, SEL_KPL));
546    setgd(&idt[4], hwexc_4, 0, SDT_SYSTGT, SEL_KPL, GSEL(KCODE_SEL, SEL_KPL));
547    setgd(&idt[5], hwexc_5, 0, SDT_SYSTGT, SEL_KPL, GSEL(KCODE_SEL, SEL_KPL));
548    setgd(&idt[6], hwexc_6, 0, SDT_SYSTGT, SEL_KPL, GSEL(KCODE_SEL, SEL_KPL));
549    setgd(&idt[7], hwexc_7, 0, SDT_SYSTGT, SEL_KPL, GSEL(KCODE_SEL, SEL_KPL));
550    setgd(&idt[8], hwexc_8, 0, SDT_SYSTGT, SEL_KPL, GSEL(KCODE_SEL, SEL_KPL));
551    setgd(&idt[9], hwexc_9, 0, SDT_SYSTGT, SEL_KPL, GSEL(KCODE_SEL, SEL_KPL));
552    setgd(&idt[10], hwexc_10, 0, SDT_SYSTGT, SEL_KPL, GSEL(KCODE_SEL, SEL_KPL));
553    setgd(&idt[11], hwexc_11, 0, SDT_SYSTGT, SEL_KPL, GSEL(KCODE_SEL, SEL_KPL));
554    setgd(&idt[12], hwexc_12, 0, SDT_SYSTGT, SEL_KPL, GSEL(KCODE_SEL, SEL_KPL));
555    setgd(&idt[13], hwexc_13, 0, SDT_SYSTGT, SEL_KPL, GSEL(KCODE_SEL, SEL_KPL));
556    setgd(&idt[14], hwexc_14, 0, SDT_SYSTGT, SEL_KPL, GSEL(KCODE_SEL, SEL_KPL));
557    // Interrupt 15 is undefined
558    setgd(&idt[16], hwexc_16, 0, SDT_SYSTGT, SEL_KPL, GSEL(KCODE_SEL, SEL_KPL));
559    setgd(&idt[17], hwexc_17, 0, SDT_SYSTGT, SEL_KPL, GSEL(KCODE_SEL, SEL_KPL));
560    setgd(&idt[18], hwexc_18, 0, SDT_SYSTGT, SEL_KPL, GSEL(KCODE_SEL, SEL_KPL));
561    setgd(&idt[19], hwexc_19, 0, SDT_SYSTGT, SEL_KPL, GSEL(KCODE_SEL, SEL_KPL));
562    // Interrupts 20 - 31 are reserved
563
564    /* Load IDT register */
565    __asm volatile("lidt %0" :: "m" (region));
566}
567#endif
568
569/** \brief Initialise private (per-dispatcher) LDT */
570void ldt_init_disabled(dispatcher_handle_t handle)
571{
572    errval_t err;
573
574    struct dispatcher_shared_x86_64 *disp =
575        get_dispatcher_shared_x86_64(handle);
576    struct dispatcher_x86_64 *disp_priv = get_dispatcher_x86_64(handle);
577
578    /* setup private (static) LDT, and get kernel to load it */
579    disp->ldt_base = (lvaddr_t) ldt;
580    // XXX: size may not be multiple of page size, but does it really matter?
581    disp->ldt_npages = DIVIDE_ROUND_UP(sizeof(ldt), BASE_PAGE_SIZE);
582#ifdef ARRAKIS
583    gdt_reset(get_dispatcher_generic(handle));
584    maybe_reload_ldt(disp, true);
585    setup_default_idt();
586#else
587    sys_x86_reload_ldt();
588#endif
589
590    /* XXX: kludge to maintain backwards compatibility.
591     * Setup a single segment descriptor that we can use to locate the
592     * current dispatcher (i.e. curdispatcher() always works). This
593     * will be replaced once we switch onto a thread with a real FS segment.
594     */
595    disp_priv->dummyseg[0] = 0;
596    disp_priv->dummyseg[1] = handle;
597    err = ldt_alloc_segment_disabled(handle, disp_priv->dummyseg,
598                                     &disp_priv->disp_seg_selector);
599    if (err_is_fail(err)) {
600        // XXX: can't call usual debug/panic code, as curdispatcher() won't work
601        char buf[128];
602        snprintf(buf, sizeof(buf),
603                 "%.*s.%u: fatal error in ldt_init_disabled(). %s Aborted.\n",
604                 DISP_NAME_LEN, disp->d.name, disp_priv->generic.core_id, err_getstring(err));
605        sys_print(buf, sizeof(buf));
606        while (1) {disp_yield_disabled(handle);}
607    }
608
609    /* load this segment to FS */
610    __asm volatile("mov %%ax, %%fs"
611                   : /* No outputs */
612                   : "a" (disp_priv->disp_seg_selector));
613}
614
615/**
616 * \brief Allocate and fill a segment descriptor in the LDT
617 *
618 * \param handle Dispatcher handle
619 * \param segbase Base of segment
620 * \param ret_selector On success, used to return selector for new segment
621 */
622errval_t ldt_alloc_segment_disabled(dispatcher_handle_t handle, void *segbase,
623                                    uint16_t *ret_selector)
624{
625    // segment descriptors are limited to a 32-bit base address
626    if ((lvaddr_t)segbase >= (1ul << 32)) {
627        return LIB_ERR_SEGBASE_OVER_4G_LIMIT;
628    }
629
630    // construct descriptor
631    union segment_descriptor desc = {
632        .d = {
633            .lo_base = ((lvaddr_t) segbase) & 0xffffff,
634            .hi_base = (((lvaddr_t) segbase) >> 24) & 0xff,
635            .type = 3, /* read/write data, accessed */
636            .system_desc = 1, /* data */
637            .privilege_level = 3, /* user mode */
638            .present = 1,
639            .long_mode = 0,
640            .operation_size = 1,
641        }
642    };
643
644    // find free LDT entry
645    acquire_spinlock(&ldt_spinlock);
646    for (int i = 0; i < LDT_NENTRIES; i++) {
647        if (!ldt[i].d.present) {
648            ldt[i] = desc;
649            release_spinlock(&ldt_spinlock);
650            assert_disabled(ret_selector != NULL);
651            *ret_selector = X86_64_LDT_SELECTOR(i);
652            return SYS_ERR_OK;
653        }
654    }
655    release_spinlock(&ldt_spinlock);
656
657    return LIB_ERR_LDT_FULL;
658}
659
660/**
661 * \brief enabled version of ldt_alloc_segment_disabled()
662 *
663 * Exposed for calls by special-case software that needs to play with segments.
664 */
665errval_t ldt_alloc_segment(void *segbase, uint16_t *ret_selector)
666{
667    dispatcher_handle_t handle = disp_disable();
668    errval_t ret = ldt_alloc_segment_disabled(handle, segbase, ret_selector);
669    disp_enable(handle);
670    return ret;
671}
672
673/**
674 * \brief Free a previously-allocated segment on a specific dispatcher
675 *
676 * \param handle Dispatcher handle
677 * \param selector Segment selector
678 */
679errval_t ldt_free_segment_ondisp(dispatcher_handle_t handle, uint16_t selector)
680{
681    if ((selector & 0x7) != 7) { // XXX: user-priv LDT selector
682        return LIB_ERR_LDT_SELECTOR_INVALID;
683    }
684
685    int idx = X86_64_SELECTOR_IDX(selector);
686
687    // check that this entry is occupied
688    if (idx >= LDT_NENTRIES || !ldt[idx].d.present) {
689        return LIB_ERR_LDT_SELECTOR_INVALID;
690    }
691
692    // mark entry as free
693    ldt[idx].raw = 0;
694    return SYS_ERR_OK;
695}
696
697/**
698 * \brief Free a previously-allocated segment on the current dispatcher
699 *
700 * \param selector Segment selector
701 */
702errval_t ldt_free_segment(uint16_t selector)
703{
704    // strictly speaking, we probably don't need to disable here
705    dispatcher_handle_t handle = disp_disable();
706    errval_t ret = ldt_free_segment_ondisp(handle, selector);
707    disp_enable(handle);
708    return ret;
709}
710
711/**
712 * \brief Update the base address of a previously-allocated segment
713 *
714 * \param selector Segment selector
715 * \param segbase New base of segment
716 */
717errval_t ldt_update_segment(uint16_t selector, void *segbase)
718{
719    if ((selector & 0x7) != 7) { // XXX: user-priv LDT selector
720        return LIB_ERR_LDT_SELECTOR_INVALID;
721    }
722
723    int idx = X86_64_SELECTOR_IDX(selector);
724
725    // check that this entry is occupied
726    if (idx >= LDT_NENTRIES || !ldt[idx].d.present) {
727        return LIB_ERR_LDT_SELECTOR_INVALID;
728    }
729
730    // segment descriptors are limited to a 32-bit base address
731    if ((lvaddr_t)segbase >= (1ul << 32)) {
732        return LIB_ERR_SEGBASE_OVER_4G_LIMIT;
733    }
734
735    // update base address
736    ldt[idx].d.lo_base = ((lvaddr_t) segbase) & 0xffffff;
737    ldt[idx].d.hi_base = (((lvaddr_t) segbase) >> 24) & 0xff;
738
739    return SYS_ERR_OK;
740}
741