1/**
2 * \file
3 * \brief x86-64 execution and miscellany
4 */
5
6/*
7 * Copyright (c) 2007, 2008, 2009, 2010, 2011, ETH Zurich.
8 * All rights reserved.
9 *
10 * This file is distributed under the terms in the attached LICENSE file.
11 * If you do not find this file, copies can be found by writing to:
12 * ETH Zurich D-INFK, Haldeneggsteig 4, CH-8092 Zurich. Attn: Systems Group.
13 */
14
15#include <kernel.h>
16#include <init.h>
17#include <barrelfish_kpi/cpu.h>
18#include <barrelfish_kpi/cpu_arch.h>
19#include <exec.h>
20#include <irq.h>
21#include <x86.h>
22#include <dispatch.h>
23#include <target/x86_64/barrelfish_kpi/cpu_target.h>
24
25/**
26 * \brief Reboots the system.
27 *
28 * This function tries hard not to return.
29 */
30void reboot(void)
31{
32    struct region_descriptor region = {
33        .rd_limit = 0,
34        .rd_base = 0
35    };
36
37    printk(LOG_NOTE, "Rebooting...\n");
38
39    // try PCI reset register
40    uint8_t val = inb(0xcf9) & ~0x6;
41    val |= 0x2; // hard reset mode
42    outb(0xcf9, val);
43    val |= 0x4; // do the reset!
44    outb(0xcf9, val);
45
46    // try to reboot using keyboard controller hack (this works on QEMU)
47    printk(LOG_NOTE, "PCI reset failed, trying keyboard controller\n");
48    // try 10 times!
49    for (int i = 0; i < 10; i++) {
50        // toggle reset line
51        outb(0x64, 0xfe);
52    }
53
54    // Otherwise load invalid IDT and cause illegal opcode for triple fault
55    printk(LOG_NOTE, "Keyboard controller reset failed, trying triple fault\n");
56    __asm volatile("lidt        %[region]       \n\t"
57                   "ud2                         \n\t"
58                   : /* No output */
59                   : [region] "m" (region)
60                   );
61
62    halt(); // trick GCC into thinking we don't return
63}
64
65/**
66 * \brief Triggers a debugger breakpoint.
67 */
68void breakpoint(void)
69{
70    if(idt_initialized) {
71        hw_breakpoint();
72    } else {
73        printk(LOG_PANIC,
74               "Cannot trap into debugger -- Interrupts not set up yet!\n");
75    }
76}
77
78/**
79 * \brief Go to user-space at entry point 'entry'.
80 *
81 * This function goes to user-space and starts executing the program at
82 * its entry point at virtual address 'entry'.
83 *
84 * \param entry Entry point address of program to execute.
85 */
86void __attribute__ ((noreturn))
87execute(lvaddr_t entry)
88{
89    // FIXME: make argument
90    uintptr_t arg = get_dispatcher_shared_generic(dcb_current->disp)->udisp;
91#if defined(__k1om__)
92    uint32_t mxcsr_value = 0x00200000;
93#else
94    uint32_t mxcsr_value = 0x00001f80;
95#endif
96    /*
97     * Go to user-space using SYSRETQ -- the Q is very important, so operand
98     * size is 64-bit. Otherwise we return to compatibility mode.
99     *
100     * We set the startup contents of the RFLAGS register into R11. RCX is
101     * set to the entry point address of the user-space program. All other
102     * general-purpose registers are zeroed.
103     */
104    __asm volatile ("movq       %[flags], %%r11         \n\t"
105                    "ldmxcsr    %[mxcsr_value]          \n\t"
106                    "movq       $0, %%rsi               \n\t"
107                    "movq       $0, %%rdx               \n\t"
108                    "movq       $0, %%r8                \n\t"
109                    "movq       $0, %%r9                \n\t"
110                    "movq       $0, %%r10               \n\t"
111                    "movq       $0, %%r12               \n\t"
112                    "movq       $0, %%r13               \n\t"
113                    "movq       $0, %%r14               \n\t"
114                    "movq       $0, %%r15               \n\t"
115                    "movq       $0, %%rax               \n\t"
116                    "movq       $0, %%rbx               \n\t"
117                    "movq       $0, %%rbp               \n\t"
118                    "movq       $0, %%rsp               \n\t"
119                    "mov        %%dx, %%fs              \n\t"
120                    "mov        %%dx, %%gs              \n\t"
121                    "fninit                             \n\t"
122                    "sysretq                            \n\t"
123                    : /* No output */
124                    :
125                    [entry] "c" (entry),
126                    [disp] "D" (arg),
127                    [flags] "i" (USER_RFLAGS),
128                    [mxcsr_value] "m" (mxcsr_value)
129                    );
130
131    // Trick GCC to believe us not to return
132    halt();
133}
134
135/**
136 * \brief Resume the given user-space snapshot.
137 *
138 * This function resumes user-space execution by restoring the CPU
139 * registers with the ones given in the array, pointed to by 'regs'.
140 */
141void __attribute__ ((noreturn)) resume(arch_registers_state_t *state)
142{
143    struct registers_x86_64 *regs = state;
144    __asm volatile ("pushq      %[ss]                   \n\t"
145                    "pushq       7*8(%[regs])           \n\t"   // RSP
146                    "pushq      %[rflags]               \n\t"
147                    "pushq      %[cs]                   \n\t"
148                    "pushq      16*8(%[regs])           \n\t"   // RIP
149                    "fxrstor     %[fxsave_area]         \n\t"
150                    "mov         %[fs], %%fs            \n\t"
151                    "mov         %[gs], %%gs            \n\t"
152                    "movq        0*8(%[regs]), %%rax    \n\t"
153                    "movq        2*8(%[regs]), %%rcx    \n\t"
154                    "movq        3*8(%[regs]), %%rdx    \n\t"
155                    "movq        4*8(%[regs]), %%rsi    \n\t"
156                    "movq        5*8(%[regs]), %%rdi    \n\t"
157                    "movq        6*8(%[regs]), %%rbp    \n\t"
158                    "movq        8*8(%[regs]), %%r8     \n\t"
159                    "movq        9*8(%[regs]), %%r9     \n\t"
160                    "movq       10*8(%[regs]), %%r10    \n\t"
161                    "movq       11*8(%[regs]), %%r11    \n\t"
162                    "movq       12*8(%[regs]), %%r12    \n\t"
163                    "movq       13*8(%[regs]), %%r13    \n\t"
164                    "movq       14*8(%[regs]), %%r14    \n\t"
165                    "movq       15*8(%[regs]), %%r15    \n\t"
166                    "movq        1*8(%[regs]), %%rbx    \n\t"   // RBX was base register
167                    "iretq                              \n\t"
168                    : /* No output */
169                    :
170                    [regs] "b" (regs),
171                    [ss] "i" (GSEL(USTACK_SEL, SEL_UPL)),
172                    [cs] "i" (GSEL(UCODE_SEL, SEL_UPL)),
173                    [fs] "m" (regs->fs),
174                    [gs] "m" (regs->gs),
175                    [fxsave_area] "m" (regs->fxsave_area),
176                    [rflags] "r" ((regs->eflags & USER_RFLAGS_MASK)
177                                  | USER_RFLAGS)
178                    );
179
180    // Trick GCC to believe us not to return
181    halt();
182}
183
184/**
185 * \brief Halt processor until an interrupt arrives
186 *
187 * For use in the idle loop when nothing is runnable.
188 */
189void __attribute__ ((noreturn)) wait_for_interrupt(void)
190{
191#ifdef __k1om__
192    __asm volatile("lea k1om_kernel_stack(%%rip), %%rsp\n\t"
193                   "addq %[stack_size], %%rsp\n\t"
194                   "sti                 \n\t"
195                   // The instruction right after STI is still in interrupt
196                   // shadow. To avoid unecessary calls to HLT we insert a nop
197                   // to make sure pending interrupts are handeled immediately.
198                   "nop                 \n\t"
199                   "hlt                 \n\t"
200                   :: [stack_size] "i" (K1OM_KERNEL_STACK_SIZE) : "rsp" );
201#else
202    __asm volatile("lea x86_64_kernel_stack(%%rip), %%rsp\n\t"
203                   "addq %[stack_size], %%rsp\n\t"
204                   "sti                 \n\t"
205                   // The instruction right after STI is still in interrupt
206                   // shadow. To avoid unecessary calls to HLT we insert a nop
207                   // to make sure pending interrupts are handeled immediately.
208                   "nop                 \n\t"
209                   "hlt                 \n\t"
210                   :: [stack_size] "i" (X86_64_KERNEL_STACK_SIZE) : "rsp" );
211#endif
212    panic("hlt should not return");
213}
214
/**
 * \brief Detects monitor/mwait support
 *
 * Queries CPUID leaf 1 and tests bit 3 of ECX.
 *
 * \note Result is cached for subsequent calls to this function, so CPUID is
 *       executed only on the first call.
 *
 * \retval true monitor/mwait is supported on this core
 * \retval false monitor/mwait is not supported on this core
 */
bool has_monitor_mwait(void)
{
    static bool called = false;
    static bool mwait_support = false;

    if (!called) {
        uint32_t eax, ebx, ecx, edx;
        cpuid(1, &eax, &ebx, &ecx, &edx);
        mwait_support = (ecx & (1u << 3)) != 0;
        // Record that detection ran; without this the cache is never used
        called = true;
    }

    return mwait_support;
}
235
236/**
237 * \brief Use MONITOR/MWAIT to block until a given word changes
238 *
239 * \param base      Virtual address of 64-bit word to monitor
240 * \param lastval   Previous value of word
241 * \param extensions Processor-specific extensions (zero for defaults)
242 * \param hints     Processor-specific hints (zero for defaults)
243 *
244 * Returns when the 64-bit word at base is not equal to lastval.
245 */
246void monitor_mwait(lvaddr_t base, uint64_t lastval, uint32_t extensions,
247                   uint32_t hints)
248{
249    volatile uint64_t *val = (uint64_t *)base;
250
251    assert(extensions == 0);
252    assert(hints == 0);
253
254    while(*val == lastval) {
255        monitor(base, extensions, hints);
256        if(*val != lastval) {
257            return;
258        }
259        mwait(hints, extensions);
260    }
261}
262
263/// Remember current LDT pointer, so we can avoid reloading it
264lvaddr_t current_ldt_base = -1;
265size_t current_ldt_npages;
266
267void maybe_reload_ldt(struct dcb *dcb, bool force_reload)
268{
269    struct dispatcher_shared_x86_64 *disp =
270        get_dispatcher_shared_x86_64(dcb->disp);
271
272    /* Read fields from user dispatcher once for consistency */
273    lvaddr_t ldt_base = disp->ldt_base;
274    size_t ldt_npages = disp->ldt_npages;
275
276    /* optimize out if this is the same as the previous LDT */
277    if (!force_reload && ldt_base == current_ldt_base
278        && ldt_npages == current_ldt_npages) {
279        return;
280    }
281
282    uint16_t selector = 0;
283
284    if (ldt_base != 0 && ldt_npages != 0) {
285        extern union segment_descriptor *ldt_descriptor;
286        ldt_descriptor[0].sys_lo.lo_base = ldt_base & ((1ul << 24) - 1);
287        ldt_descriptor[0].sys_lo.hi_base = (ldt_base >> 24) & 0xff;
288        ldt_descriptor[1].sys_hi.base = ldt_base >> 32;
289        assert(ldt_descriptor[0].sys_lo.granularity != 0);
290        ldt_descriptor[0].sys_lo.lo_limit = ldt_npages;
291
292        selector = GSEL(LDT_LO_SEL, SEL_UPL);
293    }
294
295    __asm volatile("lldt %%ax"
296                   : /* No output */
297                   : "a" (selector));
298
299    current_ldt_base = ldt_base;
300    current_ldt_npages = ldt_npages;
301}
302