1// Copyright 2017 The Fuchsia Authors
2//
3// Use of this source code is governed by a MIT-style
4// license that can be found in the LICENSE file or at
5// https://opensource.org/licenses/MIT
6
7#include "vmexit_priv.h"
8
9#include <bits.h>
10#include <platform.h>
11#include <trace.h>
12
13#include <arch/arm64/el2_state.h>
14#include <arch/hypervisor.h>
15#include <dev/psci.h>
16#include <dev/timer/arm_generic.h>
17#include <hypervisor/ktrace.h>
18#include <vm/fault.h>
19#include <vm/physmap.h>
20#include <zircon/syscalls/hypervisor.h>
21#include <zircon/syscalls/port.h>
22
23#define LOCAL_TRACE 0
24
// Stores `reg` (the value of the trapped Xt register) into the named EL1
// system-register field of `guest_state`, advances the guest PC past the
// trapping instruction, and evaluates to ZX_OK.
//
// NOTE: relies on `guest_state` and `reg` being in scope at the expansion
// site; used only inside handle_system_instruction.
#define SET_SYSREG(sysreg)                                                      \
    ({                                                                          \
        guest_state->system_state.sysreg = reg;                                 \
        LTRACEF("guest " #sysreg ": %#lx\n", guest_state->system_state.sysreg); \
        next_pc(guest_state);                                                   \
        ZX_OK;                                                                  \
    })
32
// log2(sizeof(pte_t)): subtracted from the page-size shift to get the number
// of VA bits resolved per page-table level (see clean_invalidate_cache).
static constexpr size_t kPageTableLevelShift = 3;
// SMC immediate value a guest must use for PSCI calls; SMCs with any other
// immediate are rejected (see handle_smc_instruction).
static constexpr uint16_t kSmcPsci = 0;

// Bits of the guest's virtual timer control register, CNTV_CTL_EL0.
enum TimerControl : uint64_t {
    ENABLE = 1u << 0,
    IMASK = 1u << 1,
};
40
41ExceptionSyndrome::ExceptionSyndrome(uint32_t esr) {
42    ec = static_cast<ExceptionClass>(BITS_SHIFT(esr, 31, 26));
43    iss = BITS(esr, 24, 0);
44}
45
46WaitInstruction::WaitInstruction(uint32_t iss) {
47    is_wfe = BIT(iss, 0);
48}
49
50SmcInstruction::SmcInstruction(uint32_t iss) {
51    imm = static_cast<uint16_t>(BITS(iss, 15, 0));
52}
53
// Decodes the ISS of a trapped MSR/MRS system-register access.
SystemInstruction::SystemInstruction(uint32_t iss) {
    // ISS[21:10] holds the {Op0, Op2, Op1, CRn} fields and ISS[4:1] holds CRm
    // (ARM DDI 0487, ESR_ELx ISS encoding for trapped MSR/MRS). BITS masks
    // ISS[21:10] in place, so >> 6 lines those fields up directly above the
    // 4 CRm bits to form the SystemRegister value.
    sysreg = static_cast<SystemRegister>(BITS(iss, 21, 10) >> 6 | BITS_SHIFT(iss, 4, 1));
    // Rt: index of the general-purpose register used as source/destination.
    xt = static_cast<uint8_t>(BITS_SHIFT(iss, 9, 5));
    // Direction bit: 1 = read (MRS), 0 = write (MSR).
    read = BIT(iss, 0);
}
59
// Decodes a guest write to ICC_SGI1R_EL1 (GICv3 SGI generation register)
// into its component fields.
SgiRegister::SgiRegister(uint64_t sgir) {
    // Affinity-routing fields selecting the target cluster hierarchy.
    aff3 = static_cast<uint8_t>(BITS_SHIFT(sgir, 55, 48));
    aff2 = static_cast<uint8_t>(BITS_SHIFT(sgir, 39, 32));
    aff1 = static_cast<uint8_t>(BITS_SHIFT(sgir, 23, 16));
    // Range selector for the target list.
    rs = static_cast<uint8_t>(BITS_SHIFT(sgir, 47, 44));
    // NOTE(review): the target list spans bits [15:0] (one bit per PE), but
    // the cast narrows it to 8 bits — confirm target_list's declared width is
    // sufficient for guests that address VCPUs 8-15.
    target_list = static_cast<uint8_t>(BITS_SHIFT(sgir, 15, 0));
    // SGI interrupt ID.
    int_id = static_cast<uint8_t>(BITS_SHIFT(sgir, 27, 24));
    // IRM bit: when set, the SGI targets all PEs except the sender.
    all_but_local = BIT(sgir, 40);
}
69
// Decodes the ISS of a data abort (ARM DDI 0487, ESR_ELx ISS encoding for
// data aborts).
DataAbort::DataAbort(uint32_t iss) {
    // ISV: the syndrome fields below are only meaningful when this is set.
    valid = BIT_SHIFT(iss, 24);
    // SAS: access size is 2^SAS bytes (1, 2, 4 or 8).
    access_size = static_cast<uint8_t>(1u << BITS_SHIFT(iss, 23, 22));
    // SSE: whether a load must sign-extend the loaded value.
    sign_extend = BIT(iss, 21);
    // SRT: general-purpose register used as source/destination.
    xt = static_cast<uint8_t>(BITS_SHIFT(iss, 20, 16));
    // WnR is 1 for writes, so the access is a read when the bit is clear.
    read = !BIT(iss, 6);
}
77
78static void next_pc(GuestState* guest_state) {
79    guest_state->system_state.elr_el2 += 4;
80}
81
82static void deadline_callback(timer_t* timer, zx_time_t now, void* arg) {
83    auto gich_state = static_cast<GichState*>(arg);
84    __UNUSED zx_status_t status = gich_state->interrupt_tracker.Interrupt(kTimerVector, nullptr);
85    DEBUG_ASSERT(status == ZX_OK);
86}
87
// Handles a trapped WFI or WFE instruction.
//
// WFE simply reschedules. For WFI, if the guest's virtual timer is armed
// with a future deadline, a host one-shot timer is set so the VCPU wakes
// when the guest timer would have fired; otherwise the VCPU just yields.
static zx_status_t handle_wfi_wfe_instruction(uint32_t iss, GuestState* guest_state,
                                              GichState* gich_state) {
    next_pc(guest_state);
    const WaitInstruction wi(iss);
    if (wi.is_wfe) {
        ktrace_vcpu_exit(VCPU_WFE_INSTRUCTION, guest_state->system_state.elr_el2);
        thread_reschedule();
        return ZX_OK;
    }
    ktrace_vcpu_exit(VCPU_WFI_INSTRUCTION, guest_state->system_state.elr_el2);

    // If the timer interrupt is already active, or the guest's virtual timer
    // is disabled or masked, there is no deadline to wait for: yield and let
    // the guest resume.
    bool pending = gich_state->active_interrupts.GetOne(kTimerVector);
    bool enabled = guest_state->cntv_ctl_el0 & TimerControl::ENABLE;
    bool masked = guest_state->cntv_ctl_el0 & TimerControl::IMASK;
    if (pending || !enabled || masked) {
        thread_yield();
        return ZX_OK;
    }

    // Convert the guest's compare value (physical counter ticks) to a host
    // deadline, cancelling any previously armed timer first.
    timer_cancel(&gich_state->timer);
    uint64_t cntpct_deadline = guest_state->cntv_cval_el0;
    zx_time_t deadline = cntpct_to_zx_time(cntpct_deadline);
    if (deadline <= current_time()) {
        // Deadline already passed: record the timer interrupt for delivery on
        // the next guest entry instead of blocking.
        return gich_state->interrupt_tracker.Track(kTimerVector);
    }

    // Block until either the deadline fires (deadline_callback injects
    // kTimerVector) or another interrupt arrives for this VCPU.
    timer_set_oneshot(&gich_state->timer, deadline, deadline_callback, gich_state);
    return gich_state->interrupt_tracker.Wait(nullptr);
}
117
118static zx_status_t handle_smc_instruction(uint32_t iss, GuestState* guest_state,
119                                          zx_port_packet_t* packet) {
120    const SmcInstruction si(iss);
121    if (si.imm != kSmcPsci)
122        return ZX_ERR_NOT_SUPPORTED;
123
124    next_pc(guest_state);
125    switch (guest_state->x[0]) {
126    case PSCI64_CPU_ON:
127        memset(packet, 0, sizeof(*packet));
128        packet->type = ZX_PKT_TYPE_GUEST_VCPU;
129        packet->guest_vcpu.type = ZX_PKT_GUEST_VCPU_STARTUP;
130        packet->guest_vcpu.startup.id = guest_state->x[1];
131        packet->guest_vcpu.startup.entry = guest_state->x[2];
132        guest_state->x[0] = PSCI_SUCCESS;
133        return ZX_ERR_NEXT;
134    default:
135        guest_state->x[0] = PSCI_NOT_SUPPORTED;
136        return ZX_ERR_NOT_SUPPORTED;
137    }
138}
139
// Recursively walks the stage-2 page table rooted at |table| and performs a
// data-cache clean/invalidate over every mapped page or block. |index_shift|
// is the VA shift covered by a single entry at the current level. Invoked
// when the guest enables its MMU (see the SCTLR_EL1 case in
// handle_system_instruction).
static void clean_invalidate_cache(zx_paddr_t table, size_t index_shift) {
    // TODO(abdulla): Make this understand concatenated page tables.
    auto* pte = static_cast<pte_t*>(paddr_to_physmap(table));
    // Leaf descriptors are block entries above the page level and page
    // entries at the final level.
    pte_t page = index_shift > MMU_GUEST_PAGE_SIZE_SHIFT ?
                 MMU_PTE_L012_DESCRIPTOR_BLOCK : MMU_PTE_L3_DESCRIPTOR_PAGE;
    for (size_t i = 0; i < PAGE_SIZE / sizeof(pte_t); i++) {
        pte_t desc = pte[i] & MMU_PTE_DESCRIPTOR_MASK;
        pte_t paddr = pte[i] & MMU_PTE_OUTPUT_ADDR_MASK;
        if (desc == page) {
            // Leaf entry: clean/invalidate the entire range it maps
            // (1 << index_shift bytes), via the physmap alias.
            zx_vaddr_t vaddr = reinterpret_cast<zx_vaddr_t>(paddr_to_physmap(paddr));
            arch_clean_invalidate_cache_range(vaddr, 1lu << index_shift);
        } else if (desc != MMU_PTE_DESCRIPTOR_INVALID) {
            // Next-level table: recurse; each level resolves
            // MMU_GUEST_PAGE_SIZE_SHIFT - kPageTableLevelShift VA bits.
            size_t adjust_shift = MMU_GUEST_PAGE_SIZE_SHIFT - kPageTableLevelShift;
            clean_invalidate_cache(paddr, index_shift - adjust_shift);
        }
    }
}
157
// Handles a trapped MSR/MRS access to an EL1 system register.
//
// Returns ZX_OK when the access is fully handled in-kernel, ZX_ERR_NEXT when
// |packet| has been filled in and must be delivered to user space, and an
// error otherwise.
static zx_status_t handle_system_instruction(uint32_t iss, uint64_t* hcr, GuestState* guest_state,
                                             hypervisor::GuestPhysicalAddressSpace* gpas,
                                             zx_port_packet_t* packet) {
    const SystemInstruction si(iss);
    // Value of the Xt register the guest used; only meaningful for writes.
    const uint64_t reg = guest_state->x[si.xt];

    switch (si.sysreg) {
    case SystemRegister::MAIR_EL1:
        return SET_SYSREG(mair_el1);
    case SystemRegister::SCTLR_EL1: {
        if (si.read) {
            return ZX_ERR_NOT_SUPPORTED;
        }

        // From ARM DDI 0487B.b, Section D10.2.89: If the value of HCR_EL2.{DC,
        // TGE} is not {0, 0} then in Non-secure state the PE behaves as if the
        // value of the SCTLR_EL1.M field is 0 for all purposes other than
        // returning the value of a direct read of the field.
        //
        // Therefore if SCTLR_EL1.M is set to 1, we need to set HCR_EL2.DC to 0
        // and invalidate the guest physical address space.
        uint32_t sctlr_el1 = reg & UINT32_MAX;
        if (sctlr_el1 & SCTLR_ELX_M) {
            *hcr &= ~HCR_EL2_DC;
            // Additionally, if the guest has also set SCTLR_EL1.C to 1, we no
            // longer need to trap writes to virtual memory control registers,
            // so we can set HCR_EL2.TVM to 0 to improve performance.
            if (sctlr_el1 & SCTLR_ELX_C) {
                *hcr &= ~HCR_EL2_TVM;
            }
            clean_invalidate_cache(gpas->arch_aspace()->arch_table_phys(), MMU_GUEST_TOP_SHIFT);
        }
        guest_state->system_state.sctlr_el1 = sctlr_el1;

        LTRACEF("guest sctlr_el1: %#x\n", sctlr_el1);
        LTRACEF("guest hcr_el2: %#lx\n", *hcr);
        next_pc(guest_state);
        return ZX_OK;
    }
    // Remaining virtual-memory control registers: record the guest's value
    // and advance the PC (see SET_SYSREG).
    case SystemRegister::TCR_EL1:
        return SET_SYSREG(tcr_el1);
    case SystemRegister::TTBR0_EL1:
        return SET_SYSREG(ttbr0_el1);
    case SystemRegister::TTBR1_EL1:
        return SET_SYSREG(ttbr1_el1);
    case SystemRegister::OSLAR_EL1:
    case SystemRegister::OSLSR_EL1:
    case SystemRegister::OSDLR_EL1:
    case SystemRegister::DBGPRCR_EL1:
        next_pc(guest_state);
        // These registers are RAZ/WI. Their state is dictated by the host.
        if (si.read) {
            guest_state->x[si.xt] = 0;
        }
        return ZX_OK;
    case SystemRegister::ICC_SGI1R_EL1: {
        if (si.read) {
            // ICC_SGI1R_EL1 is write-only.
            return ZX_ERR_INVALID_ARGS;
        }
        SgiRegister sgi(reg);
        // Only flat (affinity-0) routing is supported.
        if (sgi.aff3 != 0 || sgi.aff2 != 0 || sgi.aff1 != 0 || sgi.rs != 0) {
            return ZX_ERR_NOT_SUPPORTED;
        }

        // Forward the SGI to user space as an interrupt packet carrying a
        // bitmask of target VCPUs.
        memset(packet, 0, sizeof(*packet));
        packet->type = ZX_PKT_TYPE_GUEST_VCPU;
        packet->guest_vcpu.type = ZX_PKT_GUEST_VCPU_INTERRUPT;
        if (sgi.all_but_local) {
            // Target everyone except this VCPU, identified by the low bits of
            // its VMPIDR_EL2.
            auto vpid = BITS(guest_state->system_state.vmpidr_el2, 8, 0);
            packet->guest_vcpu.interrupt.mask = ~(static_cast<uint64_t>(1) << vpid);
        } else {
            packet->guest_vcpu.interrupt.mask = sgi.target_list;
        }
        packet->guest_vcpu.interrupt.vector = sgi.int_id;
        next_pc(guest_state);
        return ZX_ERR_NEXT;
    }
    }

    dprintf(CRITICAL, "Unhandled system register %#x\n", static_cast<uint16_t>(si.sysreg));
    return ZX_ERR_NOT_SUPPORTED;
}
241
242static zx_status_t handle_instruction_abort(GuestState* guest_state,
243                                            hypervisor::GuestPhysicalAddressSpace* gpas) {
244    const zx_vaddr_t guest_paddr = guest_state->hpfar_el2;
245    zx_status_t status = gpas->PageFault(guest_paddr);
246    if (status != ZX_OK) {
247        dprintf(CRITICAL, "Unhandled instruction abort %#lx\n", guest_paddr);
248    }
249    return status;
250}
251
// Handles a stage-2 data abort.
//
// If no trap covers the faulting guest-physical address, the page is simply
// faulted in. Otherwise the abort is converted into a bell or mem packet:
// ZX_ERR_NEXT tells the caller to deliver |packet| to user space.
static zx_status_t handle_data_abort(uint32_t iss, GuestState* guest_state,
                                     hypervisor::GuestPhysicalAddressSpace* gpas,
                                     hypervisor::TrapMap* traps,
                                     zx_port_packet_t* packet) {
    zx_vaddr_t guest_paddr = guest_state->hpfar_el2;
    hypervisor::Trap* trap;
    zx_status_t status = traps->FindTrap(ZX_GUEST_TRAP_BELL, guest_paddr, &trap);
    switch (status) {
    case ZX_ERR_NOT_FOUND:
        // No trap registered for this address: treat it as a normal
        // guest-physical page fault.
        status = gpas->PageFault(guest_paddr);
        if (status != ZX_OK) {
            dprintf(CRITICAL, "Unhandled data abort %#lx\n", guest_paddr);
        }
        return status;
    case ZX_OK:
        break;
    default:
        return status;
    }
    next_pc(guest_state);

    // Combine the lower bits of FAR_EL2 with HPFAR_EL2 to get the exact IPA.
    guest_paddr |= guest_state->far_el2 & (PAGE_SIZE - 1);
    LTRACEF("guest far_el2: %#lx\n", guest_state->far_el2);

    const DataAbort data_abort(iss);
    switch (trap->kind()) {
    case ZX_GUEST_TRAP_BELL:
        // Bell traps signal on write only; queue an async packet on the
        // trap's port rather than returning one to the caller.
        if (data_abort.read)
            return ZX_ERR_NOT_SUPPORTED;
        *packet = {};
        packet->key = trap->key();
        packet->type = ZX_PKT_TYPE_GUEST_BELL;
        packet->guest_bell.addr = guest_paddr;
        if (!trap->HasPort())
            return ZX_ERR_BAD_STATE;
        return trap->Queue(*packet, nullptr);
    case ZX_GUEST_TRAP_MEM:
        // Mem traps need the full syndrome (register, size, direction) so
        // user space can emulate the access; without ISV we cannot decode it.
        if (!data_abort.valid)
            return ZX_ERR_IO_DATA_INTEGRITY;
        *packet = {};
        packet->key = trap->key();
        packet->type = ZX_PKT_TYPE_GUEST_MEM;
        packet->guest_mem.addr = guest_paddr;
        packet->guest_mem.access_size = data_abort.access_size;
        packet->guest_mem.sign_extend = data_abort.sign_extend;
        packet->guest_mem.xt = data_abort.xt;
        packet->guest_mem.read = data_abort.read;
        // For writes, carry the value the guest was storing.
        if (!data_abort.read)
            packet->guest_mem.data = guest_state->x[data_abort.xt];
        return ZX_ERR_NEXT;
    default:
        return ZX_ERR_BAD_STATE;
    }
}
307
// Top-level VM-exit dispatcher: decodes ESR_EL2 and routes to the handler
// for the trapped exception class.
//
// Returns ZX_OK to resume the guest, ZX_ERR_NEXT when |packet| should be
// delivered to user space, and any other error to stop running the VCPU.
zx_status_t vmexit_handler(uint64_t* hcr, GuestState* guest_state, GichState* gich_state,
                           hypervisor::GuestPhysicalAddressSpace* gpas, hypervisor::TrapMap* traps,
                           zx_port_packet_t* packet) {
    LTRACEF("guest esr_el1: %#x\n", guest_state->system_state.esr_el1);
    LTRACEF("guest esr_el2: %#x\n", guest_state->esr_el2);
    LTRACEF("guest elr_el2: %#lx\n", guest_state->system_state.elr_el2);
    LTRACEF("guest spsr_el2: %#x\n", guest_state->system_state.spsr_el2);

    ExceptionSyndrome syndrome(guest_state->esr_el2);
    zx_status_t status;
    switch (syndrome.ec) {
    case ExceptionClass::WFI_WFE_INSTRUCTION:
        LTRACEF("handling wfi/wfe instruction, iss %#x\n", syndrome.iss);
        status = handle_wfi_wfe_instruction(syndrome.iss, guest_state, gich_state);
        break;
    case ExceptionClass::SMC_INSTRUCTION:
        LTRACEF("handling smc instruction, iss %#x func %#lx\n", syndrome.iss, guest_state->x[0]);
        ktrace_vcpu_exit(VCPU_SMC_INSTRUCTION, guest_state->system_state.elr_el2);
        status = handle_smc_instruction(syndrome.iss, guest_state, packet);
        break;
    case ExceptionClass::SYSTEM_INSTRUCTION:
        LTRACEF("handling system instruction\n");
        ktrace_vcpu_exit(VCPU_SYSTEM_INSTRUCTION, guest_state->system_state.elr_el2);
        status = handle_system_instruction(syndrome.iss, hcr, guest_state, gpas, packet);
        break;
    case ExceptionClass::INSTRUCTION_ABORT:
        LTRACEF("handling instruction abort at %#lx\n", guest_state->hpfar_el2);
        ktrace_vcpu_exit(VCPU_INSTRUCTION_ABORT, guest_state->system_state.elr_el2);
        status = handle_instruction_abort(guest_state, gpas);
        break;
    case ExceptionClass::DATA_ABORT:
        LTRACEF("handling data abort at %#lx\n", guest_state->hpfar_el2);
        ktrace_vcpu_exit(VCPU_DATA_ABORT, guest_state->system_state.elr_el2);
        status = handle_data_abort(syndrome.iss, guest_state, gpas, traps, packet);
        break;
    default:
        LTRACEF("unhandled exception syndrome, ec %#x iss %#x\n",
                static_cast<uint32_t>(syndrome.ec), syndrome.iss);
        ktrace_vcpu_exit(VCPU_UNKNOWN, guest_state->system_state.elr_el2);
        status = ZX_ERR_NOT_SUPPORTED;
        break;
    }
    // ZX_ERR_NEXT (packet for user space) and ZX_ERR_CANCELED (presumably an
    // interrupted wait — TODO confirm against callers) are expected
    // control-flow results; anything else unexpected gets logged with the
    // guest's exception level (SPSR_EL2[3:2]) and faulting PC.
    if (status != ZX_OK && status != ZX_ERR_NEXT && status != ZX_ERR_CANCELED) {
        dprintf(CRITICAL, "VM exit handler for %u (%s) to EL%u at %lx returned %d\n",
                static_cast<uint32_t>(syndrome.ec),
                exception_class_name(syndrome.ec),
                BITS_SHIFT(guest_state->system_state.spsr_el2, 3, 2),
                guest_state->system_state.elr_el2,
                status);
    }
    return status;
}
360