// Copyright 2017 The Fuchsia Authors
//
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file or at
// https://opensource.org/licenses/MIT

#include "vmexit_priv.h"

#include <bits.h>
#include <platform.h>
#include <trace.h>

#include <arch/arm64/el2_state.h>
#include <arch/hypervisor.h>
#include <dev/psci.h>
#include <dev/timer/arm_generic.h>
#include <hypervisor/ktrace.h>
#include <vm/fault.h>
#include <vm/physmap.h>
#include <zircon/syscalls/hypervisor.h>
#include <zircon/syscalls/port.h>

#define LOCAL_TRACE 0

// Stores `reg` (the trapped transfer-register value) into the named field of
// the guest's saved system state, advances the guest PC past the trapping
// instruction, and evaluates to ZX_OK. Only usable where `guest_state` and
// `reg` are in scope (see handle_system_instruction).
#define SET_SYSREG(sysreg) \
    ({ \
        guest_state->system_state.sysreg = reg; \
        LTRACEF("guest " #sysreg ": %#lx\n", guest_state->system_state.sysreg); \
        next_pc(guest_state); \
        ZX_OK; \
    })

// log2(sizeof(pte_t)): each page-table level resolves PAGE_SIZE/8 entries.
static constexpr size_t kPageTableLevelShift = 3;
// SMC immediate used by guests to make PSCI calls.
static constexpr uint16_t kSmcPsci = 0;

// Bits of CNTV_CTL_EL0 (guest virtual timer control) that are inspected below.
enum TimerControl : uint64_t {
    ENABLE = 1u << 0,
    IMASK = 1u << 1,
};

// Splits ESR_EL2 into the exception class (bits [31:26]) and the
// instruction-specific syndrome (bits [24:0]).
ExceptionSyndrome::ExceptionSyndrome(uint32_t esr) {
    ec = static_cast<ExceptionClass>(BITS_SHIFT(esr, 31, 26));
    iss = BITS(esr, 24, 0);
}

// ISS for a trapped WFI/WFE: bit 0 distinguishes WFE (1) from WFI (0).
WaitInstruction::WaitInstruction(uint32_t iss) {
    is_wfe = BIT(iss, 0);
}

// ISS for a trapped SMC: bits [15:0] hold the SMC immediate.
SmcInstruction::SmcInstruction(uint32_t iss) {
    imm = static_cast<uint16_t>(BITS(iss, 15, 0));
}

// ISS for a trapped MSR/MRS: bits [21:10] and [4:1] together identify the
// system register (packed into a SystemRegister key), bits [9:5] give the
// transfer register Xt, and bit 0 gives the direction (1 = read/MRS).
SystemInstruction::SystemInstruction(uint32_t iss) {
    sysreg = static_cast<SystemRegister>(BITS(iss, 21, 10) >> 6 | BITS_SHIFT(iss, 4, 1));
    xt = static_cast<uint8_t>(BITS_SHIFT(iss, 9, 5));
    read = BIT(iss, 0);
}

// Field decoding of a value the guest wrote to ICC_SGI1R_EL1 (GICv3 software-
// generated interrupt request).
SgiRegister::SgiRegister(uint64_t sgir) {
    aff3 = static_cast<uint8_t>(BITS_SHIFT(sgir, 55, 48));
    aff2 = static_cast<uint8_t>(BITS_SHIFT(sgir, 39, 32));
    aff1 = static_cast<uint8_t>(BITS_SHIFT(sgir, 23, 16));
    rs = static_cast<uint8_t>(BITS_SHIFT(sgir, 47, 44));
    // NOTE(review): TargetList occupies 16 bits here but is truncated to
    // uint8_t, so only vCPUs 0-7 are addressable — confirm this is intended.
    target_list = static_cast<uint8_t>(BITS_SHIFT(sgir, 15, 0));
    int_id = static_cast<uint8_t>(BITS_SHIFT(sgir, 27, 24));
    all_but_local = BIT(sgir, 40);
}

// ISS for a data abort taken to EL2.
DataAbort::DataAbort(uint32_t iss) {
    // Bit 24 (ISV): when set, the syndrome fields decoded below are valid.
    valid = BIT_SHIFT(iss, 24);
    // Bits [23:22] encode log2 of the access size: 1, 2, 4, or 8 bytes.
    access_size = static_cast<uint8_t>(1u << BITS_SHIFT(iss, 23, 22));
    sign_extend = BIT(iss, 21);
    // Bits [20:16]: the transfer register Xt.
    xt = static_cast<uint8_t>(BITS_SHIFT(iss, 20, 16));
    // Bit 6 is the write-not-read bit, so invert it to get `read`.
    read = !BIT(iss, 6);
}

// Advances the saved guest PC past the trapping instruction (AArch64
// instructions are 4 bytes), so the guest resumes after it.
static void next_pc(GuestState* guest_state) {
    guest_state->system_state.elr_el2 += 4;
}

// Host timer callback armed by handle_wfi_wfe_instruction: injects the
// virtual timer interrupt into the vCPU's interrupt tracker.
static void deadline_callback(timer_t* timer, zx_time_t now, void* arg) {
    auto gich_state = static_cast<GichState*>(arg);
    __UNUSED zx_status_t status = gich_state->interrupt_tracker.Interrupt(kTimerVector, nullptr);
    DEBUG_ASSERT(status == ZX_OK);
}

// Handles a trapped WFI/WFE. WFE just reschedules. WFI blocks the vCPU until
// an interrupt arrives; if the guest's virtual timer is armed, a host one-shot
// timer is set for its deadline so the wait is bounded.
static zx_status_t handle_wfi_wfe_instruction(uint32_t iss, GuestState* guest_state,
                                              GichState* gich_state) {
    next_pc(guest_state);
    const WaitInstruction wi(iss);
    if (wi.is_wfe) {
        ktrace_vcpu_exit(VCPU_WFE_INSTRUCTION, guest_state->system_state.elr_el2);
        thread_reschedule();
        return ZX_OK;
    }
    ktrace_vcpu_exit(VCPU_WFI_INSTRUCTION, guest_state->system_state.elr_el2);

    // If the timer interrupt is already active, or the guest's virtual timer
    // is disabled or masked, there is no deadline to honor — just yield.
    bool pending = gich_state->active_interrupts.GetOne(kTimerVector);
    bool enabled = guest_state->cntv_ctl_el0 & TimerControl::ENABLE;
    bool masked = guest_state->cntv_ctl_el0 & TimerControl::IMASK;
    if (pending || !enabled || masked) {
        thread_yield();
        return ZX_OK;
    }

    // Cancel any stale timer before re-arming for the current deadline.
    timer_cancel(&gich_state->timer);
    uint64_t cntpct_deadline = guest_state->cntv_cval_el0;
    zx_time_t deadline = cntpct_to_zx_time(cntpct_deadline);
    // Deadline already passed: record the timer interrupt rather than block.
    if (deadline <= current_time()) {
        return gich_state->interrupt_tracker.Track(kTimerVector);
    }

    // Arm the host timer for the guest deadline, then block until any
    // interrupt (timer or otherwise) wakes the vCPU.
    timer_set_oneshot(&gich_state->timer, deadline, deadline_callback, gich_state);
    return gich_state->interrupt_tracker.Wait(nullptr);
}

// Handles a trapped SMC. Only PSCI calls (immediate == kSmcPsci) are
// recognized, and of those only CPU_ON, which is forwarded to user space as a
// VCPU_STARTUP packet (ZX_ERR_NEXT signals the caller to deliver `packet`).
// Unsupported PSCI functions report PSCI_NOT_SUPPORTED back to the guest.
static zx_status_t handle_smc_instruction(uint32_t iss, GuestState* guest_state,
                                          zx_port_packet_t* packet) {
    const SmcInstruction si(iss);
    if (si.imm != kSmcPsci)
        return ZX_ERR_NOT_SUPPORTED;

    next_pc(guest_state);
    // PSCI convention: x0 holds the function ID, x1.. hold arguments, and the
    // return value is placed back in x0.
    switch (guest_state->x[0]) {
    case PSCI64_CPU_ON:
        memset(packet, 0, sizeof(*packet));
        packet->type = ZX_PKT_TYPE_GUEST_VCPU;
        packet->guest_vcpu.type = ZX_PKT_GUEST_VCPU_STARTUP;
        packet->guest_vcpu.startup.id = guest_state->x[1];
        packet->guest_vcpu.startup.entry = guest_state->x[2];
        guest_state->x[0] = PSCI_SUCCESS;
        return ZX_ERR_NEXT;
    default:
        guest_state->x[0] = PSCI_NOT_SUPPORTED;
        return ZX_ERR_NOT_SUPPORTED;
    }
}

// Recursively walks the stage-2 page table rooted at `table`, clean- and
// invalidating the data cache for every mapped block/page. Called when the
// guest enables its MMU, since memory the guest touched while HCR_EL2.DC was
// set may not be coherent with the caches.
static void clean_invalidate_cache(zx_paddr_t table, size_t index_shift) {
    // TODO(abdulla): Make this understand concatenated page tables.
    auto* pte = static_cast<pte_t*>(paddr_to_physmap(table));
    // Above the leaf level, a terminal entry is a block descriptor; at the
    // leaf level it is a page descriptor.
    pte_t page = index_shift > MMU_GUEST_PAGE_SIZE_SHIFT ?
                 MMU_PTE_L012_DESCRIPTOR_BLOCK : MMU_PTE_L3_DESCRIPTOR_PAGE;
    for (size_t i = 0; i < PAGE_SIZE / sizeof(pte_t); i++) {
        pte_t desc = pte[i] & MMU_PTE_DESCRIPTOR_MASK;
        pte_t paddr = pte[i] & MMU_PTE_OUTPUT_ADDR_MASK;
        if (desc == page) {
            // Terminal mapping: flush the region it covers (2^index_shift
            // bytes).
            zx_vaddr_t vaddr = reinterpret_cast<zx_vaddr_t>(paddr_to_physmap(paddr));
            arch_clean_invalidate_cache_range(vaddr, 1lu << index_shift);
        } else if (desc != MMU_PTE_DESCRIPTOR_INVALID) {
            // Table descriptor: descend one level; each level resolves
            // (page-size-shift - 3) further address bits.
            size_t adjust_shift = MMU_GUEST_PAGE_SIZE_SHIFT - kPageTableLevelShift;
            clean_invalidate_cache(paddr, index_shift - adjust_shift);
        }
    }
}

// Handles a trapped MSR/MRS to one of the emulated system registers. Most
// writes are simply mirrored into the saved guest state; SCTLR_EL1 writes
// additionally manage HCR_EL2.{DC,TVM} and cache coherency, and ICC_SGI1R_EL1
// writes become user-space interrupt packets (ZX_ERR_NEXT).
static zx_status_t handle_system_instruction(uint32_t iss, uint64_t* hcr, GuestState* guest_state,
                                             hypervisor::GuestPhysicalAddressSpace* gpas,
                                             zx_port_packet_t* packet) {
    const SystemInstruction si(iss);
    const uint64_t reg = guest_state->x[si.xt];

    switch (si.sysreg) {
    case SystemRegister::MAIR_EL1:
        return SET_SYSREG(mair_el1);
    case SystemRegister::SCTLR_EL1: {
        if (si.read) {
            return ZX_ERR_NOT_SUPPORTED;
        }

        // From ARM DDI 0487B.b, Section D10.2.89: If the value of HCR_EL2.{DC,
        // TGE} is not {0, 0} then in Non-secure state the PE behaves as if the
        // value of the SCTLR_EL1.M field is 0 for all purposes other than
        // returning the value of a direct read of the field.
        //
        // Therefore if SCTLR_EL1.M is set to 1, we need to set HCR_EL2.DC to 0
        // and invalidate the guest physical address space.
        uint32_t sctlr_el1 = reg & UINT32_MAX;
        if (sctlr_el1 & SCTLR_ELX_M) {
            *hcr &= ~HCR_EL2_DC;
            // Additionally, if the guest has also set SCTLR_EL1.C to 1, we no
            // longer need to trap writes to virtual memory control registers,
            // so we can set HCR_EL2.TVM to 0 to improve performance.
            if (sctlr_el1 & SCTLR_ELX_C) {
                *hcr &= ~HCR_EL2_TVM;
            }
            clean_invalidate_cache(gpas->arch_aspace()->arch_table_phys(), MMU_GUEST_TOP_SHIFT);
        }
        guest_state->system_state.sctlr_el1 = sctlr_el1;

        LTRACEF("guest sctlr_el1: %#x\n", sctlr_el1);
        LTRACEF("guest hcr_el2: %#lx\n", *hcr);
        next_pc(guest_state);
        return ZX_OK;
    }
    case SystemRegister::TCR_EL1:
        return SET_SYSREG(tcr_el1);
    case SystemRegister::TTBR0_EL1:
        return SET_SYSREG(ttbr0_el1);
    case SystemRegister::TTBR1_EL1:
        return SET_SYSREG(ttbr1_el1);
    case SystemRegister::OSLAR_EL1:
    case SystemRegister::OSLSR_EL1:
    case SystemRegister::OSDLR_EL1:
    case SystemRegister::DBGPRCR_EL1:
        next_pc(guest_state);
        // These registers are RAZ/WI. Their state is dictated by the host.
        if (si.read) {
            guest_state->x[si.xt] = 0;
        }
        return ZX_OK;
    case SystemRegister::ICC_SGI1R_EL1: {
        if (si.read) {
            // ICC_SGI1R_EL1 is write-only.
            return ZX_ERR_INVALID_ARGS;
        }
        SgiRegister sgi(reg);
        // Only affinity-0 targeting is supported (no Aff1/2/3 or range
        // selector routing).
        if (sgi.aff3 != 0 || sgi.aff2 != 0 || sgi.aff1 != 0 || sgi.rs != 0) {
            return ZX_ERR_NOT_SUPPORTED;
        }

        memset(packet, 0, sizeof(*packet));
        packet->type = ZX_PKT_TYPE_GUEST_VCPU;
        packet->guest_vcpu.type = ZX_PKT_GUEST_VCPU_INTERRUPT;
        if (sgi.all_but_local) {
            // Broadcast to every vCPU except the sender (identified by the
            // low bits of VMPIDR_EL2).
            auto vpid = BITS(guest_state->system_state.vmpidr_el2, 8, 0);
            packet->guest_vcpu.interrupt.mask = ~(static_cast<uint64_t>(1) << vpid);
        } else {
            packet->guest_vcpu.interrupt.mask = sgi.target_list;
        }
        packet->guest_vcpu.interrupt.vector = sgi.int_id;
        next_pc(guest_state);
        return ZX_ERR_NEXT;
    }
    }

    dprintf(CRITICAL, "Unhandled system register %#x\n", static_cast<uint16_t>(si.sysreg));
    return ZX_ERR_NOT_SUPPORTED;
}

// Handles a stage-2 instruction abort by faulting in the guest-physical page
// recorded in HPFAR_EL2. The guest PC is intentionally NOT advanced: the
// faulting instruction is re-executed after the page is mapped.
static zx_status_t handle_instruction_abort(GuestState* guest_state,
                                            hypervisor::GuestPhysicalAddressSpace* gpas) {
    const zx_vaddr_t guest_paddr = guest_state->hpfar_el2;
    zx_status_t status = gpas->PageFault(guest_paddr);
    if (status != ZX_OK) {
        dprintf(CRITICAL, "Unhandled instruction abort %#lx\n", guest_paddr);
    }
    return status;
}

// Handles a stage-2 data abort. If the faulting address is not a registered
// trap, it is treated as a page fault (and the instruction retried). For trap
// hits, the PC is advanced and the access is packaged as either a bell packet
// (queued to a port) or a mem packet returned via ZX_ERR_NEXT.
static zx_status_t handle_data_abort(uint32_t iss, GuestState* guest_state,
                                     hypervisor::GuestPhysicalAddressSpace* gpas,
                                     hypervisor::TrapMap* traps,
                                     zx_port_packet_t* packet) {
    zx_vaddr_t guest_paddr = guest_state->hpfar_el2;
    hypervisor::Trap* trap;
    zx_status_t status = traps->FindTrap(ZX_GUEST_TRAP_BELL, guest_paddr, &trap);
    switch (status) {
    case ZX_ERR_NOT_FOUND:
        // Not a trap: treat as a normal guest-physical page fault.
        status = gpas->PageFault(guest_paddr);
        if (status != ZX_OK) {
            dprintf(CRITICAL, "Unhandled data abort %#lx\n", guest_paddr);
        }
        return status;
    case ZX_OK:
        break;
    default:
        return status;
    }
    next_pc(guest_state);

    // Combine the lower bits of FAR_EL2 with HPFAR_EL2 to get the exact IPA.
    guest_paddr |= guest_state->far_el2 & (PAGE_SIZE - 1);
    LTRACEF("guest far_el2: %#lx\n", guest_state->far_el2);

    const DataAbort data_abort(iss);
    switch (trap->kind()) {
    case ZX_GUEST_TRAP_BELL:
        // Bell traps only signal on writes; the payload is just the address.
        if (data_abort.read)
            return ZX_ERR_NOT_SUPPORTED;
        *packet = {};
        packet->key = trap->key();
        packet->type = ZX_PKT_TYPE_GUEST_BELL;
        packet->guest_bell.addr = guest_paddr;
        if (!trap->HasPort())
            return ZX_ERR_BAD_STATE;
        return trap->Queue(*packet, nullptr);
    case ZX_GUEST_TRAP_MEM:
        // Mem traps need the decoded syndrome (size, register, direction);
        // without ISV the access cannot be emulated.
        if (!data_abort.valid)
            return ZX_ERR_IO_DATA_INTEGRITY;
        *packet = {};
        packet->key = trap->key();
        packet->type = ZX_PKT_TYPE_GUEST_MEM;
        packet->guest_mem.addr = guest_paddr;
        packet->guest_mem.access_size = data_abort.access_size;
        packet->guest_mem.sign_extend = data_abort.sign_extend;
        packet->guest_mem.xt = data_abort.xt;
        packet->guest_mem.read = data_abort.read;
        if (!data_abort.read)
            packet->guest_mem.data = guest_state->x[data_abort.xt];
        return ZX_ERR_NEXT;
    default:
        return ZX_ERR_BAD_STATE;
    }
}

// Top-level VM-exit dispatcher: decodes ESR_EL2 and routes to the handler for
// the exception class. Returns ZX_OK to resume the guest, ZX_ERR_NEXT when
// `packet` must be delivered to user space, or an error otherwise; anything
// other than those (or ZX_ERR_CANCELED) is logged as unhandled.
zx_status_t vmexit_handler(uint64_t* hcr, GuestState* guest_state, GichState* gich_state,
                           hypervisor::GuestPhysicalAddressSpace* gpas, hypervisor::TrapMap* traps,
                           zx_port_packet_t* packet) {
    LTRACEF("guest esr_el1: %#x\n", guest_state->system_state.esr_el1);
    LTRACEF("guest esr_el2: %#x\n", guest_state->esr_el2);
    LTRACEF("guest elr_el2: %#lx\n", guest_state->system_state.elr_el2);
    LTRACEF("guest spsr_el2: %#x\n", guest_state->system_state.spsr_el2);

    ExceptionSyndrome syndrome(guest_state->esr_el2);
    zx_status_t status;
    switch (syndrome.ec) {
    case ExceptionClass::WFI_WFE_INSTRUCTION:
        LTRACEF("handling wfi/wfe instruction, iss %#x\n", syndrome.iss);
        status = handle_wfi_wfe_instruction(syndrome.iss, guest_state, gich_state);
        break;
    case ExceptionClass::SMC_INSTRUCTION:
        LTRACEF("handling smc instruction, iss %#x func %#lx\n", syndrome.iss, guest_state->x[0]);
        ktrace_vcpu_exit(VCPU_SMC_INSTRUCTION, guest_state->system_state.elr_el2);
        status = handle_smc_instruction(syndrome.iss, guest_state, packet);
        break;
    case ExceptionClass::SYSTEM_INSTRUCTION:
        LTRACEF("handling system instruction\n");
        ktrace_vcpu_exit(VCPU_SYSTEM_INSTRUCTION, guest_state->system_state.elr_el2);
        status = handle_system_instruction(syndrome.iss, hcr, guest_state, gpas, packet);
        break;
    case ExceptionClass::INSTRUCTION_ABORT:
        LTRACEF("handling instruction abort at %#lx\n", guest_state->hpfar_el2);
        ktrace_vcpu_exit(VCPU_INSTRUCTION_ABORT, guest_state->system_state.elr_el2);
        status = handle_instruction_abort(guest_state, gpas);
        break;
    case ExceptionClass::DATA_ABORT:
        LTRACEF("handling data abort at %#lx\n", guest_state->hpfar_el2);
        ktrace_vcpu_exit(VCPU_DATA_ABORT, guest_state->system_state.elr_el2);
        status = handle_data_abort(syndrome.iss, guest_state, gpas, traps, packet);
        break;
    default:
        LTRACEF("unhandled exception syndrome, ec %#x iss %#x\n",
                static_cast<uint32_t>(syndrome.ec), syndrome.iss);
        ktrace_vcpu_exit(VCPU_UNKNOWN, guest_state->system_state.elr_el2);
        status = ZX_ERR_NOT_SUPPORTED;
        break;
    }
    // ZX_ERR_NEXT (deliver packet) and ZX_ERR_CANCELED are expected outcomes;
    // anything else besides ZX_OK is an unhandled exit worth logging.
    if (status != ZX_OK && status != ZX_ERR_NEXT && status != ZX_ERR_CANCELED) {
        dprintf(CRITICAL, "VM exit handler for %u (%s) to EL%u at %lx returned %d\n",
                static_cast<uint32_t>(syndrome.ec),
                exception_class_name(syndrome.ec),
                BITS_SHIFT(guest_state->system_state.spsr_el2, 3, 2),
                guest_state->system_state.elr_el2,
                status);
    }
    return status;
}