// Copyright 2017 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include <assert.h>
#include <math.h>
#include <string.h>
#include <unittest/unittest.h>

#include "register-set.h"

namespace {

// Write a NaN double value to the given uint64_t (which is how most of the
// registers are stored in the structs).
void WriteNaNDouble(uint64_t* output) {
    double nan_value = nan("");
    memcpy(output, &nan_value, sizeof(double));
}

} // namespace

void general_regs_fill_test_values(zx_thread_state_general_regs_t* regs) {
    for (uint32_t index = 0; index < sizeof(*regs); ++index) {
        ((uint8_t*)regs)[index] = static_cast<uint8_t>(index + 1);
    }
// Set various flag bits that will read back the same.
#if defined(__x86_64__)
    // Here we set all flag bits that are modifiable from user space or
    // that are not modifiable but are expected to read back as 1, with the
    // exception of the trap flag (bit 8, which would interfere with
    // execution if we set it).
    //
    // Note that setting the direction flag (bit 10) helps test whether the
    // kernel correctly handles taking an interrupt when that flag is set
    // (see ZX-998).
    regs->rflags =
        (1 << 0) |  // CF: carry flag
        (1 << 1) |  // Reserved, always 1
        (1 << 2) |  // PF: parity flag
        (1 << 4) |  // AF: adjust flag
        (1 << 6) |  // ZF: zero flag
        (1 << 7) |  // SF: sign flag
        (1 << 9) |  // IF: interrupt enable flag (set by kernel)
        (1 << 10) | // DF: direction flag
        (1 << 11) | // OF: overflow flag
        (1 << 14) | // NT: nested task flag
        (1 << 18) | // AC: alignment check flag
        (1 << 21);  // ID: used for testing for CPUID support
#elif defined(__aarch64__)
    // Only set the 4 flag bits that are readable and writable by the
    // instructions "msr nzcv, REG" and "mrs REG, nzcv".
    regs->cpsr = 0xf0000000;
#endif
}

void fp_regs_fill_test_values(zx_thread_state_fp_regs* regs) {
    memset(regs, 0, sizeof(zx_thread_state_fp_regs));
#if defined(__x86_64__)
    for (size_t i = 0; i < 7; i++)
        regs->st[i].low = i;

    // Write NaN to the last value.
    WriteNaNDouble(&regs->st[7].low);
#elif defined(__aarch64__)
// No FP struct on ARM (vector only).
#else
#error Unsupported architecture
#endif
}

void vector_regs_fill_test_values(zx_thread_state_vector_regs* regs) {
    memset(regs, 0, sizeof(zx_thread_state_vector_regs));
#if defined(__x86_64__)
    for (uint64_t i = 0; i < 16; i++) {
        // Only sets the XMM registers (first two) since that's all that's guaranteed.
        regs->zmm[i].v[0] = i;
        regs->zmm[i].v[1] = i << 8;
        regs->zmm[i].v[2] = 0;
        regs->zmm[i].v[3] = 0;
    }

    // Write NaN to the last value.
    WriteNaNDouble(&regs->zmm[15].v[0]);
#elif defined(__aarch64__)
    for (uint64_t i = 0; i < 32; i++) {
        regs->v[i].low = i;
        regs->v[i].high = i << 8;
    }

    // Write NaN to the last value.
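    // nan("") produces a quiet NaN, so the final element gets a bit pattern that is
    // distinct from the small integer values written above.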
    WriteNaNDouble(&regs->v[31].low);
#else
#error Unsupported architecture
#endif
}

bool general_regs_expect_eq(const zx_thread_state_general_regs_t& regs1,
                            const zx_thread_state_general_regs_t& regs2) {
    BEGIN_HELPER;
#define CHECK_REG(FIELD) EXPECT_EQ(regs1.FIELD, regs2.FIELD, "Reg " #FIELD)
#if defined(__x86_64__)
    CHECK_REG(rax);
    CHECK_REG(rbx);
    CHECK_REG(rcx);
    CHECK_REG(rdx);
    CHECK_REG(rsi);
    CHECK_REG(rdi);
    CHECK_REG(rbp);
    CHECK_REG(rsp);
    CHECK_REG(r8);
    CHECK_REG(r9);
    CHECK_REG(r10);
    CHECK_REG(r11);
    CHECK_REG(r12);
    CHECK_REG(r13);
    CHECK_REG(r14);
    CHECK_REG(r15);
    CHECK_REG(rip);
    CHECK_REG(rflags);
#elif defined(__aarch64__)
    for (int regnum = 0; regnum < 30; ++regnum) {
        char name[10];
        snprintf(name, sizeof(name), "Reg r[%d]", regnum);
        EXPECT_EQ(regs1.r[regnum], regs2.r[regnum], name);
    }
    CHECK_REG(lr);
    CHECK_REG(sp);
    CHECK_REG(pc);
    CHECK_REG(cpsr);
#else
#error Unsupported architecture
#endif
#undef CHECK_REG
    END_HELPER;
}

bool fp_regs_expect_eq(const zx_thread_state_fp_regs_t& regs1,
                       const zx_thread_state_fp_regs_t& regs2) {
#if defined(__x86_64__)
    BEGIN_HELPER;

    // This just tests the MMX registers.
    EXPECT_EQ(regs1.st[0].low, regs2.st[0].low, "Reg st[0].low");
    EXPECT_EQ(regs1.st[1].low, regs2.st[1].low, "Reg st[1].low");
    EXPECT_EQ(regs1.st[2].low, regs2.st[2].low, "Reg st[2].low");
    EXPECT_EQ(regs1.st[3].low, regs2.st[3].low, "Reg st[3].low");
    EXPECT_EQ(regs1.st[4].low, regs2.st[4].low, "Reg st[4].low");
    EXPECT_EQ(regs1.st[5].low, regs2.st[5].low, "Reg st[5].low");
    EXPECT_EQ(regs1.st[6].low, regs2.st[6].low, "Reg st[6].low");
    EXPECT_EQ(regs1.st[7].low, regs2.st[7].low, "Reg st[7].low");

    END_HELPER;
#elif defined(__aarch64__)
    // No FP regs on ARM (uses vector regs for FP).
    (void)regs1;
    (void)regs2;
    return true;
#else
#error Unsupported architecture
#endif
}

bool vector_regs_expect_eq(const zx_thread_state_vector_regs_t& regs1,
                           const zx_thread_state_vector_regs_t& regs2) {
    BEGIN_HELPER;
#if defined(__x86_64__)
    // Only check the first 16 registers (guaranteed to work).
    for (int reg = 0; reg < 16; reg++) {
        // Only check the low 128 bits (guaranteed to work).
        EXPECT_EQ(regs1.zmm[reg].v[0], regs2.zmm[reg].v[0]);
        EXPECT_EQ(regs1.zmm[reg].v[1], regs2.zmm[reg].v[1]);
    }
#elif defined(__aarch64__)
    for (int i = 0; i < 32; i++) {
        EXPECT_EQ(regs1.v[i].high, regs2.v[i].high);
        EXPECT_EQ(regs1.v[i].low, regs2.v[i].low);
    }
#else
#error Unsupported architecture
#endif
    END_HELPER;
}

// spin_with_general_regs() function.
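// The assembly below loads every general register from the
// zx_thread_state_general_regs_t whose address arrives in the first argument
// register, then spins forever at the exported
// spin_with_general_regs_spin_address label.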
#if defined(__x86_64__)
static_assert(offsetof(zx_thread_state_general_regs_t, rax) == 8 * 0, "");
static_assert(offsetof(zx_thread_state_general_regs_t, rbx) == 8 * 1, "");
static_assert(offsetof(zx_thread_state_general_regs_t, rcx) == 8 * 2, "");
static_assert(offsetof(zx_thread_state_general_regs_t, rdx) == 8 * 3, "");
static_assert(offsetof(zx_thread_state_general_regs_t, rsi) == 8 * 4, "");
static_assert(offsetof(zx_thread_state_general_regs_t, rdi) == 8 * 5, "");
static_assert(offsetof(zx_thread_state_general_regs_t, rbp) == 8 * 6, "");
static_assert(offsetof(zx_thread_state_general_regs_t, rsp) == 8 * 7, "");
static_assert(offsetof(zx_thread_state_general_regs_t, r8) == 8 * 8, "");
static_assert(offsetof(zx_thread_state_general_regs_t, r9) == 8 * 9, "");
static_assert(offsetof(zx_thread_state_general_regs_t, r10) == 8 * 10, "");
static_assert(offsetof(zx_thread_state_general_regs_t, r11) == 8 * 11, "");
static_assert(offsetof(zx_thread_state_general_regs_t, r12) == 8 * 12, "");
static_assert(offsetof(zx_thread_state_general_regs_t, r13) == 8 * 13, "");
static_assert(offsetof(zx_thread_state_general_regs_t, r14) == 8 * 14, "");
static_assert(offsetof(zx_thread_state_general_regs_t, r15) == 8 * 15, "");
static_assert(offsetof(zx_thread_state_general_regs_t, rip) == 8 * 16, "");
static_assert(offsetof(zx_thread_state_general_regs_t, rflags) == 8 * 17, "");
static_assert(sizeof(zx_thread_state_general_regs_t) == 8 * 18, "");
__asm__(".pushsection .text, \"ax\", @progbits\n"
        ".global spin_with_general_regs\n"
        "spin_with_general_regs:\n"
        // Set flags using POPF. Note that we use POPF rather than SAHF
        // because POPF is able to set more flags than SAHF.
        "pushq 8*17(%rdi)\n"
        "popfq\n"
        // Load general purpose registers.
        "movq 8*0(%rdi), %rax\n"
        "movq 8*1(%rdi), %rbx\n"
        "movq 8*2(%rdi), %rcx\n"
        "movq 8*3(%rdi), %rdx\n"
        "movq 8*4(%rdi), %rsi\n"
        // Skip assigning rdi here and assign it last.
        "movq 8*6(%rdi), %rbp\n"
        "movq 8*7(%rdi), %rsp\n"
        "movq 8*8(%rdi), %r8\n"
        "movq 8*9(%rdi), %r9\n"
        "movq 8*10(%rdi), %r10\n"
        "movq 8*11(%rdi), %r11\n"
        "movq 8*12(%rdi), %r12\n"
        "movq 8*13(%rdi), %r13\n"
        "movq 8*14(%rdi), %r14\n"
        "movq 8*15(%rdi), %r15\n"
        "movq 8*5(%rdi), %rdi\n"
        ".global spin_with_general_regs_spin_address\n"
        "spin_with_general_regs_spin_address:\n"
        "jmp spin_with_general_regs_spin_address\n"
        ".popsection\n");
#elif defined(__aarch64__)
static_assert(offsetof(zx_thread_state_general_regs_t, r[0]) == 8 * 0, "");
static_assert(offsetof(zx_thread_state_general_regs_t, r[1]) == 8 * 1, "");
static_assert(offsetof(zx_thread_state_general_regs_t, lr) == 8 * 30, "");
static_assert(offsetof(zx_thread_state_general_regs_t, sp) == 8 * 31, "");
static_assert(offsetof(zx_thread_state_general_regs_t, pc) == 8 * 32, "");
static_assert(offsetof(zx_thread_state_general_regs_t, cpsr) == 8 * 33, "");
static_assert(sizeof(zx_thread_state_general_regs_t) == 8 * 34, "");
__asm__(".pushsection .text, \"ax\", %progbits\n"
        ".global spin_with_general_regs\n"
        "spin_with_general_regs:\n"
        // Load sp via a temporary register.
        "ldr x1, [x0, #8*31]\n"
        "mov sp, x1\n"
        // Load NZCV flags, a subset of the PSTATE/CPSR register.
        "ldr x1, [x0, #8*33]\n"
        "msr nzcv, x1\n"
        // Load general purpose registers.
        // Skip assigning x0 and x1 here and assign them last.
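        // (x0 is the base pointer for all of these loads, so r[0] and r[1] are
        // loaded together by the final ldp once every other register is in place.)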
254 "ldp x2, x3, [x0, #8*2]\n" 255 "ldp x4, x5, [x0, #8*4]\n" 256 "ldp x6, x7, [x0, #8*6]\n" 257 "ldp x8, x9, [x0, #8*8]\n" 258 "ldp x10, x11, [x0, #8*10]\n" 259 "ldp x12, x13, [x0, #8*12]\n" 260 "ldp x14, x15, [x0, #8*14]\n" 261 "ldp x16, x17, [x0, #8*16]\n" 262 "ldp x18, x19, [x0, #8*18]\n" 263 "ldp x20, x21, [x0, #8*20]\n" 264 "ldp x22, x23, [x0, #8*22]\n" 265 "ldp x24, x25, [x0, #8*24]\n" 266 "ldp x26, x27, [x0, #8*26]\n" 267 "ldp x28, x29, [x0, #8*28]\n" 268 "ldr x30, [x0, #8*30]\n" 269 "ldp x0, x1, [x0]\n" 270 ".global spin_with_general_regs_spin_address\n" 271 "spin_with_general_regs_spin_address:\n" 272 "b spin_with_general_regs_spin_address\n" 273 ".popsection\n"); 274#else 275#error Unsupported architecture 276#endif 277 278// spin_with_fp_regs() function. 279#if defined(__x86_64__) 280static_assert(offsetof(zx_thread_state_fp_regs_t, fcw) == 0, ""); 281static_assert(offsetof(zx_thread_state_fp_regs_t, fsw) == 2, ""); 282static_assert(offsetof(zx_thread_state_fp_regs_t, ftw) == 4, ""); 283static_assert(offsetof(zx_thread_state_fp_regs_t, fop) == 6, ""); 284static_assert(offsetof(zx_thread_state_fp_regs_t, fip) == 8, ""); 285static_assert(offsetof(zx_thread_state_fp_regs_t, fdp) == 16, ""); 286static_assert(offsetof(zx_thread_state_fp_regs_t, st) == 32, ""); 287__asm__(".pushsection .text, \"ax\", @progbits\n" 288 ".global spin_with_fp_regs\n" 289 "spin_with_fp_regs:\n" 290 291 // rdi = &zx_thread_state_fp_regs_t.st[0] 292 "lea 32(%rdi), %rdi\n" 293 294 "movq $0x9999, %rax\n" 295 "movq %rax, %xmm0\n" 296 297 "movq 16*0(%rdi), %mm0\n" 298 "movq 16*1(%rdi), %mm1\n" 299 "movq 16*2(%rdi), %mm2\n" 300 "movq 16*3(%rdi), %mm3\n" 301 "movq 16*4(%rdi), %mm4\n" 302 "movq 16*5(%rdi), %mm5\n" 303 "movq 16*6(%rdi), %mm6\n" 304 "movq 16*7(%rdi), %mm7\n" 305 306 "spin_with_fp_regs_spin_address:\n" 307 "jmp spin_with_fp_regs_spin_address\n" 308 ".popsection\n"); 309#elif defined(__aarch64__) 310// Just spins and does nothing. ARM64 doesn't define a separate FP state, but doing this allows the 311// rest of the code to be platform-independent. 312__asm__(".pushsection .text, \"ax\", %progbits\n" 313 ".global spin_with_fp_regs\n" 314 "spin_with_fp_regs:\n" 315 316 // Do nothing. 317 318 "spin_with_fp_regs_spin_address:\n" 319 "b spin_with_fp_regs_spin_address\n" 320 ".popsection\n"); 321#else 322#error Unsupported architecture 323#endif 324 325// spin_with_vector_regs() function. 326#if defined(__x86_64__) 327__asm__(".pushsection .text, \"ax\", @progbits\n" 328 ".global spin_with_vector_regs\n" 329 "spin_with_vector_regs:\n" 330 331 // rdi = zmm[0] on call. This only loads xmm registers which are guaranteed to exist. 332 // Each zmm input is 512 bits = 64 bytes. 
333 "movdqu 64*0(%rdi), %xmm0\n" 334 "movdqu 64*1(%rdi), %xmm1\n" 335 "movdqu 64*2(%rdi), %xmm2\n" 336 "movdqu 64*3(%rdi), %xmm3\n" 337 "movdqu 64*4(%rdi), %xmm4\n" 338 "movdqu 64*5(%rdi), %xmm5\n" 339 "movdqu 64*6(%rdi), %xmm6\n" 340 "movdqu 64*7(%rdi), %xmm7\n" 341 "movdqu 64*8(%rdi), %xmm8\n" 342 "movdqu 64*9(%rdi), %xmm9\n" 343 "movdqu 64*10(%rdi), %xmm10\n" 344 "movdqu 64*11(%rdi), %xmm11\n" 345 "movdqu 64*12(%rdi), %xmm12\n" 346 "movdqu 64*13(%rdi), %xmm13\n" 347 "movdqu 64*14(%rdi), %xmm14\n" 348 "movdqu 64*15(%rdi), %xmm15\n" 349 350 "spin_with_vector_regs_spin_address:\n" 351 "jmp spin_with_vector_regs_spin_address\n" 352 ".popsection\n"); 353#elif defined(__aarch64__) 354static_assert(offsetof(zx_thread_state_vector_regs_t, fpcr) == 0, ""); 355static_assert(offsetof(zx_thread_state_vector_regs_t, fpsr) == 4, ""); 356static_assert(offsetof(zx_thread_state_vector_regs_t, v) == 8, ""); 357__asm__(".pushsection .text, \"ax\", %progbits\n" 358 ".global spin_with_vector_regs\n" 359 "spin_with_vector_regs:\n" 360 361 // FPCR and FPSR are first. 362 "ldp w1, w2, [x0]\n" 363 "msr fpcr, x1\n" 364 "msr fpsr, x2\n" 365 366 // Skip to the vector registers. 367 "add x0, x0, 8\n" 368 369 // Each register is 128 bits = 16 bytes, so each pair is 32 bytes. 370 "ldp q0, q1, [x0, #(0 * 32)]\n" 371 "ldp q2, q3, [x0, #(1 * 32)]\n" 372 "ldp q4, q5, [x0, #(2 * 32)]\n" 373 "ldp q6, q7, [x0, #(3 * 32)]\n" 374 "ldp q8, q9, [x0, #(4 * 32)]\n" 375 "ldp q10, q11, [x0, #(5 * 32)]\n" 376 "ldp q12, q13, [x0, #(6 * 32)]\n" 377 "ldp q14, q15, [x0, #(7 * 32)]\n" 378 "ldp q16, q17, [x0, #(8 * 32)]\n" 379 "ldp q18, q19, [x0, #(9 * 32)]\n" 380 "ldp q20, q21, [x0, #(10 * 32)]\n" 381 "ldp q22, q23, [x0, #(11 * 32)]\n" 382 "ldp q24, q25, [x0, #(12 * 32)]\n" 383 "ldp q26, q27, [x0, #(13 * 32)]\n" 384 "ldp q28, q29, [x0, #(14 * 32)]\n" 385 "ldp q30, q31, [x0, #(15 * 32)]\n" 386 387 "spin_with_vector_regs_spin_address:\n" 388 "b spin_with_vector_regs_spin_address\n" 389 ".popsection\n"); 390#else 391#error Unsupported architecture 392#endif 393 394// save_general_regs_and_exit_thread() function. 395#if defined(__x86_64__) 396__asm__(".pushsection .text,\"ax\", @progbits\n" 397 ".global save_general_regs_and_exit_thread\n" 398 "save_general_regs_and_exit_thread:\n" 399 "movq %rax, 8*0(%rsp)\n" 400 "movq %rbx, 8*1(%rsp)\n" 401 "movq %rcx, 8*2(%rsp)\n" 402 "movq %rdx, 8*3(%rsp)\n" 403 "movq %rsi, 8*4(%rsp)\n" 404 "movq %rdi, 8*5(%rsp)\n" 405 "movq %rbp, 8*6(%rsp)\n" 406 "movq %rsp, 8*7(%rsp)\n" 407 "movq %r8, 8*8(%rsp)\n" 408 "movq %r9, 8*9(%rsp)\n" 409 "movq %r10, 8*10(%rsp)\n" 410 "movq %r11, 8*11(%rsp)\n" 411 "movq %r12, 8*12(%rsp)\n" 412 "movq %r13, 8*13(%rsp)\n" 413 "movq %r14, 8*14(%rsp)\n" 414 "movq %r15, 8*15(%rsp)\n" 415 // Save the flags register. 416 "pushfq\n" 417 "popq %rax\n" 418 "movq %rax, 8*17(%rsp)\n" 419 // Fill out the rip field with known value. 
420 "leaq save_general_regs_and_exit_thread(%rip), %rax\n" 421 "movq %rax, 8*16(%rsp)\n" 422 "call zx_thread_exit@PLT\n" 423 "ud2\n" 424 ".popsection\n"); 425#elif defined(__aarch64__) 426__asm__(".pushsection .text, \"ax\", %progbits\n" 427 ".global save_general_regs_and_exit_thread\n" 428 "save_general_regs_and_exit_thread:\n" 429 "stp x0, x1, [sp, #8*0]\n" 430 "stp x2, x3, [sp, #8*2]\n" 431 "stp x4, x5, [sp, #8*4]\n" 432 "stp x6, x7, [sp, #8*6]\n" 433 "stp x8, x9, [sp, #8*8]\n" 434 "stp x10, x11, [sp, #8*10]\n" 435 "stp x12, x13, [sp, #8*12]\n" 436 "stp x14, x15, [sp, #8*14]\n" 437 "stp x16, x17, [sp, #8*16]\n" 438 "stp x18, x19, [sp, #8*18]\n" 439 "stp x20, x21, [sp, #8*20]\n" 440 "stp x22, x23, [sp, #8*22]\n" 441 "stp x24, x25, [sp, #8*24]\n" 442 "stp x26, x27, [sp, #8*26]\n" 443 "stp x28, x29, [sp, #8*28]\n" 444 "str x30, [sp, #8*30]\n" 445 // Save the sp register. 446 "mov x0, sp\n" 447 "str x0, [sp, #8*31]\n" 448 // Fill out the pc field with known value. 449 "adr x0, save_general_regs_and_exit_thread\n" 450 "str x0, [sp, #8*32]\n" 451 // Save NZCV flags, a subset of the PSTATE/CPSR register. 452 "mrs x0, nzcv\n" 453 "str x0, [sp, #8*33]\n" 454 "bl zx_thread_exit\n" 455 "brk 0\n" 456 ".popsection\n"); 457#else 458#error Unsupported architecture 459#endif 460 461// save_fp_regs_and_exit_thread() function. 462#if defined(__x86_64__) 463static_assert(offsetof(zx_thread_state_fp_regs, st) == 32, ""); 464__asm__(".pushsection .text,\"ax\", @progbits\n" 465 ".global save_fp_regs_and_exit_thread\n" 466 "save_fp_regs_and_exit_thread:\n" 467 468 // This only saves the low 64 bits, which is the MMX register. Each slot in the struct is 469 // 128 bits so need to add 16 bytes each time. The 32 bytes is the start of the FP regs in 470 // the struct (see static assert above). 471 "movq %mm0, 32 + 16*0(%rsp)\n" 472 "movq %mm1, 32 + 16*1(%rsp)\n" 473 "movq %mm2, 32 + 16*2(%rsp)\n" 474 "movq %mm3, 32 + 16*3(%rsp)\n" 475 "movq %mm4, 32 + 16*4(%rsp)\n" 476 "movq %mm5, 32 + 16*5(%rsp)\n" 477 "movq %mm6, 32 + 16*6(%rsp)\n" 478 "movq %mm7, 32 + 16*7(%rsp)\n" 479 480 "call zx_thread_exit@PLT\n" 481 "ud2\n" 482 ".popsection\n"); 483#elif defined(__aarch64__) 484__asm__(".pushsection .text, \"ax\", %progbits\n" 485 ".global save_fp_regs_and_exit_thread\n" 486 "save_fp_regs_and_exit_thread:\n" 487 488 // Does nothing (no FP values). 489 490 "bl zx_thread_exit\n" 491 "brk 0\n" 492 ".popsection\n"); 493#else 494#error Unsupported architecture 495#endif 496 497// save_vector_regs_and_exit_thread() function. 498#if defined(__x86_64__) 499static_assert(offsetof(zx_thread_state_vector_regs, zmm) == 0, ""); 500__asm__(".pushsection .text,\"ax\", @progbits\n" 501 ".global save_vector_regs_and_exit_thread\n" 502 "save_vector_regs_and_exit_thread:\n" 503 504 // Each vector is 512 bits (64 bytes). We only read the first 128 (xmm registers). 
505 "movdqu %xmm0, 64*0(%rsp)\n" 506 "movdqu %xmm1, 64*1(%rsp)\n" 507 "movdqu %xmm2, 64*2(%rsp)\n" 508 "movdqu %xmm3, 64*3(%rsp)\n" 509 "movdqu %xmm4, 64*4(%rsp)\n" 510 "movdqu %xmm5, 64*5(%rsp)\n" 511 "movdqu %xmm6, 64*6(%rsp)\n" 512 "movdqu %xmm7, 64*7(%rsp)\n" 513 "movdqu %xmm8, 64*8(%rsp)\n" 514 "movdqu %xmm9, 64*9(%rsp)\n" 515 "movdqu %xmm10, 64*10(%rsp)\n" 516 "movdqu %xmm11, 64*11(%rsp)\n" 517 "movdqu %xmm12, 64*12(%rsp)\n" 518 "movdqu %xmm13, 64*13(%rsp)\n" 519 "movdqu %xmm14, 64*14(%rsp)\n" 520 "movdqu %xmm15, 64*15(%rsp)\n" 521 522 "call zx_thread_exit@PLT\n" 523 "ud2\n" 524 ".popsection\n"); 525#elif defined(__aarch64__) 526__asm__(".pushsection .text, \"ax\", %progbits\n" 527 ".global save_vector_regs_and_exit_thread\n" 528 "save_vector_regs_and_exit_thread:\n" 529 530 // Input is in SP. 531 "mov x0, sp\n" 532 533 // FPCR and FPSR. 534 "mrs x1, fpcr\n" 535 "mrs x2, fpsr\n" 536 "stp w1, w2, [x0]\n" 537 538 // Skip to the vector registers 539 "add x0, x0, 8\n" 540 541 // Each register is 128 bits = 16 bytes, so each pair is 32 bytes. 542 "stp q0, q1, [x0, #(0 * 32)]\n" 543 "stp q2, q3, [x0, #(1 * 32)]\n" 544 "stp q4, q5, [x0, #(2 * 32)]\n" 545 "stp q6, q7, [x0, #(3 * 32)]\n" 546 "stp q8, q9, [x0, #(4 * 32)]\n" 547 "stp q10, q11, [x0, #(5 * 32)]\n" 548 "stp q12, q13, [x0, #(6 * 32)]\n" 549 "stp q14, q15, [x0, #(7 * 32)]\n" 550 "stp q16, q17, [x0, #(8 * 32)]\n" 551 "stp q18, q19, [x0, #(9 * 32)]\n" 552 "stp q20, q21, [x0, #(10 * 32)]\n" 553 "stp q22, q23, [x0, #(11 * 32)]\n" 554 "stp q24, q25, [x0, #(12 * 32)]\n" 555 "stp q26, q27, [x0, #(13 * 32)]\n" 556 "stp q28, q29, [x0, #(14* 32)]\n" 557 "stp q30, q31, [x0, #(15 * 32)]\n" 558 559 "bl zx_thread_exit\n" 560 "brk 0\n" 561 ".popsection\n"); 562#else 563#error Unsupported architecture 564#endif 565