// Copyright 2016 The Fuchsia Authors
//
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file or at
// https://opensource.org/licenses/MIT

/****************************************************************************
 * This file handles detection of supported extended register saving
 * mechanisms. Of the ones detected, the following is our preference for
 * mechanisms, from best to worst:
 *
 * 1) XSAVES (performs modified+init optimizations, uses compressed register
 *    form, and can save supervisor-only registers)
 * 2) XSAVEOPT (performs modified+init optimizations)
 * 3) XSAVE (no optimizations/compression, but can save all supported extended
 *    registers)
 * 4) FXSAVE (can only save FPU/SSE registers)
 * 5) none (will not save any extended registers, will not allow enabling
 *    features that use extended registers.)
 ****************************************************************************/

#include <arch/ops.h>
#include <arch/x86.h>
#include <arch/x86/feature.h>
#include <arch/x86/mp.h>
#include <arch/x86/proc_trace.h>
#include <arch/x86/registers.h>
#include <fbl/auto_call.h>
#include <inttypes.h>
#include <kernel/auto_lock.h>
#include <kernel/spinlock.h>
#include <kernel/thread.h>
#include <string.h>
#include <trace.h>
#include <zircon/compiler.h>

#define LOCAL_TRACE 0

// MSR selecting which supervisor state components XSAVES/XRSTORS manage.
#define IA32_XSS_MSR 0xDA0

// Offset in xsave area that components >= 2 start at.
#define XSAVE_EXTENDED_AREA_OFFSET 576

// The first xsave component in the extended (non-legacy) area.
#define XSAVE_FIRST_EXT_COMPONENT 2

// Number of possible components in the state vector.
#define XSAVE_MAX_COMPONENTS 63

// Bit in XCOMP_BV field of xsave indicating compacted format.
#define XSAVE_XCOMP_BV_COMPACT (1ULL << 63)

// Raw save/restore primitives. Each is a thin wrapper around the
// corresponding CPU instruction; definitions are at the bottom of this file.
static void fxsave(void* register_state);
static void fxrstor(void* register_state);
static void xrstor(void* register_state, uint64_t feature_mask);
static void xrstors(void* register_state, uint64_t feature_mask);
static void xsave(void* register_state, uint64_t feature_mask);
static void xsaveopt(void* register_state, uint64_t feature_mask);
static void xsaves(void* register_state, uint64_t feature_mask);

static void read_xsave_state_info(void);
static void recompute_state_size(void);

// Indexed by component. Components 0 and 1 are the "legacy" floating point and
// SSE ones. These do not have a size or align64 set in this structure since
// they are inside the legacy xsave area. Use XSAVE_FIRST_EXT_COMPONENT for
// the first valid entry.
static struct {
    // Total size of this component in bytes.
    uint32_t size;

    // If true, this component must be aligned to a 64-byte boundary.
    bool align64;
} state_components[XSAVE_MAX_COMPONENTS];

/* Supported bits in XCR0 (each corresponds to a state component) */
static uint64_t xcr0_component_bitmap = 0;
/* Supported bits in IA32_XSS (each corresponds to a state component) */
static uint64_t xss_component_bitmap = 0;
/* Maximum total size for xsave, if all features are enabled */
static size_t xsave_max_area_size = 0;
/* Does this processor support the XSAVES instruction */
static bool xsaves_supported = false;
/* Does this processor support the XSAVEOPT instruction */
static bool xsaveopt_supported = false;
/* Does this processor support the XGETBV instruction with ecx=1 */
static bool xgetbv_1_supported = false;
/* Does this processor support the XSAVE instruction */
static bool xsave_supported = false;
/* Does this processor support FXSAVE */
static bool fxsave_supported = false;
/* Maximum register state size */
static size_t register_state_size = 0;
/* Spinlock to guard register
   state size changes */
static SpinLock state_lock;

/* For FXRSTOR, we need 512 bytes to save the state. For XSAVE-based
 * mechanisms, we only need 512 + 64 bytes for the initial state, since
 * our initial state only needs to specify some SSE state (masking exceptions),
 * and XSAVE doesn't require space for any disabled register groups after
 * the last enabled one. */
static uint8_t __ALIGNED(64)
    extended_register_init_state[512 + 64] = {0};

static_assert(sizeof(x86_xsave_legacy_area) == 416, "Size of legacy xsave area should match spec.");

/* Format described in Intel 3A section 13.4 */
struct xsave_area {
    // Always valid, even when using the older fxsave.
    x86_xsave_legacy_area legacy;

    uint8_t reserved1[96];

    // The xsave header. It and the extended regions are only valid when using xsave, not fxsave.
    uint64_t xstate_bv;
    uint64_t xcomp_bv;
    uint8_t reserved2[48];

    // Variable-length region holding components >= XSAVE_FIRST_EXT_COMPONENT.
    uint8_t extended_region[];
} __PACKED;
static_assert(offsetof(xsave_area, extended_region) == XSAVE_EXTENDED_AREA_OFFSET,
              "xsave_area format should match CPU spec.");

// Per-CPU setup: turns on the XSAVE feature set (when supported) via CR4 and
// XCR0, then enables the x87 FPU. Run on every CPU, not just the boot CPU.
static void x86_extended_register_cpu_init(void) {
    if (likely(xsave_supported)) {
        ulong cr4 = x86_get_cr4();
        /* Enable XSAVE feature set */
        x86_set_cr4(cr4 | X86_CR4_OSXSAVE);
        /* Put xcr0 into a known state (X87 must be enabled in this register) */
        x86_xsetbv(0, X86_XSAVE_STATE_BIT_X87);
    }

    /* Enable the FPU */
    __UNUSED bool enabled = x86_extended_register_enable_feature(
        X86_EXTENDED_REGISTER_X87);
    DEBUG_ASSERT(enabled);
}

// Sets the portions of the xsave legacy area such that the x87 state is considered in its "initial
// configuration" as defined by Intel Vol 1 section 13.6.
//
// "The x87 state component comprises bytes 23:0 and bytes 159:32." This doesn't count the MXCSR
// register.
static void set_x87_initial_state(x86_xsave_legacy_area* legacy_area) {
    legacy_area->fcw = 0x037f;
    legacy_area->fsw = 0;
    // The initial value of the FTW register is 0xffff. The FTW field in the xsave area is an
    // abbreviated version (see Intel manual sec 13.5.1). In the FTW register 1 bits indicate
    // the empty tag (two per register), while the abbreviated version uses 1 bit per register and
    // 0 indicates empty. So set to 0 to indicate all registers are empty.
    legacy_area->ftw = 0;
    legacy_area->fop = 0;
    legacy_area->fip = 0;
    legacy_area->fdp = 0;

    // Register values are all 0.
    constexpr size_t fp_reg_size = sizeof(legacy_area->st);
    static_assert(fp_reg_size == 128, "Struct size is wrong");
    memset(&legacy_area->st[0], 0, fp_reg_size);
}

// SSE state is only the XMM registers which is all 0 and does not count MXCSR as defined by Intel
// Vol 1 section 13.6.
static void set_sse_initial_state(x86_xsave_legacy_area* legacy_area) {
    constexpr size_t sse_reg_size = sizeof(legacy_area->xmm);
    static_assert(sse_reg_size == 256, "Struct size is wrong");
    memset(&legacy_area->xmm[0], 0, sse_reg_size);
}

/* Figure out what forms of register saving this machine supports and
 * select the best one */
void x86_extended_register_init(void) {
    /* Have we already read the cpu support info */
    static bool info_initialized = false;
    bool initialized_cpu_already = false;

    if (!info_initialized) {
        // One-time detection must run on the boot CPU.
        DEBUG_ASSERT(arch_curr_cpu_num() == 0);

        read_xsave_state_info();
        info_initialized = true;

        /* We currently assume that if xsave isn't supported, fxsave is */
        fxsave_supported = x86_feature_test(X86_FEATURE_FXSR);

        /* Set up initial states */
        if (likely(fxsave_supported || xsave_supported)) {
            x86_extended_register_cpu_init();
            initialized_cpu_already = true;

            // Intel Vol 3 section 13.5.4 describes the XSAVE initialization. The only change we
            // want to make to the init state is having SIMD exceptions masked. The "legacy" area
            // of the xsave structure is valid for fxsave as well.
            xsave_area* area = reinterpret_cast<xsave_area*>(extended_register_init_state);
            set_x87_initial_state(&area->legacy);
            set_sse_initial_state(&area->legacy);
            area->legacy.mxcsr = 0x3f << 7;  // Mask all SSE exceptions.

            if (xsave_supported) {
                area->xstate_bv |= X86_XSAVE_STATE_BIT_SSE;

                /* If xsaves is being used, then make the saved state be in
                 * compact form. xrstors will GPF if it is not. */
                if (xsaves_supported) {
                    area->xcomp_bv |= XSAVE_XCOMP_BV_COMPACT;
                    area->xcomp_bv |= area->xstate_bv;
                }
            }
        }

        if (likely(xsave_supported)) {
            recompute_state_size();
        } else if (fxsave_supported) {
            // FXSAVE has a fixed 512-byte save area.
            register_state_size = 512;
        }
    }
    /* Ensure that xsaves_supported == true implies xsave_supported == true */
    DEBUG_ASSERT(!xsaves_supported || xsave_supported);
    /* Ensure that xsaveopt_supported == true implies xsave_supported == true */
    DEBUG_ASSERT(!xsaveopt_supported || xsave_supported);

    if (!initialized_cpu_already) {
        x86_extended_register_cpu_init();
    }
}

// Enables the requested extended register feature on the calling CPU.
// Returns true on success, false if the hardware lacks support (or the
// feature is not implemented here).
bool x86_extended_register_enable_feature(
    enum x86_extended_register_feature feature) {
    /* We currently assume this is only called during initialization.
     * We rely on interrupts being disabled so xgetbv/xsetbv will not be
     * racy */
    DEBUG_ASSERT(arch_ints_disabled());

    switch (feature) {
    case X86_EXTENDED_REGISTER_X87: {
        if (unlikely(!x86_feature_test(X86_FEATURE_FPU) ||
                     (!fxsave_supported && !xsave_supported))) {
            return false;
        }

        /* No x87 emul, monitor co-processor */
        ulong cr0 = x86_get_cr0();
        cr0 &= ~X86_CR0_EM;
        cr0 |= X86_CR0_NE;
        cr0 |= X86_CR0_MP;
        x86_set_cr0(cr0);

        /* Init x87, starts with exceptions masked */
        __asm__ __volatile__("finit"
                             :
                             :
                             : "memory");

        if (likely(xsave_supported)) {
            x86_xsetbv(0, x86_xgetbv(0) | X86_XSAVE_STATE_BIT_X87);
        }
        break;
    }
    case X86_EXTENDED_REGISTER_SSE: {
        if (unlikely(
                !x86_feature_test(X86_FEATURE_SSE) ||
                !x86_feature_test(X86_FEATURE_FXSR))) {

            return false;
        }

        /* Init SSE */
        ulong cr4 = x86_get_cr4();
        cr4 |= X86_CR4_OSXMMEXPT;
        cr4 |= X86_CR4_OSFXSR;
        x86_set_cr4(cr4);

        /* mask all exceptions */
        uint32_t mxcsr = 0;
        __asm__ __volatile__("stmxcsr %0"
                             : "=m"(mxcsr));
        mxcsr = (0x3f << 7);
        __asm__ __volatile__("ldmxcsr %0"
                             :
                             : "m"(mxcsr));

        if (likely(xsave_supported)) {
            x86_xsetbv(0, x86_xgetbv(0) | X86_XSAVE_STATE_BIT_SSE);
        }
        break;
    }
    case X86_EXTENDED_REGISTER_AVX: {
        if (!xsave_supported ||
            !(xcr0_component_bitmap & X86_XSAVE_STATE_BIT_AVX)) {
            return false;
        }

        /* Enable SIMD exceptions */
        ulong cr4 = x86_get_cr4();
        cr4 |= X86_CR4_OSXMMEXPT;
        x86_set_cr4(cr4);

        x86_xsetbv(0, x86_xgetbv(0) | X86_XSAVE_STATE_BIT_AVX);
        break;
    }
    case X86_EXTENDED_REGISTER_MPX: {
        /* Currently unsupported */
        return false;
    }
    case X86_EXTENDED_REGISTER_AVX512: {
        // All three AVX-512 state components must be supported and are enabled together.
        const uint64_t xsave_avx512 =
            X86_XSAVE_STATE_BIT_AVX512_OPMASK |
            X86_XSAVE_STATE_BIT_AVX512_LOWERZMM_HIGH |
            X86_XSAVE_STATE_BIT_AVX512_HIGHERZMM;

        if (!xsave_supported ||
            (xcr0_component_bitmap & xsave_avx512) != xsave_avx512) {
            return false;
        }
        x86_xsetbv(0, x86_xgetbv(0) | xsave_avx512);
        break;
    }
    case X86_EXTENDED_REGISTER_PT: {
        if (!xsaves_supported ||
            !(xss_component_bitmap & X86_XSAVE_STATE_BIT_PT)) {
            return false;
        }
        x86_set_extended_register_pt_state(true);
        break;
    }
    case X86_EXTENDED_REGISTER_PKRU: {
        /* Currently unsupported */
        return false;
    }
    default:
        return false;
    }

    recompute_state_size();
    return true;
}

// Returns the number of bytes currently required to hold the extended
// register state (monotonically increasing; see recompute_state_size).
size_t x86_extended_register_size(void) {
    return register_state_size;
}

// Copies the canonical initial extended register state into |register_state|.
void x86_extended_register_init_state(void* register_state) {
    // Copy the initialization state; this overcopies on systems that fall back
    // to fxsave, but the buffer is required to be large enough.
    memcpy(register_state, extended_register_init_state, sizeof(extended_register_init_state));
}

// Saves extended register state into |register_state| using the best
// supported mechanism (see the file-header preference order).
void x86_extended_register_save_state(void* register_state) {
    /* The idle threads have no extended register state */
    if (unlikely(!register_state)) {
        return;
    }

    if (xsaves_supported) {
        xsaves(register_state, ~0ULL);
    } else if (xsaveopt_supported) {
        xsaveopt(register_state, ~0ULL);
    } else if (xsave_supported) {
        xsave(register_state, ~0ULL);
    } else if (fxsave_supported) {
        fxsave(register_state);
    }
}

// Restores extended register state from |register_state|. The restore
// instruction is paired with the save mechanism: xsaves <-> xrstors,
// xsave/xsaveopt <-> xrstor, fxsave <-> fxrstor.
void x86_extended_register_restore_state(void* register_state) {
    /* The idle threads have no extended register state */
    if (unlikely(!register_state)) {
        return;
    }

    if (xsaves_supported) {
        xrstors(register_state, ~0ULL);
    } else if (xsave_supported) {
        xrstor(register_state, ~0ULL);
    } else if (fxsave_supported) {
        fxrstor(register_state);
    }
}

// Saves the outgoing thread's extended state (if any) and restores the
// incoming thread's.
void x86_extended_register_context_switch(
    thread_t* old_thread, thread_t* new_thread) {
    if
       (likely(old_thread)) {
        x86_extended_register_save_state(old_thread->arch.extended_register_state);
    }
    x86_extended_register_restore_state(new_thread->arch.extended_register_state);
}

// Queries CPUID leaf 0xD to discover which xsave components and which
// save/restore instruction variants the CPU supports, filling in the
// file-scope support flags, component bitmaps, and state_components table.
static void read_xsave_state_info(void) {
    xsave_supported = x86_feature_test(X86_FEATURE_XSAVE);
    if (!xsave_supported) {
        LTRACEF("xsave not supported\n");
        return;
    }

    /* if we bail, set everything to unsupported */
    auto ac = fbl::MakeAutoCall([]() {
        xsave_supported = false;
        xsaves_supported = false;
        xsaveopt_supported = false;
    });

    /* This procedure is described in Intel Vol 1 section 13.2 */

    /* Read feature support from subleaves 0 and 1 */
    struct cpuid_leaf leaf;
    if (!x86_get_cpuid_subleaf(X86_CPUID_XSAVE, 0, &leaf)) {
        LTRACEF("could not find xsave leaf\n");
        return;
    }
    xcr0_component_bitmap = ((uint64_t)leaf.d << 32) | leaf.a;
    size_t max_area = XSAVE_EXTENDED_AREA_OFFSET;

    x86_get_cpuid_subleaf(X86_CPUID_XSAVE, 1, &leaf);
    xgetbv_1_supported = !!(leaf.a & (1 << 2));
    xsaves_supported = !!(leaf.a & (1 << 3));
    xsaveopt_supported = !!(leaf.a & (1 << 0));
    xss_component_bitmap = ((uint64_t)leaf.d << 32) | leaf.c;

    LTRACEF("xcr0 bitmap: %016" PRIx64 "\n", xcr0_component_bitmap);
    LTRACEF("xss bitmap: %016" PRIx64 "\n", xss_component_bitmap);

    /* Sanity check; all CPUs that support xsave support components 0 and 1 */
    DEBUG_ASSERT((xcr0_component_bitmap & 0x3) == 0x3);
    if ((xcr0_component_bitmap & 0x3) != 0x3) {
        LTRACEF("unexpected xcr0 bitmap %016" PRIx64 "\n",
                xcr0_component_bitmap);
        return;
    }

    /* we're okay from now on out */
    ac.cancel();

    /* Read info about the state components */
    for (int i = XSAVE_FIRST_EXT_COMPONENT; i < XSAVE_MAX_COMPONENTS; ++i) {
        // Skip components supported neither as user (XCR0) nor supervisor (XSS) state.
        if (!(xcr0_component_bitmap & (1ULL << i)) &&
            !(xss_component_bitmap & (1ULL << i))) {
            continue;
        }
        x86_get_cpuid_subleaf(X86_CPUID_XSAVE, i, &leaf);

        bool align64 = !!(leaf.c & 0x2);

        state_components[i].size = leaf.a;
        state_components[i].align64 = align64;
        LTRACEF("component %d size: %u (xcr0 %d)\n",
                i, state_components[i].size,
                !!(xcr0_component_bitmap & (1ULL << i)));

        if (align64) {
            max_area = ROUNDUP(max_area, 64);
        }
        max_area += leaf.a;
    }
    xsave_max_area_size = max_area;
    LTRACEF("total xsave size: %zu\n", max_area);

    return;
}

// Recomputes register_state_size from the currently-enabled components.
// Called after any feature is enabled; the size only ever increases.
static void recompute_state_size(void) {
    if (!xsave_supported) {
        return;
    }

    size_t new_size = 0;
    /* If we're in a compacted form, compute the total size. The algorithm
     * for this is defined in Intel Vol 1 section 13.4.3 */
    if (xsaves_supported) {
        new_size = XSAVE_EXTENDED_AREA_OFFSET;
        // Enabled components are the union of user (XCR0) and supervisor (IA32_XSS) bits.
        uint64_t enabled_features = x86_xgetbv(0) | read_msr(IA32_XSS_MSR);
        for (int i = XSAVE_FIRST_EXT_COMPONENT; i < XSAVE_MAX_COMPONENTS; ++i) {
            if (!(enabled_features & (1ULL << i))) {
                continue;
            }

            if (state_components[i].align64) {
                new_size = ROUNDUP(new_size, 64);
            }
            new_size += state_components[i].size;
        }
    } else {
        /* Otherwise, use CPUID.(EAX=0xD,ECX=1):EBX, which stores the computed
         * maximum size required for saving everything specified in XCR0 */
        struct cpuid_leaf leaf;
        x86_get_cpuid_subleaf(X86_CPUID_XSAVE, 0, &leaf);
        new_size = leaf.b;
    }

    AutoSpinLockNoIrqSave guard(&state_lock);
    /* Only allow size to increase; all CPUs should converge to the same value,
     * but for sanity let's keep it monotonically increasing */
    if (new_size > register_state_size) {
        register_state_size = new_size;
        DEBUG_ASSERT(register_state_size <= X86_MAX_EXTENDED_REGISTER_SIZE);
    }
}

static void fxsave(void* register_state) {
    __asm__ __volatile__("fxsave %0"
                         : "=m"(*(uint8_t*)register_state)
                         :
                         : "memory");
}

static void fxrstor(void*
                    register_state) {
    __asm__ __volatile__("fxrstor %0"
                         :
                         : "m"(*(uint8_t*)register_state)
                         : "memory");
}

// For each xsave-family instruction below, the feature mask is passed in
// EDX:EAX per the instruction's definition.
static void xrstor(void* register_state, uint64_t feature_mask) {
    __asm__ volatile("xrstor %0"
                     :
                     : "m"(*(uint8_t*)register_state),
                       "d"((uint32_t)(feature_mask >> 32)),
                       "a"((uint32_t)feature_mask)
                     : "memory");
}

static void xrstors(void* register_state, uint64_t feature_mask) {
    __asm__ volatile("xrstors %0"
                     :
                     : "m"(*(uint8_t*)register_state),
                       "d"((uint32_t)(feature_mask >> 32)),
                       "a"((uint32_t)feature_mask)
                     : "memory");
}

static void xsave(void* register_state, uint64_t feature_mask) {
    __asm__ volatile("xsave %0"
                     : "+m"(*(uint8_t*)register_state)
                     : "d"((uint32_t)(feature_mask >> 32)),
                       "a"((uint32_t)feature_mask)
                     : "memory");
}

static void xsaveopt(void* register_state, uint64_t feature_mask) {
    __asm__ volatile("xsaveopt %0"
                     : "+m"(*(uint8_t*)register_state)
                     : "d"((uint32_t)(feature_mask >> 32)),
                       "a"((uint32_t)feature_mask)
                     : "memory");
}

static void xsaves(void* register_state, uint64_t feature_mask) {
    __asm__ volatile("xsaves %0"
                     : "+m"(*(uint8_t*)register_state)
                     : "d"((uint32_t)(feature_mask >> 32)),
                       "a"((uint32_t)feature_mask)
                     : "memory");
}

// Reads the 64-bit extended control register selected by |reg| (XGETBV).
uint64_t x86_xgetbv(uint32_t reg) {
    uint32_t hi, lo;
    __asm__ volatile("xgetbv"
                     : "=d"(hi), "=a"(lo)
                     : "c"(reg)
                     : "memory");
    return ((uint64_t)hi << 32) + lo;
}

// Writes |val| to the extended control register selected by |reg| (XSETBV).
void x86_xsetbv(uint32_t reg, uint64_t val) {
    __asm__ volatile("xsetbv"
                     :
                     : "c"(reg), "d"((uint32_t)(val >> 32)), "a"((uint32_t)val)
                     : "memory");
}

// Returns a pointer to the data for |component| within |register_state|,
// handling the legacy area, the standard xsave layout, and the compacted
// layout. *size receives the component's size in bytes (0 with a nullptr
// return on failure). When |mark_present| is set, the component's bit in
// XSTATE_BV is set so a subsequent restore will read the written values.
void* x86_get_extended_register_state_component(void* register_state, uint32_t component,
                                                bool mark_present, uint32_t* size) {
    if (component >= XSAVE_MAX_COMPONENTS) {
        *size = 0;
        return nullptr;
    }

    xsave_area* area =
        reinterpret_cast<xsave_area*>(register_state);

    uint64_t state_component_bit = (1ul << component);

    // Components 0 and 1 are special and are always present in the legacy area.
    if (component <= 1) {
        *size = sizeof(x86_xsave_legacy_area);
        if (!(area->xstate_bv & state_component_bit)) {
            // Component not written because registers were in the initial configuration. Set it so
            // the caller sees the correct initial values.
            if (component == 0) {
                set_x87_initial_state(&area->legacy);
            } else {
                set_sse_initial_state(&area->legacy);
            }
            if (mark_present) {
                area->xstate_bv |= state_component_bit;
            }
        }

        return area;
    }

    if (!(area->xcomp_bv & XSAVE_XCOMP_BV_COMPACT)) {
        // Standard format. The offset and size are provided by a static CPUID call.
        cpuid_leaf leaf;
        x86_get_cpuid_subleaf(X86_CPUID_XSAVE, component, &leaf);
        *size = leaf.a;
        if (leaf.a == 0) {
            return nullptr;
        }
        uint8_t* component_begin = static_cast<uint8_t*>(register_state) + leaf.b;

        if (!(area->xstate_bv & state_component_bit)) {
            // Component not written because it's in the initial state. Write the initial values to
            // the structure so the caller sees the correct data. The initial state of all non-x87
            // xsave components (x87 is handled above) is all 0's.
            memset(component_begin, 0, *size);
            if (mark_present) {
                area->xstate_bv |= state_component_bit;
            }
        }
        return component_begin;
    }

    // Compacted format used. The corresponding bit in xcomp_bv indicates whether the component is
    // present.
    if (!(area->xcomp_bv & state_component_bit)) {
        // Currently this doesn't support reading or writing compacted components that aren't
        // currently marked present. In the future, we may want to add this which will require
        // rewriting all the following components.
        *size = 0;
        return nullptr;
    }

    // Walk all present components and add up their sizes (optionally aligned up) to get the offset.
    uint32_t offset = XSAVE_EXTENDED_AREA_OFFSET;
    for (uint32_t i = XSAVE_FIRST_EXT_COMPONENT; i < component; i++) {
        if (!(area->xcomp_bv & (1ul << i))) {
            continue;
        }
        if (state_components[i].align64) {
            offset = ROUNDUP(offset, 64);
        }
        offset += state_components[i].size;
    }
    // The requested component itself may also need 64-byte alignment.
    if (state_components[component].align64) {
        offset = ROUNDUP(offset, 64);
    }

    uint8_t* component_begin = static_cast<uint8_t*>(register_state) + offset;
    *size = state_components[component].size;

    if (!(area->xstate_bv & state_component_bit)) {
        // Component not written because it's in the initial state. Write the initial values to
        // the structure so the caller sees the correct data. The initial state of all non-x87
        // xsave components (x87 is handled above) is all 0's.
        memset(component_begin, 0, *size);
        if (mark_present) {
            area->xstate_bv |= state_component_bit;
        }
    }
    return component_begin;
}

// Set the extended register PT mode to trace either cpus (!threads)
// or threads.
// WARNING: All PT MSRs should be set to init values before changing the mode.
// See x86_ipt_set_mode_task.

void x86_set_extended_register_pt_state(bool threads) {
    if (!xsaves_supported || !(xss_component_bitmap & X86_XSAVE_STATE_BIT_PT))
        return;

    uint64_t xss = read_msr(IA32_XSS_MSR);
    if (threads)
        xss |= X86_XSAVE_STATE_BIT_PT;
    else
        xss &= ~(0ULL + X86_XSAVE_STATE_BIT_PT);
    write_msr(IA32_XSS_MSR, xss);
}