1130803Smarcel// SPDX-License-Identifier: GPL-2.0 2130803Smarcel/* 3130803Smarcel * xapic_ipi_test 4130803Smarcel * 5130803Smarcel * Copyright (C) 2020, Google LLC. 6130803Smarcel * 7130803Smarcel * This work is licensed under the terms of the GNU GPL, version 2. 8130803Smarcel * 9130803Smarcel * Test that when the APIC is in xAPIC mode, a vCPU can send an IPI to wake 10130803Smarcel * another vCPU that is halted when KVM's backing page for the APIC access 11130803Smarcel * address has been moved by mm. 12130803Smarcel * 13130803Smarcel * The test starts two vCPUs: one that sends IPIs and one that continually 14130803Smarcel * executes HLT. The sender checks that the halter has woken from the HLT and 15130803Smarcel * has reentered HLT before sending the next IPI. While the vCPUs are running, 16130803Smarcel * the host continually calls migrate_pages to move all of the process' pages 17130803Smarcel * amongst the available numa nodes on the machine. 18130803Smarcel * 19130803Smarcel * Migration is a command line option. When used on non-numa machines will 20130803Smarcel * exit with error. Test is still usefull on non-numa for testing IPIs. 21130803Smarcel */ 22130803Smarcel 23130803Smarcel#define _GNU_SOURCE /* for program_invocation_short_name */ 24130803Smarcel#include <getopt.h> 25130803Smarcel#include <pthread.h> 26130803Smarcel#include <inttypes.h> 27130803Smarcel#include <string.h> 28130803Smarcel#include <time.h> 29130803Smarcel 30130803Smarcel#include "kvm_util.h" 31130803Smarcel#include "numaif.h" 32130803Smarcel#include "processor.h" 33130803Smarcel#include "test_util.h" 34130803Smarcel#include "vmx.h" 35130803Smarcel 36130803Smarcel/* Default running time for the test */ 37130803Smarcel#define DEFAULT_RUN_SECS 3 38130803Smarcel 39130803Smarcel/* Default delay between migrate_pages calls (microseconds) */ 40130803Smarcel#define DEFAULT_DELAY_USECS 500000 41130803Smarcel 42130803Smarcel/* 43130803Smarcel * Vector for IPI from sender vCPU to halting vCPU. 44130803Smarcel * Value is arbitrary and was chosen for the alternating bit pattern. Any 45130803Smarcel * value should work. 46130803Smarcel */ 47130803Smarcel#define IPI_VECTOR 0xa5 48130803Smarcel 49130803Smarcel/* 50130803Smarcel * Incremented in the IPI handler. Provides evidence to the sender that the IPI 51130803Smarcel * arrived at the destination 52130803Smarcel */ 53130803Smarcelstatic volatile uint64_t ipis_rcvd; 54130803Smarcel 55130803Smarcel/* Data struct shared between host main thread and vCPUs */ 56130803Smarcelstruct test_data_page { 57130803Smarcel uint32_t halter_apic_id; 58130803Smarcel volatile uint64_t hlt_count; 59130803Smarcel volatile uint64_t wake_count; 60130803Smarcel uint64_t ipis_sent; 61130803Smarcel uint64_t migrations_attempted; 62130803Smarcel uint64_t migrations_completed; 63130803Smarcel uint32_t icr; 64130803Smarcel uint32_t icr2; 65130803Smarcel uint32_t halter_tpr; 66130803Smarcel uint32_t halter_ppr; 67130803Smarcel 68130803Smarcel /* 69130803Smarcel * Record local version register as a cross-check that APIC access 70130803Smarcel * worked. Value should match what KVM reports (APIC_VERSION in 71130803Smarcel * arch/x86/kvm/lapic.c). If test is failing, check that values match 72130803Smarcel * to determine whether APIC access exits are working. 73130803Smarcel */ 74130803Smarcel uint32_t halter_lvr; 75130803Smarcel}; 76130803Smarcel 77130803Smarcelstruct thread_params { 78130803Smarcel struct test_data_page *data; 79130803Smarcel struct kvm_vcpu *vcpu; 80130803Smarcel uint64_t *pipis_rcvd; /* host address of ipis_rcvd global */ 81130803Smarcel}; 82130803Smarcel 83130803Smarcelvoid verify_apic_base_addr(void) 84130803Smarcel{ 85130803Smarcel uint64_t msr = rdmsr(MSR_IA32_APICBASE); 86130803Smarcel uint64_t base = GET_APIC_BASE(msr); 87130803Smarcel 88130803Smarcel GUEST_ASSERT(base == APIC_DEFAULT_GPA); 89130803Smarcel} 90130803Smarcel 91130803Smarcelstatic void halter_guest_code(struct test_data_page *data) 92130803Smarcel{ 93130803Smarcel verify_apic_base_addr(); 94130803Smarcel xapic_enable(); 95130803Smarcel 96130803Smarcel data->halter_apic_id = GET_APIC_ID_FIELD(xapic_read_reg(APIC_ID)); 97130803Smarcel data->halter_lvr = xapic_read_reg(APIC_LVR); 98130803Smarcel 99130803Smarcel /* 100130803Smarcel * Loop forever HLTing and recording halts & wakes. Disable interrupts 101130803Smarcel * each time around to minimize window between signaling the pending 102130803Smarcel * halt to the sender vCPU and executing the halt. No need to disable on 103130803Smarcel * first run as this vCPU executes first and the host waits for it to 104130803Smarcel * signal going into first halt before starting the sender vCPU. Record 105130803Smarcel * TPR and PPR for diagnostic purposes in case the test fails. 106130803Smarcel */ 107130803Smarcel for (;;) { 108130803Smarcel data->halter_tpr = xapic_read_reg(APIC_TASKPRI); 109130803Smarcel data->halter_ppr = xapic_read_reg(APIC_PROCPRI); 110130803Smarcel data->hlt_count++; 111130803Smarcel asm volatile("sti; hlt; cli"); 112130803Smarcel data->wake_count++; 113130803Smarcel } 114130803Smarcel} 115130803Smarcel 116130803Smarcel/* 117130803Smarcel * Runs on halter vCPU when IPI arrives. Write an arbitrary non-zero value to 118130803Smarcel * enable diagnosing errant writes to the APIC access address backing page in 119130803Smarcel * case of test failure. 120130803Smarcel */ 121130803Smarcelstatic void guest_ipi_handler(struct ex_regs *regs) 122130803Smarcel{ 123130803Smarcel ipis_rcvd++; 124130803Smarcel xapic_write_reg(APIC_EOI, 77); 125130803Smarcel} 126130803Smarcel 127130803Smarcelstatic void sender_guest_code(struct test_data_page *data) 128130803Smarcel{ 129130803Smarcel uint64_t last_wake_count; 130130803Smarcel uint64_t last_hlt_count; 131130803Smarcel uint64_t last_ipis_rcvd_count; 132130803Smarcel uint32_t icr_val; 133130803Smarcel uint32_t icr2_val; 134130803Smarcel uint64_t tsc_start; 135130803Smarcel 136130803Smarcel verify_apic_base_addr(); 137130803Smarcel xapic_enable(); 138130803Smarcel 139130803Smarcel /* 140130803Smarcel * Init interrupt command register for sending IPIs 141130803Smarcel * 142130803Smarcel * Delivery mode=fixed, per SDM: 143130803Smarcel * "Delivers the interrupt specified in the vector field to the target 144130803Smarcel * processor." 145130803Smarcel * 146130803Smarcel * Destination mode=physical i.e. specify target by its local APIC 147130803Smarcel * ID. This vCPU assumes that the halter vCPU has already started and 148130803Smarcel * set data->halter_apic_id. 149130803Smarcel */ 150130803Smarcel icr_val = (APIC_DEST_PHYSICAL | APIC_DM_FIXED | IPI_VECTOR); 151130803Smarcel icr2_val = SET_APIC_DEST_FIELD(data->halter_apic_id); 152130803Smarcel data->icr = icr_val; 153130803Smarcel data->icr2 = icr2_val; 154130803Smarcel 155130803Smarcel last_wake_count = data->wake_count; 156130803Smarcel last_hlt_count = data->hlt_count; 157130803Smarcel last_ipis_rcvd_count = ipis_rcvd; 158130803Smarcel for (;;) { 159130803Smarcel /* 160130803Smarcel * Send IPI to halter vCPU. 161130803Smarcel * First IPI can be sent unconditionally because halter vCPU 162130803Smarcel * starts earlier. 163130803Smarcel */ 164 xapic_write_reg(APIC_ICR2, icr2_val); 165 xapic_write_reg(APIC_ICR, icr_val); 166 data->ipis_sent++; 167 168 /* 169 * Wait up to ~1 sec for halter to indicate that it has: 170 * 1. Received the IPI 171 * 2. Woken up from the halt 172 * 3. Gone back into halt 173 * Current CPUs typically run at 2.x Ghz which is ~2 174 * billion ticks per second. 175 */ 176 tsc_start = rdtsc(); 177 while (rdtsc() - tsc_start < 2000000000) { 178 if ((ipis_rcvd != last_ipis_rcvd_count) && 179 (data->wake_count != last_wake_count) && 180 (data->hlt_count != last_hlt_count)) 181 break; 182 } 183 184 GUEST_ASSERT((ipis_rcvd != last_ipis_rcvd_count) && 185 (data->wake_count != last_wake_count) && 186 (data->hlt_count != last_hlt_count)); 187 188 last_wake_count = data->wake_count; 189 last_hlt_count = data->hlt_count; 190 last_ipis_rcvd_count = ipis_rcvd; 191 } 192} 193 194static void *vcpu_thread(void *arg) 195{ 196 struct thread_params *params = (struct thread_params *)arg; 197 struct kvm_vcpu *vcpu = params->vcpu; 198 struct ucall uc; 199 int old; 200 int r; 201 202 r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old); 203 TEST_ASSERT(r == 0, 204 "pthread_setcanceltype failed on vcpu_id=%u with errno=%d", 205 vcpu->id, r); 206 207 fprintf(stderr, "vCPU thread running vCPU %u\n", vcpu->id); 208 vcpu_run(vcpu); 209 210 TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); 211 212 if (get_ucall(vcpu, &uc) == UCALL_ABORT) { 213 TEST_ASSERT(false, 214 "vCPU %u exited with error: %s.\n" 215 "Sending vCPU sent %lu IPIs to halting vCPU\n" 216 "Halting vCPU halted %lu times, woke %lu times, received %lu IPIs.\n" 217 "Halter TPR=%#x PPR=%#x LVR=%#x\n" 218 "Migrations attempted: %lu\n" 219 "Migrations completed: %lu", 220 vcpu->id, (const char *)uc.args[0], 221 params->data->ipis_sent, params->data->hlt_count, 222 params->data->wake_count, 223 *params->pipis_rcvd, params->data->halter_tpr, 224 params->data->halter_ppr, params->data->halter_lvr, 225 params->data->migrations_attempted, 226 params->data->migrations_completed); 227 } 228 229 return NULL; 230} 231 232static void cancel_join_vcpu_thread(pthread_t thread, struct kvm_vcpu *vcpu) 233{ 234 void *retval; 235 int r; 236 237 r = pthread_cancel(thread); 238 TEST_ASSERT(r == 0, 239 "pthread_cancel on vcpu_id=%d failed with errno=%d", 240 vcpu->id, r); 241 242 r = pthread_join(thread, &retval); 243 TEST_ASSERT(r == 0, 244 "pthread_join on vcpu_id=%d failed with errno=%d", 245 vcpu->id, r); 246 TEST_ASSERT(retval == PTHREAD_CANCELED, 247 "expected retval=%p, got %p", PTHREAD_CANCELED, 248 retval); 249} 250 251void do_migrations(struct test_data_page *data, int run_secs, int delay_usecs, 252 uint64_t *pipis_rcvd) 253{ 254 long pages_not_moved; 255 unsigned long nodemask = 0; 256 unsigned long nodemasks[sizeof(nodemask) * 8]; 257 int nodes = 0; 258 time_t start_time, last_update, now; 259 time_t interval_secs = 1; 260 int i, r; 261 int from, to; 262 unsigned long bit; 263 uint64_t hlt_count; 264 uint64_t wake_count; 265 uint64_t ipis_sent; 266 267 fprintf(stderr, "Calling migrate_pages every %d microseconds\n", 268 delay_usecs); 269 270 /* Get set of first 64 numa nodes available */ 271 r = get_mempolicy(NULL, &nodemask, sizeof(nodemask) * 8, 272 0, MPOL_F_MEMS_ALLOWED); 273 TEST_ASSERT(r == 0, "get_mempolicy failed errno=%d", errno); 274 275 fprintf(stderr, "Numa nodes found amongst first %lu possible nodes " 276 "(each 1-bit indicates node is present): %#lx\n", 277 sizeof(nodemask) * 8, nodemask); 278 279 /* Init array of masks containing a single-bit in each, one for each 280 * available node. migrate_pages called below requires specifying nodes 281 * as bit masks. 282 */ 283 for (i = 0, bit = 1; i < sizeof(nodemask) * 8; i++, bit <<= 1) { 284 if (nodemask & bit) { 285 nodemasks[nodes] = nodemask & bit; 286 nodes++; 287 } 288 } 289 290 TEST_ASSERT(nodes > 1, 291 "Did not find at least 2 numa nodes. Can't do migration"); 292 293 fprintf(stderr, "Migrating amongst %d nodes found\n", nodes); 294 295 from = 0; 296 to = 1; 297 start_time = time(NULL); 298 last_update = start_time; 299 300 ipis_sent = data->ipis_sent; 301 hlt_count = data->hlt_count; 302 wake_count = data->wake_count; 303 304 while ((int)(time(NULL) - start_time) < run_secs) { 305 data->migrations_attempted++; 306 307 /* 308 * migrate_pages with PID=0 will migrate all pages of this 309 * process between the nodes specified as bitmasks. The page 310 * backing the APIC access address belongs to this process 311 * because it is allocated by KVM in the context of the 312 * KVM_CREATE_VCPU ioctl. If that assumption ever changes this 313 * test may break or give a false positive signal. 314 */ 315 pages_not_moved = migrate_pages(0, sizeof(nodemasks[from]), 316 &nodemasks[from], 317 &nodemasks[to]); 318 if (pages_not_moved < 0) 319 fprintf(stderr, 320 "migrate_pages failed, errno=%d\n", errno); 321 else if (pages_not_moved > 0) 322 fprintf(stderr, 323 "migrate_pages could not move %ld pages\n", 324 pages_not_moved); 325 else 326 data->migrations_completed++; 327 328 from = to; 329 to++; 330 if (to == nodes) 331 to = 0; 332 333 now = time(NULL); 334 if (((now - start_time) % interval_secs == 0) && 335 (now != last_update)) { 336 last_update = now; 337 fprintf(stderr, 338 "%lu seconds: Migrations attempted=%lu completed=%lu, " 339 "IPIs sent=%lu received=%lu, HLTs=%lu wakes=%lu\n", 340 now - start_time, data->migrations_attempted, 341 data->migrations_completed, 342 data->ipis_sent, *pipis_rcvd, 343 data->hlt_count, data->wake_count); 344 345 TEST_ASSERT(ipis_sent != data->ipis_sent && 346 hlt_count != data->hlt_count && 347 wake_count != data->wake_count, 348 "IPI, HLT and wake count have not increased " 349 "in the last %lu seconds. " 350 "HLTer is likely hung.", interval_secs); 351 352 ipis_sent = data->ipis_sent; 353 hlt_count = data->hlt_count; 354 wake_count = data->wake_count; 355 } 356 usleep(delay_usecs); 357 } 358} 359 360void get_cmdline_args(int argc, char *argv[], int *run_secs, 361 bool *migrate, int *delay_usecs) 362{ 363 for (;;) { 364 int opt = getopt(argc, argv, "s:d:m"); 365 366 if (opt == -1) 367 break; 368 switch (opt) { 369 case 's': 370 *run_secs = parse_size(optarg); 371 break; 372 case 'm': 373 *migrate = true; 374 break; 375 case 'd': 376 *delay_usecs = parse_size(optarg); 377 break; 378 default: 379 TEST_ASSERT(false, 380 "Usage: -s <runtime seconds>. Default is %d seconds.\n" 381 "-m adds calls to migrate_pages while vCPUs are running." 382 " Default is no migrations.\n" 383 "-d <delay microseconds> - delay between migrate_pages() calls." 384 " Default is %d microseconds.", 385 DEFAULT_RUN_SECS, DEFAULT_DELAY_USECS); 386 } 387 } 388} 389 390int main(int argc, char *argv[]) 391{ 392 int r; 393 int wait_secs; 394 const int max_halter_wait = 10; 395 int run_secs = 0; 396 int delay_usecs = 0; 397 struct test_data_page *data; 398 vm_vaddr_t test_data_page_vaddr; 399 bool migrate = false; 400 pthread_t threads[2]; 401 struct thread_params params[2]; 402 struct kvm_vm *vm; 403 uint64_t *pipis_rcvd; 404 405 get_cmdline_args(argc, argv, &run_secs, &migrate, &delay_usecs); 406 if (run_secs <= 0) 407 run_secs = DEFAULT_RUN_SECS; 408 if (delay_usecs <= 0) 409 delay_usecs = DEFAULT_DELAY_USECS; 410 411 vm = vm_create_with_one_vcpu(¶ms[0].vcpu, halter_guest_code); 412 413 vm_init_descriptor_tables(vm); 414 vcpu_init_descriptor_tables(params[0].vcpu); 415 vm_install_exception_handler(vm, IPI_VECTOR, guest_ipi_handler); 416 417 virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA); 418 419 params[1].vcpu = vm_vcpu_add(vm, 1, sender_guest_code); 420 421 test_data_page_vaddr = vm_vaddr_alloc_page(vm); 422 data = addr_gva2hva(vm, test_data_page_vaddr); 423 memset(data, 0, sizeof(*data)); 424 params[0].data = data; 425 params[1].data = data; 426 427 vcpu_args_set(params[0].vcpu, 1, test_data_page_vaddr); 428 vcpu_args_set(params[1].vcpu, 1, test_data_page_vaddr); 429 430 pipis_rcvd = (uint64_t *)addr_gva2hva(vm, (uint64_t)&ipis_rcvd); 431 params[0].pipis_rcvd = pipis_rcvd; 432 params[1].pipis_rcvd = pipis_rcvd; 433 434 /* Start halter vCPU thread and wait for it to execute first HLT. */ 435 r = pthread_create(&threads[0], NULL, vcpu_thread, ¶ms[0]); 436 TEST_ASSERT(r == 0, 437 "pthread_create halter failed errno=%d", errno); 438 fprintf(stderr, "Halter vCPU thread started\n"); 439 440 wait_secs = 0; 441 while ((wait_secs < max_halter_wait) && !data->hlt_count) { 442 sleep(1); 443 wait_secs++; 444 } 445 446 TEST_ASSERT(data->hlt_count, 447 "Halter vCPU did not execute first HLT within %d seconds", 448 max_halter_wait); 449 450 fprintf(stderr, 451 "Halter vCPU thread reported its APIC ID: %u after %d seconds.\n", 452 data->halter_apic_id, wait_secs); 453 454 r = pthread_create(&threads[1], NULL, vcpu_thread, ¶ms[1]); 455 TEST_ASSERT(r == 0, "pthread_create sender failed errno=%d", errno); 456 457 fprintf(stderr, 458 "IPI sender vCPU thread started. Letting vCPUs run for %d seconds.\n", 459 run_secs); 460 461 if (!migrate) 462 sleep(run_secs); 463 else 464 do_migrations(data, run_secs, delay_usecs, pipis_rcvd); 465 466 /* 467 * Cancel threads and wait for them to stop. 468 */ 469 cancel_join_vcpu_thread(threads[0], params[0].vcpu); 470 cancel_join_vcpu_thread(threads[1], params[1].vcpu); 471 472 fprintf(stderr, 473 "Test successful after running for %d seconds.\n" 474 "Sending vCPU sent %lu IPIs to halting vCPU\n" 475 "Halting vCPU halted %lu times, woke %lu times, received %lu IPIs.\n" 476 "Halter APIC ID=%#x\n" 477 "Sender ICR value=%#x ICR2 value=%#x\n" 478 "Halter TPR=%#x PPR=%#x LVR=%#x\n" 479 "Migrations attempted: %lu\n" 480 "Migrations completed: %lu\n", 481 run_secs, data->ipis_sent, 482 data->hlt_count, data->wake_count, *pipis_rcvd, 483 data->halter_apic_id, 484 data->icr, data->icr2, 485 data->halter_tpr, data->halter_ppr, data->halter_lvr, 486 data->migrations_attempted, data->migrations_completed); 487 488 kvm_vm_free(vm); 489 490 return 0; 491} 492