1/* $NetBSD: selftest_lrc.c,v 1.2 2021/12/18 23:45:30 riastradh Exp $ */
2
3/*
4 * SPDX-License-Identifier: MIT
5 *
6 * Copyright © 2018 Intel Corporation
7 */
8
9#include <sys/cdefs.h>
10__KERNEL_RCSID(0, "$NetBSD: selftest_lrc.c,v 1.2 2021/12/18 23:45:30 riastradh Exp $");
11
12#include <linux/prime_numbers.h>
13
14#include "gem/i915_gem_pm.h"
15#include "gt/intel_engine_heartbeat.h"
16#include "gt/intel_reset.h"
17
18#include "i915_selftest.h"
19#include "selftests/i915_random.h"
20#include "selftests/igt_flush_test.h"
21#include "selftests/igt_live_test.h"
22#include "selftests/igt_spinner.h"
23#include "selftests/lib_sw_fence.h"
24
25#include "gem/selftests/igt_gem_utils.h"
26#include "gem/selftests/mock_context.h"
27
28#define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
29#define NUM_GPR_DW (16 * 2) /* each GPR is 2 dwords */
30
31static struct i915_vma *create_scratch(struct intel_gt *gt)
32{
33 struct drm_i915_gem_object *obj;
34 struct i915_vma *vma;
35 int err;
36
37 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
38 if (IS_ERR(obj))
39 return ERR_CAST(obj);
40
41 i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED);
42
43 vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
44 if (IS_ERR(vma)) {
45 i915_gem_object_put(obj);
46 return vma;
47 }
48
49 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
50 if (err) {
51 i915_gem_object_put(obj);
52 return ERR_PTR(err);
53 }
54
55 return vma;
56}
57
58static void engine_heartbeat_disable(struct intel_engine_cs *engine,
59 unsigned long *saved)
60{
61 *saved = engine->props.heartbeat_interval_ms;
62 engine->props.heartbeat_interval_ms = 0;
63
64 intel_engine_pm_get(engine);
65 intel_engine_park_heartbeat(engine);
66}
67
68static void engine_heartbeat_enable(struct intel_engine_cs *engine,
69 unsigned long saved)
70{
71 intel_engine_pm_put(engine);
72
73 engine->props.heartbeat_interval_ms = saved;
74}
75
76static int live_sanitycheck(void *arg)
77{
78 struct intel_gt *gt = arg;
79 struct intel_engine_cs *engine;
80 enum intel_engine_id id;
81 struct igt_spinner spin;
82 int err = 0;
83
84 if (!HAS_LOGICAL_RING_CONTEXTS(gt->i915))
85 return 0;
86
87 if (igt_spinner_init(&spin, gt))
88 return -ENOMEM;
89
90 for_each_engine(engine, gt, id) {
91 struct intel_context *ce;
92 struct i915_request *rq;
93
94 ce = intel_context_create(engine);
95 if (IS_ERR(ce)) {
96 err = PTR_ERR(ce);
97 break;
98 }
99
100 rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
101 if (IS_ERR(rq)) {
102 err = PTR_ERR(rq);
103 goto out_ctx;
104 }
105
106 i915_request_add(rq);
107 if (!igt_wait_for_spinner(&spin, rq)) {
108 GEM_TRACE("spinner failed to start\n");
109 GEM_TRACE_DUMP();
110 intel_gt_set_wedged(gt);
111 err = -EIO;
112 goto out_ctx;
113 }
114
115 igt_spinner_end(&spin);
116 if (igt_flush_test(gt->i915)) {
117 err = -EIO;
118 goto out_ctx;
119 }
120
121out_ctx:
122 intel_context_put(ce);
123 if (err)
124 break;
125 }
126
127 igt_spinner_fini(&spin);
128 return err;
129}
130
131static int live_unlite_restore(struct intel_gt *gt, int prio)
132{
133 struct intel_engine_cs *engine;
134 enum intel_engine_id id;
135 struct igt_spinner spin;
136 int err = -ENOMEM;
137
138 /*
139 * Check that we can correctly context switch between 2 instances
140 * on the same engine from the same parent context.
141 */ 142 143 if (igt_spinner_init(&spin, gt)) 144 return err; 145 146 err = 0; 147 for_each_engine(engine, gt, id) { 148 struct intel_context *ce[2] = {}; 149 struct i915_request *rq[2]; 150 struct igt_live_test t; 151 unsigned long saved; 152 int n; 153 154 if (prio && !intel_engine_has_preemption(engine)) 155 continue; 156 157 if (!intel_engine_can_store_dword(engine)) 158 continue; 159 160 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { 161 err = -EIO; 162 break; 163 } 164 engine_heartbeat_disable(engine, &saved); 165 166 for (n = 0; n < ARRAY_SIZE(ce); n++) { 167 struct intel_context *tmp; 168 169 tmp = intel_context_create(engine); 170 if (IS_ERR(tmp)) { 171 err = PTR_ERR(tmp); 172 goto err_ce; 173 } 174 175 err = intel_context_pin(tmp); 176 if (err) { 177 intel_context_put(tmp); 178 goto err_ce; 179 } 180 181 /* 182 * Setup the pair of contexts such that if we 183 * lite-restore using the RING_TAIL from ce[1] it 184 * will execute garbage from ce[0]->ring. 185 */ 186 memset(tmp->ring->vaddr, 187 POISON_INUSE, /* IPEHR: 0x5a5a5a5a [hung!] */ 188 tmp->ring->vma->size); 189 190 ce[n] = tmp; 191 } 192 GEM_BUG_ON(!ce[1]->ring->size); 193 intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2); 194 __execlists_update_reg_state(ce[1], engine, ce[1]->ring->head); 195 196 rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK); 197 if (IS_ERR(rq[0])) { 198 err = PTR_ERR(rq[0]); 199 goto err_ce; 200 } 201 202 i915_request_get(rq[0]); 203 i915_request_add(rq[0]); 204 GEM_BUG_ON(rq[0]->postfix > ce[1]->ring->emit); 205 206 if (!igt_wait_for_spinner(&spin, rq[0])) { 207 i915_request_put(rq[0]); 208 goto err_ce; 209 } 210 211 rq[1] = i915_request_create(ce[1]); 212 if (IS_ERR(rq[1])) { 213 err = PTR_ERR(rq[1]); 214 i915_request_put(rq[0]); 215 goto err_ce; 216 } 217 218 if (!prio) { 219 /* 220 * Ensure we do the switch to ce[1] on completion. 221 * 222 * rq[0] is already submitted, so this should reduce 223 * to a no-op (a wait on a request on the same engine 224 * uses the submit fence, not the completion fence), 225 * but it will install a dependency on rq[1] for rq[0] 226 * that will prevent the pair being reordered by 227 * timeslicing. 
228 */ 229 i915_request_await_dma_fence(rq[1], &rq[0]->fence); 230 } 231 232 i915_request_get(rq[1]); 233 i915_request_add(rq[1]); 234 GEM_BUG_ON(rq[1]->postfix <= rq[0]->postfix); 235 i915_request_put(rq[0]); 236 237 if (prio) { 238 struct i915_sched_attr attr = { 239 .priority = prio, 240 }; 241 242 /* Alternatively preempt the spinner with ce[1] */ 243 engine->schedule(rq[1], &attr); 244 } 245 246 /* And switch back to ce[0] for good measure */ 247 rq[0] = i915_request_create(ce[0]); 248 if (IS_ERR(rq[0])) { 249 err = PTR_ERR(rq[0]); 250 i915_request_put(rq[1]); 251 goto err_ce; 252 } 253 254 i915_request_await_dma_fence(rq[0], &rq[1]->fence); 255 i915_request_get(rq[0]); 256 i915_request_add(rq[0]); 257 GEM_BUG_ON(rq[0]->postfix > rq[1]->postfix); 258 i915_request_put(rq[1]); 259 i915_request_put(rq[0]); 260 261err_ce: 262 tasklet_kill(&engine->execlists.tasklet); /* flush submission */ 263 igt_spinner_end(&spin); 264 for (n = 0; n < ARRAY_SIZE(ce); n++) { 265 if (IS_ERR_OR_NULL(ce[n])) 266 break; 267 268 intel_context_unpin(ce[n]); 269 intel_context_put(ce[n]); 270 } 271 272 engine_heartbeat_enable(engine, saved); 273 if (igt_live_test_end(&t)) 274 err = -EIO; 275 if (err) 276 break; 277 } 278 279 igt_spinner_fini(&spin); 280 return err; 281} 282 283static int live_unlite_switch(void *arg) 284{ 285 return live_unlite_restore(arg, 0); 286} 287 288static int live_unlite_preempt(void *arg) 289{ 290 return live_unlite_restore(arg, I915_USER_PRIORITY(I915_PRIORITY_MAX)); 291} 292 293static int live_hold_reset(void *arg) 294{ 295 struct intel_gt *gt = arg; 296 struct intel_engine_cs *engine; 297 enum intel_engine_id id; 298 struct igt_spinner spin; 299 int err = 0; 300 301 /* 302 * In order to support offline error capture for fast preempt reset, 303 * we need to decouple the guilty request and ensure that it and its 304 * descendents are not executed while the capture is in progress. 
305 */
306
307 if (!intel_has_reset_engine(gt))
308 return 0;
309
310 if (igt_spinner_init(&spin, gt))
311 return -ENOMEM;
312
313 for_each_engine(engine, gt, id) {
314 struct intel_context *ce;
315 unsigned long heartbeat;
316 struct i915_request *rq;
317
318 ce = intel_context_create(engine);
319 if (IS_ERR(ce)) {
320 err = PTR_ERR(ce);
321 break;
322 }
323
324 engine_heartbeat_disable(engine, &heartbeat);
325
326 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
327 if (IS_ERR(rq)) {
328 err = PTR_ERR(rq);
329 goto out;
330 }
331 i915_request_add(rq);
332
333 if (!igt_wait_for_spinner(&spin, rq)) {
334 intel_gt_set_wedged(gt);
335 err = -ETIME;
336 goto out;
337 }
338
339 /* We have our request executing, now remove it and reset */
340
341 if (test_and_set_bit(I915_RESET_ENGINE + id,
342 &gt->reset.flags)) {
343 intel_gt_set_wedged(gt);
344 err = -EBUSY;
345 goto out;
346 }
347 tasklet_disable(&engine->execlists.tasklet);
348
349 engine->execlists.tasklet.func(engine->execlists.tasklet.data);
350 GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
351
352 i915_request_get(rq);
353 execlists_hold(engine, rq);
354 GEM_BUG_ON(!i915_request_on_hold(rq));
355
356 intel_engine_reset(engine, NULL);
357 GEM_BUG_ON(rq->fence.error != -EIO);
358
359 tasklet_enable(&engine->execlists.tasklet);
360 clear_and_wake_up_bit(I915_RESET_ENGINE + id,
361 &gt->reset.flags);
362
363 /* Check that we do not resubmit the held request */
364 if (!i915_request_wait(rq, 0, HZ / 5)) {
365 pr_err("%s: on hold request completed!\n",
366 engine->name);
367 i915_request_put(rq);
368 err = -EIO;
369 goto out;
370 }
371 GEM_BUG_ON(!i915_request_on_hold(rq));
372
373 /* But is resubmitted on release */
374 execlists_unhold(engine, rq);
375 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
376 pr_err("%s: held request did not complete!\n",
377 engine->name);
378 intel_gt_set_wedged(gt);
379 err = -ETIME;
380 }
381 i915_request_put(rq);
382
383out:
384 engine_heartbeat_enable(engine, heartbeat);
385 intel_context_put(ce);
386 if (err)
387 break;
388 }
389
390 igt_spinner_fini(&spin);
391 return err;
392}
393
394static int
395emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx)
396{
397 u32 *cs;
398
399 cs = intel_ring_begin(rq, 10);
400 if (IS_ERR(cs))
401 return PTR_ERR(cs);
402
403 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
404
405 *cs++ = MI_SEMAPHORE_WAIT |
406 MI_SEMAPHORE_GLOBAL_GTT |
407 MI_SEMAPHORE_POLL |
408 MI_SEMAPHORE_SAD_NEQ_SDD;
409 *cs++ = 0;
410 *cs++ = i915_ggtt_offset(vma) + 4 * idx;
411 *cs++ = 0;
412
413 if (idx > 0) {
414 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
415 *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
416 *cs++ = 0;
417 *cs++ = 1;
418 } else {
419 *cs++ = MI_NOOP;
420 *cs++ = MI_NOOP;
421 *cs++ = MI_NOOP;
422 *cs++ = MI_NOOP;
423 }
424
425 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
426
427 intel_ring_advance(rq, cs);
428 return 0;
429}
430
431static struct i915_request *
432semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx)
433{
434 struct intel_context *ce;
435 struct i915_request *rq;
436 int err;
437
438 ce = intel_context_create(engine);
439 if (IS_ERR(ce))
440 return ERR_CAST(ce);
441
442 rq = intel_context_create_request(ce);
443 if (IS_ERR(rq))
444 goto out_ce;
445
446 err = 0;
447 if (rq->engine->emit_init_breadcrumb)
448 err = rq->engine->emit_init_breadcrumb(rq);
449 if (err == 0)
450 err = emit_semaphore_chain(rq, vma, idx);
451 if (err == 0)
452 i915_request_get(rq);
453 i915_request_add(rq);
454 if (err)
455 rq =
ERR_PTR(err); 456 457out_ce: 458 intel_context_put(ce); 459 return rq; 460} 461 462static int 463release_queue(struct intel_engine_cs *engine, 464 struct i915_vma *vma, 465 int idx, int prio) 466{ 467 struct i915_sched_attr attr = { 468 .priority = prio, 469 }; 470 struct i915_request *rq; 471 u32 *cs; 472 473 rq = intel_engine_create_kernel_request(engine); 474 if (IS_ERR(rq)) 475 return PTR_ERR(rq); 476 477 cs = intel_ring_begin(rq, 4); 478 if (IS_ERR(cs)) { 479 i915_request_add(rq); 480 return PTR_ERR(cs); 481 } 482 483 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 484 *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1); 485 *cs++ = 0; 486 *cs++ = 1; 487 488 intel_ring_advance(rq, cs); 489 490 i915_request_get(rq); 491 i915_request_add(rq); 492 493 local_bh_disable(); 494 engine->schedule(rq, &attr); 495 local_bh_enable(); /* kick tasklet */ 496 497 i915_request_put(rq); 498 499 return 0; 500} 501 502static int 503slice_semaphore_queue(struct intel_engine_cs *outer, 504 struct i915_vma *vma, 505 int count) 506{ 507 struct intel_engine_cs *engine; 508 struct i915_request *head; 509 enum intel_engine_id id; 510 int err, i, n = 0; 511 512 head = semaphore_queue(outer, vma, n++); 513 if (IS_ERR(head)) 514 return PTR_ERR(head); 515 516 for_each_engine(engine, outer->gt, id) { 517 for (i = 0; i < count; i++) { 518 struct i915_request *rq; 519 520 rq = semaphore_queue(engine, vma, n++); 521 if (IS_ERR(rq)) { 522 err = PTR_ERR(rq); 523 goto out; 524 } 525 526 i915_request_put(rq); 527 } 528 } 529 530 err = release_queue(outer, vma, n, INT_MAX); 531 if (err) 532 goto out; 533 534 if (i915_request_wait(head, 0, 535 2 * RUNTIME_INFO(outer->i915)->num_engines * (count + 2) * (count + 3)) < 0) { 536 pr_err("Failed to slice along semaphore chain of length (%d, %d)!\n", 537 count, n); 538 GEM_TRACE_DUMP(); 539 intel_gt_set_wedged(outer->gt); 540 err = -EIO; 541 } 542 543out: 544 i915_request_put(head); 545 return err; 546} 547 548static int live_timeslice_preempt(void *arg) 549{ 550 struct intel_gt *gt = arg; 551 struct drm_i915_gem_object *obj; 552 struct i915_vma *vma; 553 void *vaddr; 554 int err = 0; 555 int count; 556 557 /* 558 * If a request takes too long, we would like to give other users 559 * a fair go on the GPU. In particular, users may create batches 560 * that wait upon external input, where that input may even be 561 * supplied by another GPU job. To avoid blocking forever, we 562 * need to preempt the current task and replace it with another 563 * ready task. 
564 */
565 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
566 return 0;
567
568 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
569 if (IS_ERR(obj))
570 return PTR_ERR(obj);
571
572 vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
573 if (IS_ERR(vma)) {
574 err = PTR_ERR(vma);
575 goto err_obj;
576 }
577
578 vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
579 if (IS_ERR(vaddr)) {
580 err = PTR_ERR(vaddr);
581 goto err_obj;
582 }
583
584 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
585 if (err)
586 goto err_map;
587
588 for_each_prime_number_from(count, 1, 16) {
589 struct intel_engine_cs *engine;
590 enum intel_engine_id id;
591
592 for_each_engine(engine, gt, id) {
593 unsigned long saved;
594
595 if (!intel_engine_has_preemption(engine))
596 continue;
597
598 memset(vaddr, 0, PAGE_SIZE);
599
600 engine_heartbeat_disable(engine, &saved);
601 err = slice_semaphore_queue(engine, vma, count);
602 engine_heartbeat_enable(engine, saved);
603 if (err)
604 goto err_pin;
605
606 if (igt_flush_test(gt->i915)) {
607 err = -EIO;
608 goto err_pin;
609 }
610 }
611 }
612
613err_pin:
614 i915_vma_unpin(vma);
615err_map:
616 i915_gem_object_unpin_map(obj);
617err_obj:
618 i915_gem_object_put(obj);
619 return err;
620}
621
622static struct i915_request *nop_request(struct intel_engine_cs *engine)
623{
624 struct i915_request *rq;
625
626 rq = intel_engine_create_kernel_request(engine);
627 if (IS_ERR(rq))
628 return rq;
629
630 i915_request_get(rq);
631 i915_request_add(rq);
632
633 return rq;
634}
635
636static int wait_for_submit(struct intel_engine_cs *engine,
637 struct i915_request *rq,
638 unsigned long timeout)
639{
640 timeout += jiffies;
641 do {
642 cond_resched();
643 intel_engine_flush_submission(engine);
644 if (i915_request_is_active(rq))
645 return 0;
646 } while (time_before(jiffies, timeout));
647
648 return -ETIME;
649}
650
651static long timeslice_threshold(const struct intel_engine_cs *engine)
652{
653 return 2 * msecs_to_jiffies_timeout(timeslice(engine)) + 1;
654}
655
656static int live_timeslice_queue(void *arg)
657{
658 struct intel_gt *gt = arg;
659 struct drm_i915_gem_object *obj;
660 struct intel_engine_cs *engine;
661 enum intel_engine_id id;
662 struct i915_vma *vma;
663 void *vaddr;
664 int err = 0;
665
666 /*
667 * Make sure that even if ELSP[0] and ELSP[1] are filled with
668 * timeslicing between them disabled, we *do* enable timeslicing
669 * if the queue demands it. (Normally, we do not submit if
670 * ELSP[1] is already occupied, so must rely on timeslicing to
671 * eject ELSP[0] in favour of the queue.)
672 */
673 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
674 return 0;
675
676 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
677 if (IS_ERR(obj))
678 return PTR_ERR(obj);
679
680 vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
681 if (IS_ERR(vma)) {
682 err = PTR_ERR(vma);
683 goto err_obj;
684 }
685
686 vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
687 if (IS_ERR(vaddr)) {
688 err = PTR_ERR(vaddr);
689 goto err_obj;
690 }
691
692 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
693 if (err)
694 goto err_map;
695
696 for_each_engine(engine, gt, id) {
697 struct i915_sched_attr attr = {
698 .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
699 };
700 struct i915_request *rq, *nop;
701 unsigned long saved;
702
703 if (!intel_engine_has_preemption(engine))
704 continue;
705
706 engine_heartbeat_disable(engine, &saved);
707 memset(vaddr, 0, PAGE_SIZE);
708
709 /* ELSP[0]: semaphore wait */
710 rq = semaphore_queue(engine, vma, 0);
711 if (IS_ERR(rq)) {
712 err = PTR_ERR(rq);
713 goto err_heartbeat;
714 }
715 engine->schedule(rq, &attr);
716 err = wait_for_submit(engine, rq, HZ / 2);
717 if (err) {
718 pr_err("%s: Timed out trying to submit semaphores\n",
719 engine->name);
720 goto err_rq;
721 }
722
723 /* ELSP[1]: nop request */
724 nop = nop_request(engine);
725 if (IS_ERR(nop)) {
726 err = PTR_ERR(nop);
727 goto err_rq;
728 }
729 err = wait_for_submit(engine, nop, HZ / 2);
730 i915_request_put(nop);
731 if (err) {
732 pr_err("%s: Timed out trying to submit nop\n",
733 engine->name);
734 goto err_rq;
735 }
736
737 GEM_BUG_ON(i915_request_completed(rq));
738 GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
739
740 /* Queue: semaphore signal, matching priority as semaphore */
741 err = release_queue(engine, vma, 1, effective_prio(rq));
742 if (err)
743 goto err_rq;
744
745 intel_engine_flush_submission(engine);
746 if (!READ_ONCE(engine->execlists.timer.expires) &&
747 !i915_request_completed(rq)) {
748 struct drm_printer p =
749 drm_info_printer(gt->i915->drm.dev);
750
751 GEM_TRACE_ERR("%s: Failed to enable timeslicing!\n",
752 engine->name);
753 intel_engine_dump(engine, &p,
754 "%s\n", engine->name);
755 GEM_TRACE_DUMP();
756
757 memset(vaddr, 0xff, PAGE_SIZE);
758 err = -EINVAL;
759 }
760
761 /* Timeslice every jiffy, so within 2 we should signal */
762 if (i915_request_wait(rq, 0, timeslice_threshold(engine)) < 0) {
763 struct drm_printer p =
764 drm_info_printer(gt->i915->drm.dev);
765
766 pr_err("%s: Failed to timeslice into queue\n",
767 engine->name);
768 intel_engine_dump(engine, &p,
769 "%s\n", engine->name);
770
771 memset(vaddr, 0xff, PAGE_SIZE);
772 err = -EIO;
773 }
774err_rq:
775 i915_request_put(rq);
776err_heartbeat:
777 engine_heartbeat_enable(engine, saved);
778 if (err)
779 break;
780 }
781
782 i915_vma_unpin(vma);
783err_map:
784 i915_gem_object_unpin_map(obj);
785err_obj:
786 i915_gem_object_put(obj);
787 return err;
788}
789
790static int live_busywait_preempt(void *arg)
791{
792 struct intel_gt *gt = arg;
793 struct i915_gem_context *ctx_hi, *ctx_lo;
794 struct intel_engine_cs *engine;
795 struct drm_i915_gem_object *obj;
796 struct i915_vma *vma;
797 enum intel_engine_id id;
798 int err = -ENOMEM;
799 u32 *map;
800
801 /*
802 * Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can
803 * preempt the busywaits used to synchronise between rings.
804 */
805
806 ctx_hi = kernel_context(gt->i915);
807 if (!ctx_hi)
808 return -ENOMEM;
809 ctx_hi->sched.priority =
810 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
811
812 ctx_lo = kernel_context(gt->i915);
813 if (!ctx_lo)
814 goto err_ctx_hi;
815 ctx_lo->sched.priority =
816 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
817
818 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
819 if (IS_ERR(obj)) {
820 err = PTR_ERR(obj);
821 goto err_ctx_lo;
822 }
823
824 map = i915_gem_object_pin_map(obj, I915_MAP_WC);
825 if (IS_ERR(map)) {
826 err = PTR_ERR(map);
827 goto err_obj;
828 }
829
830 vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
831 if (IS_ERR(vma)) {
832 err = PTR_ERR(vma);
833 goto err_map;
834 }
835
836 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
837 if (err)
838 goto err_map;
839
840 for_each_engine(engine, gt, id) {
841 struct i915_request *lo, *hi;
842 struct igt_live_test t;
843 u32 *cs;
844
845 if (!intel_engine_has_preemption(engine))
846 continue;
847
848 if (!intel_engine_can_store_dword(engine))
849 continue;
850
851 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
852 err = -EIO;
853 goto err_vma;
854 }
855
856 /*
857 * We create two requests. The low priority request
858 * busywaits on a semaphore (inside the ringbuffer where
859 * it should be preemptible) and the high priority request
860 * uses a MI_STORE_DWORD_IMM to update the semaphore value
861 * allowing the first request to complete. If preemption
862 * fails, we hang instead.
863 */
864
865 lo = igt_request_alloc(ctx_lo, engine);
866 if (IS_ERR(lo)) {
867 err = PTR_ERR(lo);
868 goto err_vma;
869 }
870
871 cs = intel_ring_begin(lo, 8);
872 if (IS_ERR(cs)) {
873 err = PTR_ERR(cs);
874 i915_request_add(lo);
875 goto err_vma;
876 }
877
878 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
879 *cs++ = i915_ggtt_offset(vma);
880 *cs++ = 0;
881 *cs++ = 1;
882
883 /* XXX Do we need a flush + invalidate here?
*/ 884 885 *cs++ = MI_SEMAPHORE_WAIT | 886 MI_SEMAPHORE_GLOBAL_GTT | 887 MI_SEMAPHORE_POLL | 888 MI_SEMAPHORE_SAD_EQ_SDD; 889 *cs++ = 0; 890 *cs++ = i915_ggtt_offset(vma); 891 *cs++ = 0; 892 893 intel_ring_advance(lo, cs); 894 895 i915_request_get(lo); 896 i915_request_add(lo); 897 898 if (wait_for(READ_ONCE(*map), 10)) { 899 i915_request_put(lo); 900 err = -ETIMEDOUT; 901 goto err_vma; 902 } 903 904 /* Low priority request should be busywaiting now */ 905 if (i915_request_wait(lo, 0, 1) != -ETIME) { 906 i915_request_put(lo); 907 pr_err("%s: Busywaiting request did not!\n", 908 engine->name); 909 err = -EIO; 910 goto err_vma; 911 } 912 913 hi = igt_request_alloc(ctx_hi, engine); 914 if (IS_ERR(hi)) { 915 err = PTR_ERR(hi); 916 i915_request_put(lo); 917 goto err_vma; 918 } 919 920 cs = intel_ring_begin(hi, 4); 921 if (IS_ERR(cs)) { 922 err = PTR_ERR(cs); 923 i915_request_add(hi); 924 i915_request_put(lo); 925 goto err_vma; 926 } 927 928 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 929 *cs++ = i915_ggtt_offset(vma); 930 *cs++ = 0; 931 *cs++ = 0; 932 933 intel_ring_advance(hi, cs); 934 i915_request_add(hi); 935 936 if (i915_request_wait(lo, 0, HZ / 5) < 0) { 937 struct drm_printer p = drm_info_printer(gt->i915->drm.dev); 938 939 pr_err("%s: Failed to preempt semaphore busywait!\n", 940 engine->name); 941 942 intel_engine_dump(engine, &p, "%s\n", engine->name); 943 GEM_TRACE_DUMP(); 944 945 i915_request_put(lo); 946 intel_gt_set_wedged(gt); 947 err = -EIO; 948 goto err_vma; 949 } 950 GEM_BUG_ON(READ_ONCE(*map)); 951 i915_request_put(lo); 952 953 if (igt_live_test_end(&t)) { 954 err = -EIO; 955 goto err_vma; 956 } 957 } 958 959 err = 0; 960err_vma: 961 i915_vma_unpin(vma); 962err_map: 963 i915_gem_object_unpin_map(obj); 964err_obj: 965 i915_gem_object_put(obj); 966err_ctx_lo: 967 kernel_context_close(ctx_lo); 968err_ctx_hi: 969 kernel_context_close(ctx_hi); 970 return err; 971} 972 973static struct i915_request * 974spinner_create_request(struct igt_spinner *spin, 975 struct i915_gem_context *ctx, 976 struct intel_engine_cs *engine, 977 u32 arb) 978{ 979 struct intel_context *ce; 980 struct i915_request *rq; 981 982 ce = i915_gem_context_get_engine(ctx, engine->legacy_idx); 983 if (IS_ERR(ce)) 984 return ERR_CAST(ce); 985 986 rq = igt_spinner_create_request(spin, ce, arb); 987 intel_context_put(ce); 988 return rq; 989} 990 991static int live_preempt(void *arg) 992{ 993 struct intel_gt *gt = arg; 994 struct i915_gem_context *ctx_hi, *ctx_lo; 995 struct igt_spinner spin_hi, spin_lo; 996 struct intel_engine_cs *engine; 997 enum intel_engine_id id; 998 int err = -ENOMEM; 999 1000 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 1001 return 0; 1002 1003 if (!(gt->i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION)) 1004 pr_err("Logical preemption supported, but not exposed\n"); 1005 1006 if (igt_spinner_init(&spin_hi, gt)) 1007 return -ENOMEM; 1008 1009 if (igt_spinner_init(&spin_lo, gt)) 1010 goto err_spin_hi; 1011 1012 ctx_hi = kernel_context(gt->i915); 1013 if (!ctx_hi) 1014 goto err_spin_lo; 1015 ctx_hi->sched.priority = 1016 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY); 1017 1018 ctx_lo = kernel_context(gt->i915); 1019 if (!ctx_lo) 1020 goto err_ctx_hi; 1021 ctx_lo->sched.priority = 1022 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY); 1023 1024 for_each_engine(engine, gt, id) { 1025 struct igt_live_test t; 1026 struct i915_request *rq; 1027 1028 if (!intel_engine_has_preemption(engine)) 1029 continue; 1030 1031 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { 1032 err = 
-EIO; 1033 goto err_ctx_lo; 1034 } 1035 1036 rq = spinner_create_request(&spin_lo, ctx_lo, engine, 1037 MI_ARB_CHECK); 1038 if (IS_ERR(rq)) { 1039 err = PTR_ERR(rq); 1040 goto err_ctx_lo; 1041 } 1042 1043 i915_request_add(rq); 1044 if (!igt_wait_for_spinner(&spin_lo, rq)) { 1045 GEM_TRACE("lo spinner failed to start\n"); 1046 GEM_TRACE_DUMP(); 1047 intel_gt_set_wedged(gt); 1048 err = -EIO; 1049 goto err_ctx_lo; 1050 } 1051 1052 rq = spinner_create_request(&spin_hi, ctx_hi, engine, 1053 MI_ARB_CHECK); 1054 if (IS_ERR(rq)) { 1055 igt_spinner_end(&spin_lo); 1056 err = PTR_ERR(rq); 1057 goto err_ctx_lo; 1058 } 1059 1060 i915_request_add(rq); 1061 if (!igt_wait_for_spinner(&spin_hi, rq)) { 1062 GEM_TRACE("hi spinner failed to start\n"); 1063 GEM_TRACE_DUMP(); 1064 intel_gt_set_wedged(gt); 1065 err = -EIO; 1066 goto err_ctx_lo; 1067 } 1068 1069 igt_spinner_end(&spin_hi); 1070 igt_spinner_end(&spin_lo); 1071 1072 if (igt_live_test_end(&t)) { 1073 err = -EIO; 1074 goto err_ctx_lo; 1075 } 1076 } 1077 1078 err = 0; 1079err_ctx_lo: 1080 kernel_context_close(ctx_lo); 1081err_ctx_hi: 1082 kernel_context_close(ctx_hi); 1083err_spin_lo: 1084 igt_spinner_fini(&spin_lo); 1085err_spin_hi: 1086 igt_spinner_fini(&spin_hi); 1087 return err; 1088} 1089 1090static int live_late_preempt(void *arg) 1091{ 1092 struct intel_gt *gt = arg; 1093 struct i915_gem_context *ctx_hi, *ctx_lo; 1094 struct igt_spinner spin_hi, spin_lo; 1095 struct intel_engine_cs *engine; 1096 struct i915_sched_attr attr = {}; 1097 enum intel_engine_id id; 1098 int err = -ENOMEM; 1099 1100 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 1101 return 0; 1102 1103 if (igt_spinner_init(&spin_hi, gt)) 1104 return -ENOMEM; 1105 1106 if (igt_spinner_init(&spin_lo, gt)) 1107 goto err_spin_hi; 1108 1109 ctx_hi = kernel_context(gt->i915); 1110 if (!ctx_hi) 1111 goto err_spin_lo; 1112 1113 ctx_lo = kernel_context(gt->i915); 1114 if (!ctx_lo) 1115 goto err_ctx_hi; 1116 1117 /* Make sure ctx_lo stays before ctx_hi until we trigger preemption. 
*/ 1118 ctx_lo->sched.priority = I915_USER_PRIORITY(1); 1119 1120 for_each_engine(engine, gt, id) { 1121 struct igt_live_test t; 1122 struct i915_request *rq; 1123 1124 if (!intel_engine_has_preemption(engine)) 1125 continue; 1126 1127 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { 1128 err = -EIO; 1129 goto err_ctx_lo; 1130 } 1131 1132 rq = spinner_create_request(&spin_lo, ctx_lo, engine, 1133 MI_ARB_CHECK); 1134 if (IS_ERR(rq)) { 1135 err = PTR_ERR(rq); 1136 goto err_ctx_lo; 1137 } 1138 1139 i915_request_add(rq); 1140 if (!igt_wait_for_spinner(&spin_lo, rq)) { 1141 pr_err("First context failed to start\n"); 1142 goto err_wedged; 1143 } 1144 1145 rq = spinner_create_request(&spin_hi, ctx_hi, engine, 1146 MI_NOOP); 1147 if (IS_ERR(rq)) { 1148 igt_spinner_end(&spin_lo); 1149 err = PTR_ERR(rq); 1150 goto err_ctx_lo; 1151 } 1152 1153 i915_request_add(rq); 1154 if (igt_wait_for_spinner(&spin_hi, rq)) { 1155 pr_err("Second context overtook first?\n"); 1156 goto err_wedged; 1157 } 1158 1159 attr.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX); 1160 engine->schedule(rq, &attr); 1161 1162 if (!igt_wait_for_spinner(&spin_hi, rq)) { 1163 pr_err("High priority context failed to preempt the low priority context\n"); 1164 GEM_TRACE_DUMP(); 1165 goto err_wedged; 1166 } 1167 1168 igt_spinner_end(&spin_hi); 1169 igt_spinner_end(&spin_lo); 1170 1171 if (igt_live_test_end(&t)) { 1172 err = -EIO; 1173 goto err_ctx_lo; 1174 } 1175 } 1176 1177 err = 0; 1178err_ctx_lo: 1179 kernel_context_close(ctx_lo); 1180err_ctx_hi: 1181 kernel_context_close(ctx_hi); 1182err_spin_lo: 1183 igt_spinner_fini(&spin_lo); 1184err_spin_hi: 1185 igt_spinner_fini(&spin_hi); 1186 return err; 1187 1188err_wedged: 1189 igt_spinner_end(&spin_hi); 1190 igt_spinner_end(&spin_lo); 1191 intel_gt_set_wedged(gt); 1192 err = -EIO; 1193 goto err_ctx_lo; 1194} 1195 1196struct preempt_client { 1197 struct igt_spinner spin; 1198 struct i915_gem_context *ctx; 1199}; 1200 1201static int preempt_client_init(struct intel_gt *gt, struct preempt_client *c) 1202{ 1203 c->ctx = kernel_context(gt->i915); 1204 if (!c->ctx) 1205 return -ENOMEM; 1206 1207 if (igt_spinner_init(&c->spin, gt)) 1208 goto err_ctx; 1209 1210 return 0; 1211 1212err_ctx: 1213 kernel_context_close(c->ctx); 1214 return -ENOMEM; 1215} 1216 1217static void preempt_client_fini(struct preempt_client *c) 1218{ 1219 igt_spinner_fini(&c->spin); 1220 kernel_context_close(c->ctx); 1221} 1222 1223static int live_nopreempt(void *arg) 1224{ 1225 struct intel_gt *gt = arg; 1226 struct intel_engine_cs *engine; 1227 struct preempt_client a, b; 1228 enum intel_engine_id id; 1229 int err = -ENOMEM; 1230 1231 /* 1232 * Verify that we can disable preemption for an individual request 1233 * that may be being observed and not want to be interrupted. 1234 */ 1235 1236 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 1237 return 0; 1238 1239 if (preempt_client_init(gt, &a)) 1240 return -ENOMEM; 1241 if (preempt_client_init(gt, &b)) 1242 goto err_client_a; 1243 b.ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX); 1244 1245 for_each_engine(engine, gt, id) { 1246 struct i915_request *rq_a, *rq_b; 1247 1248 if (!intel_engine_has_preemption(engine)) 1249 continue; 1250 1251 engine->execlists.preempt_hang.count = 0; 1252 1253 rq_a = spinner_create_request(&a.spin, 1254 a.ctx, engine, 1255 MI_ARB_CHECK); 1256 if (IS_ERR(rq_a)) { 1257 err = PTR_ERR(rq_a); 1258 goto err_client_b; 1259 } 1260 1261 /* Low priority client, but unpreemptable! 
*/ 1262 __set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq_a->fence.flags); 1263 1264 i915_request_add(rq_a); 1265 if (!igt_wait_for_spinner(&a.spin, rq_a)) { 1266 pr_err("First client failed to start\n"); 1267 goto err_wedged; 1268 } 1269 1270 rq_b = spinner_create_request(&b.spin, 1271 b.ctx, engine, 1272 MI_ARB_CHECK); 1273 if (IS_ERR(rq_b)) { 1274 err = PTR_ERR(rq_b); 1275 goto err_client_b; 1276 } 1277 1278 i915_request_add(rq_b); 1279 1280 /* B is much more important than A! (But A is unpreemptable.) */ 1281 GEM_BUG_ON(rq_prio(rq_b) <= rq_prio(rq_a)); 1282 1283 /* Wait long enough for preemption and timeslicing */ 1284 if (igt_wait_for_spinner(&b.spin, rq_b)) { 1285 pr_err("Second client started too early!\n"); 1286 goto err_wedged; 1287 } 1288 1289 igt_spinner_end(&a.spin); 1290 1291 if (!igt_wait_for_spinner(&b.spin, rq_b)) { 1292 pr_err("Second client failed to start\n"); 1293 goto err_wedged; 1294 } 1295 1296 igt_spinner_end(&b.spin); 1297 1298 if (engine->execlists.preempt_hang.count) { 1299 pr_err("Preemption recorded x%d; should have been suppressed!\n", 1300 engine->execlists.preempt_hang.count); 1301 err = -EINVAL; 1302 goto err_wedged; 1303 } 1304 1305 if (igt_flush_test(gt->i915)) 1306 goto err_wedged; 1307 } 1308 1309 err = 0; 1310err_client_b: 1311 preempt_client_fini(&b); 1312err_client_a: 1313 preempt_client_fini(&a); 1314 return err; 1315 1316err_wedged: 1317 igt_spinner_end(&b.spin); 1318 igt_spinner_end(&a.spin); 1319 intel_gt_set_wedged(gt); 1320 err = -EIO; 1321 goto err_client_b; 1322} 1323 1324struct live_preempt_cancel { 1325 struct intel_engine_cs *engine; 1326 struct preempt_client a, b; 1327}; 1328 1329static int __cancel_active0(struct live_preempt_cancel *arg) 1330{ 1331 struct i915_request *rq; 1332 struct igt_live_test t; 1333 int err; 1334 1335 /* Preempt cancel of ELSP0 */ 1336 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name); 1337 if (igt_live_test_begin(&t, arg->engine->i915, 1338 __func__, arg->engine->name)) 1339 return -EIO; 1340 1341 rq = spinner_create_request(&arg->a.spin, 1342 arg->a.ctx, arg->engine, 1343 MI_ARB_CHECK); 1344 if (IS_ERR(rq)) 1345 return PTR_ERR(rq); 1346 1347 clear_bit(CONTEXT_BANNED, &rq->context->flags); 1348 i915_request_get(rq); 1349 i915_request_add(rq); 1350 if (!igt_wait_for_spinner(&arg->a.spin, rq)) { 1351 err = -EIO; 1352 goto out; 1353 } 1354 1355 intel_context_set_banned(rq->context); 1356 err = intel_engine_pulse(arg->engine); 1357 if (err) 1358 goto out; 1359 1360 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 1361 err = -EIO; 1362 goto out; 1363 } 1364 1365 if (rq->fence.error != -EIO) { 1366 pr_err("Cancelled inflight0 request did not report -EIO\n"); 1367 err = -EINVAL; 1368 goto out; 1369 } 1370 1371out: 1372 i915_request_put(rq); 1373 if (igt_live_test_end(&t)) 1374 err = -EIO; 1375 return err; 1376} 1377 1378static int __cancel_active1(struct live_preempt_cancel *arg) 1379{ 1380 struct i915_request *rq[2] = {}; 1381 struct igt_live_test t; 1382 int err; 1383 1384 /* Preempt cancel of ELSP1 */ 1385 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name); 1386 if (igt_live_test_begin(&t, arg->engine->i915, 1387 __func__, arg->engine->name)) 1388 return -EIO; 1389 1390 rq[0] = spinner_create_request(&arg->a.spin, 1391 arg->a.ctx, arg->engine, 1392 MI_NOOP); /* no preemption */ 1393 if (IS_ERR(rq[0])) 1394 return PTR_ERR(rq[0]); 1395 1396 clear_bit(CONTEXT_BANNED, &rq[0]->context->flags); 1397 i915_request_get(rq[0]); 1398 i915_request_add(rq[0]); 1399 if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) { 1400 err = -EIO; 1401 
goto out; 1402 } 1403 1404 rq[1] = spinner_create_request(&arg->b.spin, 1405 arg->b.ctx, arg->engine, 1406 MI_ARB_CHECK); 1407 if (IS_ERR(rq[1])) { 1408 err = PTR_ERR(rq[1]); 1409 goto out; 1410 } 1411 1412 clear_bit(CONTEXT_BANNED, &rq[1]->context->flags); 1413 i915_request_get(rq[1]); 1414 err = i915_request_await_dma_fence(rq[1], &rq[0]->fence); 1415 i915_request_add(rq[1]); 1416 if (err) 1417 goto out; 1418 1419 intel_context_set_banned(rq[1]->context); 1420 err = intel_engine_pulse(arg->engine); 1421 if (err) 1422 goto out; 1423 1424 igt_spinner_end(&arg->a.spin); 1425 if (i915_request_wait(rq[1], 0, HZ / 5) < 0) { 1426 err = -EIO; 1427 goto out; 1428 } 1429 1430 if (rq[0]->fence.error != 0) { 1431 pr_err("Normal inflight0 request did not complete\n"); 1432 err = -EINVAL; 1433 goto out; 1434 } 1435 1436 if (rq[1]->fence.error != -EIO) { 1437 pr_err("Cancelled inflight1 request did not report -EIO\n"); 1438 err = -EINVAL; 1439 goto out; 1440 } 1441 1442out: 1443 i915_request_put(rq[1]); 1444 i915_request_put(rq[0]); 1445 if (igt_live_test_end(&t)) 1446 err = -EIO; 1447 return err; 1448} 1449 1450static int __cancel_queued(struct live_preempt_cancel *arg) 1451{ 1452 struct i915_request *rq[3] = {}; 1453 struct igt_live_test t; 1454 int err; 1455 1456 /* Full ELSP and one in the wings */ 1457 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name); 1458 if (igt_live_test_begin(&t, arg->engine->i915, 1459 __func__, arg->engine->name)) 1460 return -EIO; 1461 1462 rq[0] = spinner_create_request(&arg->a.spin, 1463 arg->a.ctx, arg->engine, 1464 MI_ARB_CHECK); 1465 if (IS_ERR(rq[0])) 1466 return PTR_ERR(rq[0]); 1467 1468 clear_bit(CONTEXT_BANNED, &rq[0]->context->flags); 1469 i915_request_get(rq[0]); 1470 i915_request_add(rq[0]); 1471 if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) { 1472 err = -EIO; 1473 goto out; 1474 } 1475 1476 rq[1] = igt_request_alloc(arg->b.ctx, arg->engine); 1477 if (IS_ERR(rq[1])) { 1478 err = PTR_ERR(rq[1]); 1479 goto out; 1480 } 1481 1482 clear_bit(CONTEXT_BANNED, &rq[1]->context->flags); 1483 i915_request_get(rq[1]); 1484 err = i915_request_await_dma_fence(rq[1], &rq[0]->fence); 1485 i915_request_add(rq[1]); 1486 if (err) 1487 goto out; 1488 1489 rq[2] = spinner_create_request(&arg->b.spin, 1490 arg->a.ctx, arg->engine, 1491 MI_ARB_CHECK); 1492 if (IS_ERR(rq[2])) { 1493 err = PTR_ERR(rq[2]); 1494 goto out; 1495 } 1496 1497 i915_request_get(rq[2]); 1498 err = i915_request_await_dma_fence(rq[2], &rq[1]->fence); 1499 i915_request_add(rq[2]); 1500 if (err) 1501 goto out; 1502 1503 intel_context_set_banned(rq[2]->context); 1504 err = intel_engine_pulse(arg->engine); 1505 if (err) 1506 goto out; 1507 1508 if (i915_request_wait(rq[2], 0, HZ / 5) < 0) { 1509 err = -EIO; 1510 goto out; 1511 } 1512 1513 if (rq[0]->fence.error != -EIO) { 1514 pr_err("Cancelled inflight0 request did not report -EIO\n"); 1515 err = -EINVAL; 1516 goto out; 1517 } 1518 1519 if (rq[1]->fence.error != 0) { 1520 pr_err("Normal inflight1 request did not complete\n"); 1521 err = -EINVAL; 1522 goto out; 1523 } 1524 1525 if (rq[2]->fence.error != -EIO) { 1526 pr_err("Cancelled queued request did not report -EIO\n"); 1527 err = -EINVAL; 1528 goto out; 1529 } 1530 1531out: 1532 i915_request_put(rq[2]); 1533 i915_request_put(rq[1]); 1534 i915_request_put(rq[0]); 1535 if (igt_live_test_end(&t)) 1536 err = -EIO; 1537 return err; 1538} 1539 1540static int __cancel_hostile(struct live_preempt_cancel *arg) 1541{ 1542 struct i915_request *rq; 1543 int err; 1544 1545 /* Preempt cancel non-preemptible spinner in ELSP0 
*/ 1546 if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT)) 1547 return 0; 1548 1549 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name); 1550 rq = spinner_create_request(&arg->a.spin, 1551 arg->a.ctx, arg->engine, 1552 MI_NOOP); /* preemption disabled */ 1553 if (IS_ERR(rq)) 1554 return PTR_ERR(rq); 1555 1556 clear_bit(CONTEXT_BANNED, &rq->context->flags); 1557 i915_request_get(rq); 1558 i915_request_add(rq); 1559 if (!igt_wait_for_spinner(&arg->a.spin, rq)) { 1560 err = -EIO; 1561 goto out; 1562 } 1563 1564 intel_context_set_banned(rq->context); 1565 err = intel_engine_pulse(arg->engine); /* force reset */ 1566 if (err) 1567 goto out; 1568 1569 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 1570 err = -EIO; 1571 goto out; 1572 } 1573 1574 if (rq->fence.error != -EIO) { 1575 pr_err("Cancelled inflight0 request did not report -EIO\n"); 1576 err = -EINVAL; 1577 goto out; 1578 } 1579 1580out: 1581 i915_request_put(rq); 1582 if (igt_flush_test(arg->engine->i915)) 1583 err = -EIO; 1584 return err; 1585} 1586 1587static int live_preempt_cancel(void *arg) 1588{ 1589 struct intel_gt *gt = arg; 1590 struct live_preempt_cancel data; 1591 enum intel_engine_id id; 1592 int err = -ENOMEM; 1593 1594 /* 1595 * To cancel an inflight context, we need to first remove it from the 1596 * GPU. That sounds like preemption! Plus a little bit of bookkeeping. 1597 */ 1598 1599 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 1600 return 0; 1601 1602 if (preempt_client_init(gt, &data.a)) 1603 return -ENOMEM; 1604 if (preempt_client_init(gt, &data.b)) 1605 goto err_client_a; 1606 1607 for_each_engine(data.engine, gt, id) { 1608 if (!intel_engine_has_preemption(data.engine)) 1609 continue; 1610 1611 err = __cancel_active0(&data); 1612 if (err) 1613 goto err_wedged; 1614 1615 err = __cancel_active1(&data); 1616 if (err) 1617 goto err_wedged; 1618 1619 err = __cancel_queued(&data); 1620 if (err) 1621 goto err_wedged; 1622 1623 err = __cancel_hostile(&data); 1624 if (err) 1625 goto err_wedged; 1626 } 1627 1628 err = 0; 1629err_client_b: 1630 preempt_client_fini(&data.b); 1631err_client_a: 1632 preempt_client_fini(&data.a); 1633 return err; 1634 1635err_wedged: 1636 GEM_TRACE_DUMP(); 1637 igt_spinner_end(&data.b.spin); 1638 igt_spinner_end(&data.a.spin); 1639 intel_gt_set_wedged(gt); 1640 goto err_client_b; 1641} 1642 1643static int live_suppress_self_preempt(void *arg) 1644{ 1645 struct intel_gt *gt = arg; 1646 struct intel_engine_cs *engine; 1647 struct i915_sched_attr attr = { 1648 .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX) 1649 }; 1650 struct preempt_client a, b; 1651 enum intel_engine_id id; 1652 int err = -ENOMEM; 1653 1654 /* 1655 * Verify that if a preemption request does not cause a change in 1656 * the current execution order, the preempt-to-idle injection is 1657 * skipped and that we do not accidentally apply it after the CS 1658 * completion event. 
1659 */ 1660 1661 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 1662 return 0; 1663 1664 if (USES_GUC_SUBMISSION(gt->i915)) 1665 return 0; /* presume black blox */ 1666 1667 if (intel_vgpu_active(gt->i915)) 1668 return 0; /* GVT forces single port & request submission */ 1669 1670 if (preempt_client_init(gt, &a)) 1671 return -ENOMEM; 1672 if (preempt_client_init(gt, &b)) 1673 goto err_client_a; 1674 1675 for_each_engine(engine, gt, id) { 1676 struct i915_request *rq_a, *rq_b; 1677 int depth; 1678 1679 if (!intel_engine_has_preemption(engine)) 1680 continue; 1681 1682 if (igt_flush_test(gt->i915)) 1683 goto err_wedged; 1684 1685 intel_engine_pm_get(engine); 1686 engine->execlists.preempt_hang.count = 0; 1687 1688 rq_a = spinner_create_request(&a.spin, 1689 a.ctx, engine, 1690 MI_NOOP); 1691 if (IS_ERR(rq_a)) { 1692 err = PTR_ERR(rq_a); 1693 intel_engine_pm_put(engine); 1694 goto err_client_b; 1695 } 1696 1697 i915_request_add(rq_a); 1698 if (!igt_wait_for_spinner(&a.spin, rq_a)) { 1699 pr_err("First client failed to start\n"); 1700 intel_engine_pm_put(engine); 1701 goto err_wedged; 1702 } 1703 1704 /* Keep postponing the timer to avoid premature slicing */ 1705 mod_timer(&engine->execlists.timer, jiffies + HZ); 1706 for (depth = 0; depth < 8; depth++) { 1707 rq_b = spinner_create_request(&b.spin, 1708 b.ctx, engine, 1709 MI_NOOP); 1710 if (IS_ERR(rq_b)) { 1711 err = PTR_ERR(rq_b); 1712 intel_engine_pm_put(engine); 1713 goto err_client_b; 1714 } 1715 i915_request_add(rq_b); 1716 1717 GEM_BUG_ON(i915_request_completed(rq_a)); 1718 engine->schedule(rq_a, &attr); 1719 igt_spinner_end(&a.spin); 1720 1721 if (!igt_wait_for_spinner(&b.spin, rq_b)) { 1722 pr_err("Second client failed to start\n"); 1723 intel_engine_pm_put(engine); 1724 goto err_wedged; 1725 } 1726 1727 swap(a, b); 1728 rq_a = rq_b; 1729 } 1730 igt_spinner_end(&a.spin); 1731 1732 if (engine->execlists.preempt_hang.count) { 1733 pr_err("Preemption on %s recorded x%d, depth %d; should have been suppressed!\n", 1734 engine->name, 1735 engine->execlists.preempt_hang.count, 1736 depth); 1737 intel_engine_pm_put(engine); 1738 err = -EINVAL; 1739 goto err_client_b; 1740 } 1741 1742 intel_engine_pm_put(engine); 1743 if (igt_flush_test(gt->i915)) 1744 goto err_wedged; 1745 } 1746 1747 err = 0; 1748err_client_b: 1749 preempt_client_fini(&b); 1750err_client_a: 1751 preempt_client_fini(&a); 1752 return err; 1753 1754err_wedged: 1755 igt_spinner_end(&b.spin); 1756 igt_spinner_end(&a.spin); 1757 intel_gt_set_wedged(gt); 1758 err = -EIO; 1759 goto err_client_b; 1760} 1761 1762static int __i915_sw_fence_call 1763dummy_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) 1764{ 1765 return NOTIFY_DONE; 1766} 1767 1768static struct i915_request *dummy_request(struct intel_engine_cs *engine) 1769{ 1770 struct i915_request *rq; 1771 1772 rq = kzalloc(sizeof(*rq), GFP_KERNEL); 1773 if (!rq) 1774 return NULL; 1775 1776 rq->engine = engine; 1777 1778 spin_lock_init(&rq->lock); 1779 INIT_LIST_HEAD(&rq->fence.cb_list); 1780 rq->fence.lock = &rq->lock; 1781 rq->fence.ops = &i915_fence_ops; 1782 1783 i915_sched_node_init(&rq->sched); 1784 1785 /* mark this request as permanently incomplete */ 1786 rq->fence.seqno = 1; 1787 BUILD_BUG_ON(sizeof(rq->fence.seqno) != 8); /* upper 32b == 0 */ 1788 rq->hwsp_seqno = (u32 *)&rq->fence.seqno + 1; 1789 GEM_BUG_ON(i915_request_completed(rq)); 1790 1791 i915_sw_fence_init(&rq->submit, dummy_notify); 1792 set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags); 1793 1794 spin_lock_init(&rq->lock); 1795 
rq->fence.lock = &rq->lock; 1796 INIT_LIST_HEAD(&rq->fence.cb_list); 1797 1798 return rq; 1799} 1800 1801static void dummy_request_free(struct i915_request *dummy) 1802{ 1803 /* We have to fake the CS interrupt to kick the next request */ 1804 i915_sw_fence_commit(&dummy->submit); 1805 1806 i915_request_mark_complete(dummy); 1807 dma_fence_signal(&dummy->fence); 1808 1809 i915_sched_node_fini(&dummy->sched); 1810 i915_sw_fence_fini(&dummy->submit); 1811 1812 dma_fence_free(&dummy->fence); 1813} 1814 1815static int live_suppress_wait_preempt(void *arg) 1816{ 1817 struct intel_gt *gt = arg; 1818 struct preempt_client client[4]; 1819 struct i915_request *rq[ARRAY_SIZE(client)] = {}; 1820 struct intel_engine_cs *engine; 1821 enum intel_engine_id id; 1822 int err = -ENOMEM; 1823 int i; 1824 1825 /* 1826 * Waiters are given a little priority nudge, but not enough 1827 * to actually cause any preemption. Double check that we do 1828 * not needlessly generate preempt-to-idle cycles. 1829 */ 1830 1831 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 1832 return 0; 1833 1834 if (preempt_client_init(gt, &client[0])) /* ELSP[0] */ 1835 return -ENOMEM; 1836 if (preempt_client_init(gt, &client[1])) /* ELSP[1] */ 1837 goto err_client_0; 1838 if (preempt_client_init(gt, &client[2])) /* head of queue */ 1839 goto err_client_1; 1840 if (preempt_client_init(gt, &client[3])) /* bystander */ 1841 goto err_client_2; 1842 1843 for_each_engine(engine, gt, id) { 1844 int depth; 1845 1846 if (!intel_engine_has_preemption(engine)) 1847 continue; 1848 1849 if (!engine->emit_init_breadcrumb) 1850 continue; 1851 1852 for (depth = 0; depth < ARRAY_SIZE(client); depth++) { 1853 struct i915_request *dummy; 1854 1855 engine->execlists.preempt_hang.count = 0; 1856 1857 dummy = dummy_request(engine); 1858 if (!dummy) 1859 goto err_client_3; 1860 1861 for (i = 0; i < ARRAY_SIZE(client); i++) { 1862 struct i915_request *this; 1863 1864 this = spinner_create_request(&client[i].spin, 1865 client[i].ctx, engine, 1866 MI_NOOP); 1867 if (IS_ERR(this)) { 1868 err = PTR_ERR(this); 1869 goto err_wedged; 1870 } 1871 1872 /* Disable NEWCLIENT promotion */ 1873 __i915_active_fence_set(&i915_request_timeline(this)->last_request, 1874 &dummy->fence); 1875 1876 rq[i] = i915_request_get(this); 1877 i915_request_add(this); 1878 } 1879 1880 dummy_request_free(dummy); 1881 1882 GEM_BUG_ON(i915_request_completed(rq[0])); 1883 if (!igt_wait_for_spinner(&client[0].spin, rq[0])) { 1884 pr_err("%s: First client failed to start\n", 1885 engine->name); 1886 goto err_wedged; 1887 } 1888 GEM_BUG_ON(!i915_request_started(rq[0])); 1889 1890 if (i915_request_wait(rq[depth], 1891 I915_WAIT_PRIORITY, 1892 1) != -ETIME) { 1893 pr_err("%s: Waiter depth:%d completed!\n", 1894 engine->name, depth); 1895 goto err_wedged; 1896 } 1897 1898 for (i = 0; i < ARRAY_SIZE(client); i++) { 1899 igt_spinner_end(&client[i].spin); 1900 i915_request_put(rq[i]); 1901 rq[i] = NULL; 1902 } 1903 1904 if (igt_flush_test(gt->i915)) 1905 goto err_wedged; 1906 1907 if (engine->execlists.preempt_hang.count) { 1908 pr_err("%s: Preemption recorded x%d, depth %d; should have been suppressed!\n", 1909 engine->name, 1910 engine->execlists.preempt_hang.count, 1911 depth); 1912 err = -EINVAL; 1913 goto err_client_3; 1914 } 1915 } 1916 } 1917 1918 err = 0; 1919err_client_3: 1920 preempt_client_fini(&client[3]); 1921err_client_2: 1922 preempt_client_fini(&client[2]); 1923err_client_1: 1924 preempt_client_fini(&client[1]); 1925err_client_0: 1926 preempt_client_fini(&client[0]); 1927 return err; 1928 
1929err_wedged: 1930 for (i = 0; i < ARRAY_SIZE(client); i++) { 1931 igt_spinner_end(&client[i].spin); 1932 i915_request_put(rq[i]); 1933 } 1934 intel_gt_set_wedged(gt); 1935 err = -EIO; 1936 goto err_client_3; 1937} 1938 1939static int live_chain_preempt(void *arg) 1940{ 1941 struct intel_gt *gt = arg; 1942 struct intel_engine_cs *engine; 1943 struct preempt_client hi, lo; 1944 enum intel_engine_id id; 1945 int err = -ENOMEM; 1946 1947 /* 1948 * Build a chain AB...BA between two contexts (A, B) and request 1949 * preemption of the last request. It should then complete before 1950 * the previously submitted spinner in B. 1951 */ 1952 1953 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 1954 return 0; 1955 1956 if (preempt_client_init(gt, &hi)) 1957 return -ENOMEM; 1958 1959 if (preempt_client_init(gt, &lo)) 1960 goto err_client_hi; 1961 1962 for_each_engine(engine, gt, id) { 1963 struct i915_sched_attr attr = { 1964 .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX), 1965 }; 1966 struct igt_live_test t; 1967 struct i915_request *rq; 1968 int ring_size, count, i; 1969 1970 if (!intel_engine_has_preemption(engine)) 1971 continue; 1972 1973 rq = spinner_create_request(&lo.spin, 1974 lo.ctx, engine, 1975 MI_ARB_CHECK); 1976 if (IS_ERR(rq)) 1977 goto err_wedged; 1978 1979 i915_request_get(rq); 1980 i915_request_add(rq); 1981 1982 ring_size = rq->wa_tail - rq->head; 1983 if (ring_size < 0) 1984 ring_size += rq->ring->size; 1985 ring_size = rq->ring->size / ring_size; 1986 pr_debug("%s(%s): Using maximum of %d requests\n", 1987 __func__, engine->name, ring_size); 1988 1989 igt_spinner_end(&lo.spin); 1990 if (i915_request_wait(rq, 0, HZ / 2) < 0) { 1991 pr_err("Timed out waiting to flush %s\n", engine->name); 1992 i915_request_put(rq); 1993 goto err_wedged; 1994 } 1995 i915_request_put(rq); 1996 1997 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { 1998 err = -EIO; 1999 goto err_wedged; 2000 } 2001 2002 for_each_prime_number_from(count, 1, ring_size) { 2003 rq = spinner_create_request(&hi.spin, 2004 hi.ctx, engine, 2005 MI_ARB_CHECK); 2006 if (IS_ERR(rq)) 2007 goto err_wedged; 2008 i915_request_add(rq); 2009 if (!igt_wait_for_spinner(&hi.spin, rq)) 2010 goto err_wedged; 2011 2012 rq = spinner_create_request(&lo.spin, 2013 lo.ctx, engine, 2014 MI_ARB_CHECK); 2015 if (IS_ERR(rq)) 2016 goto err_wedged; 2017 i915_request_add(rq); 2018 2019 for (i = 0; i < count; i++) { 2020 rq = igt_request_alloc(lo.ctx, engine); 2021 if (IS_ERR(rq)) 2022 goto err_wedged; 2023 i915_request_add(rq); 2024 } 2025 2026 rq = igt_request_alloc(hi.ctx, engine); 2027 if (IS_ERR(rq)) 2028 goto err_wedged; 2029 2030 i915_request_get(rq); 2031 i915_request_add(rq); 2032 engine->schedule(rq, &attr); 2033 2034 igt_spinner_end(&hi.spin); 2035 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 2036 struct drm_printer p = 2037 drm_info_printer(gt->i915->drm.dev); 2038 2039 pr_err("Failed to preempt over chain of %d\n", 2040 count); 2041 intel_engine_dump(engine, &p, 2042 "%s\n", engine->name); 2043 i915_request_put(rq); 2044 goto err_wedged; 2045 } 2046 igt_spinner_end(&lo.spin); 2047 i915_request_put(rq); 2048 2049 rq = igt_request_alloc(lo.ctx, engine); 2050 if (IS_ERR(rq)) 2051 goto err_wedged; 2052 2053 i915_request_get(rq); 2054 i915_request_add(rq); 2055 2056 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 2057 struct drm_printer p = 2058 drm_info_printer(gt->i915->drm.dev); 2059 2060 pr_err("Failed to flush low priority chain of %d requests\n", 2061 count); 2062 intel_engine_dump(engine, &p, 2063 "%s\n", engine->name); 2064 
2065 i915_request_put(rq);
2066 goto err_wedged;
2067 }
2068 i915_request_put(rq);
2069 }
2070
2071 if (igt_live_test_end(&t)) {
2072 err = -EIO;
2073 goto err_wedged;
2074 }
2075 }
2076
2077 err = 0;
2078err_client_lo:
2079 preempt_client_fini(&lo);
2080err_client_hi:
2081 preempt_client_fini(&hi);
2082 return err;
2083
2084err_wedged:
2085 igt_spinner_end(&hi.spin);
2086 igt_spinner_end(&lo.spin);
2087 intel_gt_set_wedged(gt);
2088 err = -EIO;
2089 goto err_client_lo;
2090}
2091
2092static int create_gang(struct intel_engine_cs *engine,
2093 struct i915_request **prev)
2094{
2095 struct drm_i915_gem_object *obj;
2096 struct intel_context *ce;
2097 struct i915_request *rq;
2098 struct i915_vma *vma;
2099 u32 *cs;
2100 int err;
2101
2102 ce = intel_context_create(engine);
2103 if (IS_ERR(ce))
2104 return PTR_ERR(ce);
2105
2106 obj = i915_gem_object_create_internal(engine->i915, 4096);
2107 if (IS_ERR(obj)) {
2108 err = PTR_ERR(obj);
2109 goto err_ce;
2110 }
2111
2112 vma = i915_vma_instance(obj, ce->vm, NULL);
2113 if (IS_ERR(vma)) {
2114 err = PTR_ERR(vma);
2115 goto err_obj;
2116 }
2117
2118 err = i915_vma_pin(vma, 0, 0, PIN_USER);
2119 if (err)
2120 goto err_obj;
2121
2122 cs = i915_gem_object_pin_map(obj, I915_MAP_WC);
2123 if (IS_ERR(cs)) {
 err = PTR_ERR(cs); /* propagate the failure rather than returning 0 with err left unset */
2124 goto err_obj;
 }
2125
2126 /* Semaphore target: spin until zero */
2127 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
2128
2129 *cs++ = MI_SEMAPHORE_WAIT |
2130 MI_SEMAPHORE_POLL |
2131 MI_SEMAPHORE_SAD_EQ_SDD;
2132 *cs++ = 0;
2133 *cs++ = lower_32_bits(vma->node.start);
2134 *cs++ = upper_32_bits(vma->node.start);
2135
2136 if (*prev) {
2137 u64 offset = (*prev)->batch->node.start;
2138
2139 /* Terminate the spinner in the next lower priority batch. */
2140 *cs++ = MI_STORE_DWORD_IMM_GEN4;
2141 *cs++ = lower_32_bits(offset);
2142 *cs++ = upper_32_bits(offset);
2143 *cs++ = 0;
2144 }
2145
2146 *cs++ = MI_BATCH_BUFFER_END;
2147 i915_gem_object_flush_map(obj);
2148 i915_gem_object_unpin_map(obj);
2149
2150 rq = intel_context_create_request(ce);
2151 if (IS_ERR(rq)) {
 err = PTR_ERR(rq); /* propagate the failure rather than returning 0 with err left unset */
2152 goto err_obj;
 }
2153
2154 rq->batch = vma;
2155 i915_request_get(rq);
2156
2157 i915_vma_lock(vma);
2158 err = i915_request_await_object(rq, vma->obj, false);
2159 if (!err)
2160 err = i915_vma_move_to_active(vma, rq, 0);
2161 if (!err)
2162 err = rq->engine->emit_bb_start(rq,
2163 vma->node.start,
2164 PAGE_SIZE, 0);
2165 i915_vma_unlock(vma);
2166 i915_request_add(rq);
2167 if (err)
2168 goto err_rq;
2169
2170 i915_gem_object_put(obj);
2171 intel_context_put(ce);
2172
2173 rq->client_link.next = &(*prev)->client_link;
2174 *prev = rq;
2175 return 0;
2176
2177err_rq:
2178 i915_request_put(rq);
2179err_obj:
2180 i915_gem_object_put(obj);
2181err_ce:
2182 intel_context_put(ce);
2183 return err;
2184}
2185
2186static int live_preempt_gang(void *arg)
2187{
2188 struct intel_gt *gt = arg;
2189 struct intel_engine_cs *engine;
2190 enum intel_engine_id id;
2191
2192 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2193 return 0;
2194
2195 /*
2196 * Build as long a chain of preempters as we can, with each
2197 * request higher priority than the last. Once we are ready, we release
2198 * the last batch which then percolates down the chain, each releasing
2199 * the next oldest in turn. The intent is to simply push as hard as we
2200 * can with the number of preemptions, trying to exceed narrow HW
2201 * limits. At a minimum, we insist that we can sort all the user
2202 * high priority levels into execution order.
2203 */ 2204 2205 for_each_engine(engine, gt, id) { 2206 struct i915_request *rq = NULL; 2207 struct igt_live_test t; 2208 IGT_TIMEOUT(end_time); 2209 int prio = 0; 2210 int err = 0; 2211 u32 *cs; 2212 2213 if (!intel_engine_has_preemption(engine)) 2214 continue; 2215 2216 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) 2217 return -EIO; 2218 2219 do { 2220 struct i915_sched_attr attr = { 2221 .priority = I915_USER_PRIORITY(prio++), 2222 }; 2223 2224 err = create_gang(engine, &rq); 2225 if (err) 2226 break; 2227 2228 /* Submit each spinner at increasing priority */ 2229 engine->schedule(rq, &attr); 2230 2231 if (prio <= I915_PRIORITY_MAX) 2232 continue; 2233 2234 if (prio > (INT_MAX >> I915_USER_PRIORITY_SHIFT)) 2235 break; 2236 2237 if (__igt_timeout(end_time, NULL)) 2238 break; 2239 } while (1); 2240 pr_debug("%s: Preempt chain of %d requests\n", 2241 engine->name, prio); 2242 2243 /* 2244 * Such that the last spinner is the highest priority and 2245 * should execute first. When that spinner completes, 2246 * it will terminate the next lowest spinner until there 2247 * are no more spinners and the gang is complete. 2248 */ 2249 cs = i915_gem_object_pin_map(rq->batch->obj, I915_MAP_WC); 2250 if (!IS_ERR(cs)) { 2251 *cs = 0; 2252 i915_gem_object_unpin_map(rq->batch->obj); 2253 } else { 2254 err = PTR_ERR(cs); 2255 intel_gt_set_wedged(gt); 2256 } 2257 2258 while (rq) { /* wait for each rq from highest to lowest prio */ 2259 struct i915_request *n = 2260 list_next_entry(rq, client_link); 2261 2262 if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) { 2263 struct drm_printer p = 2264 drm_info_printer(engine->i915->drm.dev); 2265 2266 pr_err("Failed to flush chain of %d requests, at %d\n", 2267 prio, rq_prio(rq) >> I915_USER_PRIORITY_SHIFT); 2268 intel_engine_dump(engine, &p, 2269 "%s\n", engine->name); 2270 2271 err = -ETIME; 2272 } 2273 2274 i915_request_put(rq); 2275 rq = n; 2276 } 2277 2278 if (igt_live_test_end(&t)) 2279 err = -EIO; 2280 if (err) 2281 return err; 2282 } 2283 2284 return 0; 2285} 2286 2287static int live_preempt_hang(void *arg) 2288{ 2289 struct intel_gt *gt = arg; 2290 struct i915_gem_context *ctx_hi, *ctx_lo; 2291 struct igt_spinner spin_hi, spin_lo; 2292 struct intel_engine_cs *engine; 2293 enum intel_engine_id id; 2294 int err = -ENOMEM; 2295 2296 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 2297 return 0; 2298 2299 if (!intel_has_reset_engine(gt)) 2300 return 0; 2301 2302 if (igt_spinner_init(&spin_hi, gt)) 2303 return -ENOMEM; 2304 2305 if (igt_spinner_init(&spin_lo, gt)) 2306 goto err_spin_hi; 2307 2308 ctx_hi = kernel_context(gt->i915); 2309 if (!ctx_hi) 2310 goto err_spin_lo; 2311 ctx_hi->sched.priority = 2312 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY); 2313 2314 ctx_lo = kernel_context(gt->i915); 2315 if (!ctx_lo) 2316 goto err_ctx_hi; 2317 ctx_lo->sched.priority = 2318 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY); 2319 2320 for_each_engine(engine, gt, id) { 2321 struct i915_request *rq; 2322 2323 if (!intel_engine_has_preemption(engine)) 2324 continue; 2325 2326 rq = spinner_create_request(&spin_lo, ctx_lo, engine, 2327 MI_ARB_CHECK); 2328 if (IS_ERR(rq)) { 2329 err = PTR_ERR(rq); 2330 goto err_ctx_lo; 2331 } 2332 2333 i915_request_add(rq); 2334 if (!igt_wait_for_spinner(&spin_lo, rq)) { 2335 GEM_TRACE("lo spinner failed to start\n"); 2336 GEM_TRACE_DUMP(); 2337 intel_gt_set_wedged(gt); 2338 err = -EIO; 2339 goto err_ctx_lo; 2340 } 2341 2342 rq = spinner_create_request(&spin_hi, ctx_hi, engine, 2343 MI_ARB_CHECK); 2344 if 
(IS_ERR(rq)) { 2345 igt_spinner_end(&spin_lo); 2346 err = PTR_ERR(rq); 2347 goto err_ctx_lo; 2348 } 2349 2350 init_completion(&engine->execlists.preempt_hang.completion); 2351 engine->execlists.preempt_hang.inject_hang = true; 2352 2353 i915_request_add(rq); 2354 2355 if (!wait_for_completion_timeout(&engine->execlists.preempt_hang.completion, 2356 HZ / 10)) { 2357 pr_err("Preemption did not occur within timeout!"); 2358 GEM_TRACE_DUMP(); 2359 intel_gt_set_wedged(gt); 2360 err = -EIO; 2361 goto err_ctx_lo; 2362 } 2363 2364 set_bit(I915_RESET_ENGINE + id, >->reset.flags); 2365 intel_engine_reset(engine, NULL); 2366 clear_bit(I915_RESET_ENGINE + id, >->reset.flags); 2367 2368 engine->execlists.preempt_hang.inject_hang = false; 2369 2370 if (!igt_wait_for_spinner(&spin_hi, rq)) { 2371 GEM_TRACE("hi spinner failed to start\n"); 2372 GEM_TRACE_DUMP(); 2373 intel_gt_set_wedged(gt); 2374 err = -EIO; 2375 goto err_ctx_lo; 2376 } 2377 2378 igt_spinner_end(&spin_hi); 2379 igt_spinner_end(&spin_lo); 2380 if (igt_flush_test(gt->i915)) { 2381 err = -EIO; 2382 goto err_ctx_lo; 2383 } 2384 } 2385 2386 err = 0; 2387err_ctx_lo: 2388 kernel_context_close(ctx_lo); 2389err_ctx_hi: 2390 kernel_context_close(ctx_hi); 2391err_spin_lo: 2392 igt_spinner_fini(&spin_lo); 2393err_spin_hi: 2394 igt_spinner_fini(&spin_hi); 2395 return err; 2396} 2397 2398static int live_preempt_timeout(void *arg) 2399{ 2400 struct intel_gt *gt = arg; 2401 struct i915_gem_context *ctx_hi, *ctx_lo; 2402 struct igt_spinner spin_lo; 2403 struct intel_engine_cs *engine; 2404 enum intel_engine_id id; 2405 int err = -ENOMEM; 2406 2407 /* 2408 * Check that we force preemption to occur by cancelling the previous 2409 * context if it refuses to yield the GPU. 2410 */ 2411 if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT)) 2412 return 0; 2413 2414 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 2415 return 0; 2416 2417 if (!intel_has_reset_engine(gt)) 2418 return 0; 2419 2420 if (igt_spinner_init(&spin_lo, gt)) 2421 return -ENOMEM; 2422 2423 ctx_hi = kernel_context(gt->i915); 2424 if (!ctx_hi) 2425 goto err_spin_lo; 2426 ctx_hi->sched.priority = 2427 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY); 2428 2429 ctx_lo = kernel_context(gt->i915); 2430 if (!ctx_lo) 2431 goto err_ctx_hi; 2432 ctx_lo->sched.priority = 2433 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY); 2434 2435 for_each_engine(engine, gt, id) { 2436 unsigned long saved_timeout; 2437 struct i915_request *rq; 2438 2439 if (!intel_engine_has_preemption(engine)) 2440 continue; 2441 2442 rq = spinner_create_request(&spin_lo, ctx_lo, engine, 2443 MI_NOOP); /* preemption disabled */ 2444 if (IS_ERR(rq)) { 2445 err = PTR_ERR(rq); 2446 goto err_ctx_lo; 2447 } 2448 2449 i915_request_add(rq); 2450 if (!igt_wait_for_spinner(&spin_lo, rq)) { 2451 intel_gt_set_wedged(gt); 2452 err = -EIO; 2453 goto err_ctx_lo; 2454 } 2455 2456 rq = igt_request_alloc(ctx_hi, engine); 2457 if (IS_ERR(rq)) { 2458 igt_spinner_end(&spin_lo); 2459 err = PTR_ERR(rq); 2460 goto err_ctx_lo; 2461 } 2462 2463 /* Flush the previous CS ack before changing timeouts */ 2464 while (READ_ONCE(engine->execlists.pending[0])) 2465 cpu_relax(); 2466 2467 saved_timeout = engine->props.preempt_timeout_ms; 2468 engine->props.preempt_timeout_ms = 1; /* in ms, -> 1 jiffie */ 2469 2470 i915_request_get(rq); 2471 i915_request_add(rq); 2472 2473 intel_engine_flush_submission(engine); 2474 engine->props.preempt_timeout_ms = saved_timeout; 2475 2476 if (i915_request_wait(rq, 0, HZ / 10) < 0) { 2477 intel_gt_set_wedged(gt); 2478 
i915_request_put(rq); 2479 err = -ETIME; 2480 goto err_ctx_lo; 2481 } 2482 2483 igt_spinner_end(&spin_lo); 2484 i915_request_put(rq); 2485 } 2486 2487 err = 0; 2488err_ctx_lo: 2489 kernel_context_close(ctx_lo); 2490err_ctx_hi: 2491 kernel_context_close(ctx_hi); 2492err_spin_lo: 2493 igt_spinner_fini(&spin_lo); 2494 return err; 2495} 2496 2497static int random_range(struct rnd_state *rnd, int min, int max) 2498{ 2499 return i915_prandom_u32_max_state(max - min, rnd) + min; 2500} 2501 2502static int random_priority(struct rnd_state *rnd) 2503{ 2504 return random_range(rnd, I915_PRIORITY_MIN, I915_PRIORITY_MAX); 2505} 2506 2507struct preempt_smoke { 2508 struct intel_gt *gt; 2509 struct i915_gem_context **contexts; 2510 struct intel_engine_cs *engine; 2511 struct drm_i915_gem_object *batch; 2512 unsigned int ncontext; 2513 struct rnd_state prng; 2514 unsigned long count; 2515}; 2516 2517static struct i915_gem_context *smoke_context(struct preempt_smoke *smoke) 2518{ 2519 return smoke->contexts[i915_prandom_u32_max_state(smoke->ncontext, 2520 &smoke->prng)]; 2521} 2522 2523static int smoke_submit(struct preempt_smoke *smoke, 2524 struct i915_gem_context *ctx, int prio, 2525 struct drm_i915_gem_object *batch) 2526{ 2527 struct i915_request *rq; 2528 struct i915_vma *vma = NULL; 2529 int err = 0; 2530 2531 if (batch) { 2532 struct i915_address_space *vm; 2533 2534 vm = i915_gem_context_get_vm_rcu(ctx); 2535 vma = i915_vma_instance(batch, vm, NULL); 2536 i915_vm_put(vm); 2537 if (IS_ERR(vma)) 2538 return PTR_ERR(vma); 2539 2540 err = i915_vma_pin(vma, 0, 0, PIN_USER); 2541 if (err) 2542 return err; 2543 } 2544 2545 ctx->sched.priority = prio; 2546 2547 rq = igt_request_alloc(ctx, smoke->engine); 2548 if (IS_ERR(rq)) { 2549 err = PTR_ERR(rq); 2550 goto unpin; 2551 } 2552 2553 if (vma) { 2554 i915_vma_lock(vma); 2555 err = i915_request_await_object(rq, vma->obj, false); 2556 if (!err) 2557 err = i915_vma_move_to_active(vma, rq, 0); 2558 if (!err) 2559 err = rq->engine->emit_bb_start(rq, 2560 vma->node.start, 2561 PAGE_SIZE, 0); 2562 i915_vma_unlock(vma); 2563 } 2564 2565 i915_request_add(rq); 2566 2567unpin: 2568 if (vma) 2569 i915_vma_unpin(vma); 2570 2571 return err; 2572} 2573 2574static int smoke_crescendo_thread(void *arg) 2575{ 2576 struct preempt_smoke *smoke = arg; 2577 IGT_TIMEOUT(end_time); 2578 unsigned long count; 2579 2580 count = 0; 2581 do { 2582 struct i915_gem_context *ctx = smoke_context(smoke); 2583 int err; 2584 2585 err = smoke_submit(smoke, 2586 ctx, count % I915_PRIORITY_MAX, 2587 smoke->batch); 2588 if (err) 2589 return err; 2590 2591 count++; 2592 } while (!__igt_timeout(end_time, NULL)); 2593 2594 smoke->count = count; 2595 return 0; 2596} 2597 2598static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags) 2599#define BATCH BIT(0) 2600{ 2601 struct task_struct *tsk[I915_NUM_ENGINES] = {}; 2602 struct preempt_smoke arg[I915_NUM_ENGINES]; 2603 struct intel_engine_cs *engine; 2604 enum intel_engine_id id; 2605 unsigned long count; 2606 int err = 0; 2607 2608 for_each_engine(engine, smoke->gt, id) { 2609 arg[id] = *smoke; 2610 arg[id].engine = engine; 2611 if (!(flags & BATCH)) 2612 arg[id].batch = NULL; 2613 arg[id].count = 0; 2614 2615 tsk[id] = kthread_run(smoke_crescendo_thread, &arg, 2616 "igt/smoke:%d", id); 2617 if (IS_ERR(tsk[id])) { 2618 err = PTR_ERR(tsk[id]); 2619 break; 2620 } 2621 get_task_struct(tsk[id]); 2622 } 2623 2624 yield(); /* start all threads before we kthread_stop() */ 2625 2626 count = 0; 2627 for_each_engine(engine, smoke->gt, id) { 
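	/*
	 * Reap each worker: kthread_stop() returns the thread's exit status,
	 * and we accumulate the per-engine submission counts.
	 */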
2628 int status; 2629 2630 if (IS_ERR_OR_NULL(tsk[id])) 2631 continue; 2632 2633 status = kthread_stop(tsk[id]); 2634 if (status && !err) 2635 err = status; 2636 2637 count += arg[id].count; 2638 2639 put_task_struct(tsk[id]); 2640 } 2641 2642 pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n", 2643 count, flags, 2644 RUNTIME_INFO(smoke->gt->i915)->num_engines, smoke->ncontext); 2645 return 0; 2646} 2647 2648static int smoke_random(struct preempt_smoke *smoke, unsigned int flags) 2649{ 2650 enum intel_engine_id id; 2651 IGT_TIMEOUT(end_time); 2652 unsigned long count; 2653 2654 count = 0; 2655 do { 2656 for_each_engine(smoke->engine, smoke->gt, id) { 2657 struct i915_gem_context *ctx = smoke_context(smoke); 2658 int err; 2659 2660 err = smoke_submit(smoke, 2661 ctx, random_priority(&smoke->prng), 2662 flags & BATCH ? smoke->batch : NULL); 2663 if (err) 2664 return err; 2665 2666 count++; 2667 } 2668 } while (!__igt_timeout(end_time, NULL)); 2669 2670 pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n", 2671 count, flags, 2672 RUNTIME_INFO(smoke->gt->i915)->num_engines, smoke->ncontext); 2673 return 0; 2674} 2675 2676static int live_preempt_smoke(void *arg) 2677{ 2678 struct preempt_smoke smoke = { 2679 .gt = arg, 2680 .prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed), 2681 .ncontext = 1024, 2682 }; 2683 const unsigned int phase[] = { 0, BATCH }; 2684 struct igt_live_test t; 2685 int err = -ENOMEM; 2686 u32 *cs; 2687 int n; 2688 2689 if (!HAS_LOGICAL_RING_PREEMPTION(smoke.gt->i915)) 2690 return 0; 2691 2692 smoke.contexts = kmalloc_array(smoke.ncontext, 2693 sizeof(*smoke.contexts), 2694 GFP_KERNEL); 2695 if (!smoke.contexts) 2696 return -ENOMEM; 2697 2698 smoke.batch = 2699 i915_gem_object_create_internal(smoke.gt->i915, PAGE_SIZE); 2700 if (IS_ERR(smoke.batch)) { 2701 err = PTR_ERR(smoke.batch); 2702 goto err_free; 2703 } 2704 2705 cs = i915_gem_object_pin_map(smoke.batch, I915_MAP_WB); 2706 if (IS_ERR(cs)) { 2707 err = PTR_ERR(cs); 2708 goto err_batch; 2709 } 2710 for (n = 0; n < PAGE_SIZE / sizeof(*cs) - 1; n++) 2711 cs[n] = MI_ARB_CHECK; 2712 cs[n] = MI_BATCH_BUFFER_END; 2713 i915_gem_object_flush_map(smoke.batch); 2714 i915_gem_object_unpin_map(smoke.batch); 2715 2716 if (igt_live_test_begin(&t, smoke.gt->i915, __func__, "all")) { 2717 err = -EIO; 2718 goto err_batch; 2719 } 2720 2721 for (n = 0; n < smoke.ncontext; n++) { 2722 smoke.contexts[n] = kernel_context(smoke.gt->i915); 2723 if (!smoke.contexts[n]) 2724 goto err_ctx; 2725 } 2726 2727 for (n = 0; n < ARRAY_SIZE(phase); n++) { 2728 err = smoke_crescendo(&smoke, phase[n]); 2729 if (err) 2730 goto err_ctx; 2731 2732 err = smoke_random(&smoke, phase[n]); 2733 if (err) 2734 goto err_ctx; 2735 } 2736 2737err_ctx: 2738 if (igt_live_test_end(&t)) 2739 err = -EIO; 2740 2741 for (n = 0; n < smoke.ncontext; n++) { 2742 if (!smoke.contexts[n]) 2743 break; 2744 kernel_context_close(smoke.contexts[n]); 2745 } 2746 2747err_batch: 2748 i915_gem_object_put(smoke.batch); 2749err_free: 2750 kfree(smoke.contexts); 2751 2752 return err; 2753} 2754 2755static int nop_virtual_engine(struct intel_gt *gt, 2756 struct intel_engine_cs **siblings, 2757 unsigned int nsibling, 2758 unsigned int nctx, 2759 unsigned int flags) 2760#define CHAIN BIT(0) 2761{ 2762 IGT_TIMEOUT(end_time); 2763 struct i915_request *request[16] = {}; 2764 struct intel_context *ve[16]; 2765 unsigned long n, prime, nc; 2766 struct igt_live_test t; 2767 ktime_t times[2] = {}; 2768 int err; 2769 2770 
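	/*
	 * Submit batches of empty requests to nctx virtual engines built from
	 * the same siblings, either draining one context at a time (CHAIN) or
	 * round-robin across the contexts, and report the request latency for
	 * a single request versus the largest prime-sized batch.
	 */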
GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve)); 2771 2772 for (n = 0; n < nctx; n++) { 2773 ve[n] = intel_execlists_create_virtual(siblings, nsibling); 2774 if (IS_ERR(ve[n])) { 2775 err = PTR_ERR(ve[n]); 2776 nctx = n; 2777 goto out; 2778 } 2779 2780 err = intel_context_pin(ve[n]); 2781 if (err) { 2782 intel_context_put(ve[n]); 2783 nctx = n; 2784 goto out; 2785 } 2786 } 2787 2788 err = igt_live_test_begin(&t, gt->i915, __func__, ve[0]->engine->name); 2789 if (err) 2790 goto out; 2791 2792 for_each_prime_number_from(prime, 1, 8192) { 2793 times[1] = ktime_get_raw(); 2794 2795 if (flags & CHAIN) { 2796 for (nc = 0; nc < nctx; nc++) { 2797 for (n = 0; n < prime; n++) { 2798 struct i915_request *rq; 2799 2800 rq = i915_request_create(ve[nc]); 2801 if (IS_ERR(rq)) { 2802 err = PTR_ERR(rq); 2803 goto out; 2804 } 2805 2806 if (request[nc]) 2807 i915_request_put(request[nc]); 2808 request[nc] = i915_request_get(rq); 2809 i915_request_add(rq); 2810 } 2811 } 2812 } else { 2813 for (n = 0; n < prime; n++) { 2814 for (nc = 0; nc < nctx; nc++) { 2815 struct i915_request *rq; 2816 2817 rq = i915_request_create(ve[nc]); 2818 if (IS_ERR(rq)) { 2819 err = PTR_ERR(rq); 2820 goto out; 2821 } 2822 2823 if (request[nc]) 2824 i915_request_put(request[nc]); 2825 request[nc] = i915_request_get(rq); 2826 i915_request_add(rq); 2827 } 2828 } 2829 } 2830 2831 for (nc = 0; nc < nctx; nc++) { 2832 if (i915_request_wait(request[nc], 0, HZ / 10) < 0) { 2833 pr_err("%s(%s): wait for %llx:%lld timed out\n", 2834 __func__, ve[0]->engine->name, 2835 request[nc]->fence.context, 2836 request[nc]->fence.seqno); 2837 2838 GEM_TRACE("%s(%s) failed at request %llx:%lld\n", 2839 __func__, ve[0]->engine->name, 2840 request[nc]->fence.context, 2841 request[nc]->fence.seqno); 2842 GEM_TRACE_DUMP(); 2843 intel_gt_set_wedged(gt); 2844 break; 2845 } 2846 } 2847 2848 times[1] = ktime_sub(ktime_get_raw(), times[1]); 2849 if (prime == 1) 2850 times[0] = times[1]; 2851 2852 for (nc = 0; nc < nctx; nc++) { 2853 i915_request_put(request[nc]); 2854 request[nc] = NULL; 2855 } 2856 2857 if (__igt_timeout(end_time, NULL)) 2858 break; 2859 } 2860 2861 err = igt_live_test_end(&t); 2862 if (err) 2863 goto out; 2864 2865 pr_info("Requestx%d latencies on %s: 1 = %lluns, %lu = %lluns\n", 2866 nctx, ve[0]->engine->name, ktime_to_ns(times[0]), 2867 prime, div64_u64(ktime_to_ns(times[1]), prime)); 2868 2869out: 2870 if (igt_flush_test(gt->i915)) 2871 err = -EIO; 2872 2873 for (nc = 0; nc < nctx; nc++) { 2874 i915_request_put(request[nc]); 2875 intel_context_unpin(ve[nc]); 2876 intel_context_put(ve[nc]); 2877 } 2878 return err; 2879} 2880 2881static int live_virtual_engine(void *arg) 2882{ 2883 struct intel_gt *gt = arg; 2884 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; 2885 struct intel_engine_cs *engine; 2886 enum intel_engine_id id; 2887 unsigned int class, inst; 2888 int err; 2889 2890 if (USES_GUC_SUBMISSION(gt->i915)) 2891 return 0; 2892 2893 for_each_engine(engine, gt, id) { 2894 err = nop_virtual_engine(gt, &engine, 1, 1, 0); 2895 if (err) { 2896 pr_err("Failed to wrap engine %s: err=%d\n", 2897 engine->name, err); 2898 return err; 2899 } 2900 } 2901 2902 for (class = 0; class <= MAX_ENGINE_CLASS; class++) { 2903 int nsibling, n; 2904 2905 nsibling = 0; 2906 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) { 2907 if (!gt->engine_class[class][inst]) 2908 continue; 2909 2910 siblings[nsibling++] = gt->engine_class[class][inst]; 2911 } 2912 if (nsibling < 2) 2913 continue; 2914 2915 for (n = 1; n <= nsibling + 1; n++) { 2916 err = 
nop_virtual_engine(gt, siblings, nsibling, 2917 n, 0); 2918 if (err) 2919 return err; 2920 } 2921 2922 err = nop_virtual_engine(gt, siblings, nsibling, n, CHAIN); 2923 if (err) 2924 return err; 2925 } 2926 2927 return 0; 2928} 2929 2930static int mask_virtual_engine(struct intel_gt *gt, 2931 struct intel_engine_cs **siblings, 2932 unsigned int nsibling) 2933{ 2934 struct i915_request *request[MAX_ENGINE_INSTANCE + 1]; 2935 struct intel_context *ve; 2936 struct igt_live_test t; 2937 unsigned int n; 2938 int err; 2939 2940 /* 2941 * Check that by setting the execution mask on a request, we can 2942 * restrict it to our desired engine within the virtual engine. 2943 */ 2944 2945 ve = intel_execlists_create_virtual(siblings, nsibling); 2946 if (IS_ERR(ve)) { 2947 err = PTR_ERR(ve); 2948 goto out_close; 2949 } 2950 2951 err = intel_context_pin(ve); 2952 if (err) 2953 goto out_put; 2954 2955 err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name); 2956 if (err) 2957 goto out_unpin; 2958 2959 for (n = 0; n < nsibling; n++) { 2960 request[n] = i915_request_create(ve); 2961 if (IS_ERR(request[n])) { 2962 err = PTR_ERR(request[n]); 2963 nsibling = n; 2964 goto out; 2965 } 2966 2967 /* Reverse order as it's more likely to be unnatural */ 2968 request[n]->execution_mask = siblings[nsibling - n - 1]->mask; 2969 2970 i915_request_get(request[n]); 2971 i915_request_add(request[n]); 2972 } 2973 2974 for (n = 0; n < nsibling; n++) { 2975 if (i915_request_wait(request[n], 0, HZ / 10) < 0) { 2976 pr_err("%s(%s): wait for %llx:%lld timed out\n", 2977 __func__, ve->engine->name, 2978 request[n]->fence.context, 2979 request[n]->fence.seqno); 2980 2981 GEM_TRACE("%s(%s) failed at request %llx:%lld\n", 2982 __func__, ve->engine->name, 2983 request[n]->fence.context, 2984 request[n]->fence.seqno); 2985 GEM_TRACE_DUMP(); 2986 intel_gt_set_wedged(gt); 2987 err = -EIO; 2988 goto out; 2989 } 2990 2991 if (request[n]->engine != siblings[nsibling - n - 1]) { 2992 pr_err("Executed on wrong sibling '%s', expected '%s'\n", 2993 request[n]->engine->name, 2994 siblings[nsibling - n - 1]->name); 2995 err = -EINVAL; 2996 goto out; 2997 } 2998 } 2999 3000 err = igt_live_test_end(&t); 3001out: 3002 if (igt_flush_test(gt->i915)) 3003 err = -EIO; 3004 3005 for (n = 0; n < nsibling; n++) 3006 i915_request_put(request[n]); 3007 3008out_unpin: 3009 intel_context_unpin(ve); 3010out_put: 3011 intel_context_put(ve); 3012out_close: 3013 return err; 3014} 3015 3016static int live_virtual_mask(void *arg) 3017{ 3018 struct intel_gt *gt = arg; 3019 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; 3020 unsigned int class, inst; 3021 int err; 3022 3023 if (USES_GUC_SUBMISSION(gt->i915)) 3024 return 0; 3025 3026 for (class = 0; class <= MAX_ENGINE_CLASS; class++) { 3027 unsigned int nsibling; 3028 3029 nsibling = 0; 3030 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) { 3031 if (!gt->engine_class[class][inst]) 3032 break; 3033 3034 siblings[nsibling++] = gt->engine_class[class][inst]; 3035 } 3036 if (nsibling < 2) 3037 continue; 3038 3039 err = mask_virtual_engine(gt, siblings, nsibling); 3040 if (err) 3041 return err; 3042 } 3043 3044 return 0; 3045} 3046 3047static int preserved_virtual_engine(struct intel_gt *gt, 3048 struct intel_engine_cs **siblings, 3049 unsigned int nsibling) 3050{ 3051 struct i915_request *last = NULL; 3052 struct intel_context *ve; 3053 struct i915_vma *scratch; 3054 struct igt_live_test t; 3055 unsigned int n; 3056 int err = 0; 3057 u32 *cs; 3058 3059 scratch = 
create_scratch(siblings[0]->gt); 3060 if (IS_ERR(scratch)) 3061 return PTR_ERR(scratch); 3062 3063 ve = intel_execlists_create_virtual(siblings, nsibling); 3064 if (IS_ERR(ve)) { 3065 err = PTR_ERR(ve); 3066 goto out_scratch; 3067 } 3068 3069 err = intel_context_pin(ve); 3070 if (err) 3071 goto out_put; 3072 3073 err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name); 3074 if (err) 3075 goto out_unpin; 3076 3077 for (n = 0; n < NUM_GPR_DW; n++) { 3078 struct intel_engine_cs *engine = siblings[n % nsibling]; 3079 struct i915_request *rq; 3080 3081 rq = i915_request_create(ve); 3082 if (IS_ERR(rq)) { 3083 err = PTR_ERR(rq); 3084 goto out_end; 3085 } 3086 3087 i915_request_put(last); 3088 last = i915_request_get(rq); 3089 3090 cs = intel_ring_begin(rq, 8); 3091 if (IS_ERR(cs)) { 3092 i915_request_add(rq); 3093 err = PTR_ERR(cs); 3094 goto out_end; 3095 } 3096 3097 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; 3098 *cs++ = CS_GPR(engine, n); 3099 *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32); 3100 *cs++ = 0; 3101 3102 *cs++ = MI_LOAD_REGISTER_IMM(1); 3103 *cs++ = CS_GPR(engine, (n + 1) % NUM_GPR_DW); 3104 *cs++ = n + 1; 3105 3106 *cs++ = MI_NOOP; 3107 intel_ring_advance(rq, cs); 3108 3109 /* Restrict this request to run on a particular engine */ 3110 rq->execution_mask = engine->mask; 3111 i915_request_add(rq); 3112 } 3113 3114 if (i915_request_wait(last, 0, HZ / 5) < 0) { 3115 err = -ETIME; 3116 goto out_end; 3117 } 3118 3119 cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB); 3120 if (IS_ERR(cs)) { 3121 err = PTR_ERR(cs); 3122 goto out_end; 3123 } 3124 3125 for (n = 0; n < NUM_GPR_DW; n++) { 3126 if (cs[n] != n) { 3127 pr_err("Incorrect value[%d] found for GPR[%d]\n", 3128 cs[n], n); 3129 err = -EINVAL; 3130 break; 3131 } 3132 } 3133 3134 i915_gem_object_unpin_map(scratch->obj); 3135 3136out_end: 3137 if (igt_live_test_end(&t)) 3138 err = -EIO; 3139 i915_request_put(last); 3140out_unpin: 3141 intel_context_unpin(ve); 3142out_put: 3143 intel_context_put(ve); 3144out_scratch: 3145 i915_vma_unpin_and_release(&scratch, 0); 3146 return err; 3147} 3148 3149static int live_virtual_preserved(void *arg) 3150{ 3151 struct intel_gt *gt = arg; 3152 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; 3153 unsigned int class, inst; 3154 3155 /* 3156 * Check that the context image retains non-privileged (user) registers 3157 * from one engine to the next. For this we check that the CS_GPR 3158 * are preserved. 3159 */ 3160 3161 if (USES_GUC_SUBMISSION(gt->i915)) 3162 return 0; 3163 3164 /* As we use CS_GPR we cannot run before they existed on all engines. 
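 * That is gen9+: on gen8 the GPRs exist only on the render engine (see
 * __live_gpr_clear()), hence the INTEL_GEN() check that follows.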
*/ 3165 if (INTEL_GEN(gt->i915) < 9) 3166 return 0; 3167 3168 for (class = 0; class <= MAX_ENGINE_CLASS; class++) { 3169 int nsibling, err; 3170 3171 nsibling = 0; 3172 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) { 3173 if (!gt->engine_class[class][inst]) 3174 continue; 3175 3176 siblings[nsibling++] = gt->engine_class[class][inst]; 3177 } 3178 if (nsibling < 2) 3179 continue; 3180 3181 err = preserved_virtual_engine(gt, siblings, nsibling); 3182 if (err) 3183 return err; 3184 } 3185 3186 return 0; 3187} 3188 3189static int bond_virtual_engine(struct intel_gt *gt, 3190 unsigned int class, 3191 struct intel_engine_cs **siblings, 3192 unsigned int nsibling, 3193 unsigned int flags) 3194#define BOND_SCHEDULE BIT(0) 3195{ 3196 struct intel_engine_cs *master; 3197 struct i915_request *rq[16]; 3198 enum intel_engine_id id; 3199 struct igt_spinner spin; 3200 unsigned long n; 3201 int err; 3202 3203 /* 3204 * A set of bonded requests is intended to be run concurrently 3205 * across a number of engines. We use one request per-engine 3206 * and a magic fence to schedule each of the bonded requests 3207 * at the same time. A consequence of our current scheduler is that 3208 * we only move requests to the HW ready queue when the request 3209 * becomes ready, that is when all of its prerequisite fences have 3210 * been signaled. As one of those fences is the master submit fence, 3211 * there is a delay on all secondary fences as the HW may be 3212 * currently busy. Equally, as all the requests are independent, 3213 * they may have other fences that delay individual request 3214 * submission to HW. Ergo, we do not guarantee that all requests are 3215 * immediately submitted to HW at the same time, just that if the 3216 * rules are abided by, they are ready at the same time as the 3217 * first is submitted. Userspace can embed semaphores in its batch 3218 * to ensure parallel execution of its phases as it requires. 3219 * Though naturally it gets requested that perhaps the scheduler should 3220 * take care of parallel execution, even across preemption events on 3221 * different HW. (The proper answer is of course "lalalala".) 3222 * 3223 * With the submit-fence, we have identified three possible phases 3224 * of synchronisation depending on the master fence: queued (not 3225 * ready), executing, and signaled. The first two are quite simple 3226 * and checked below. However, the signaled master fence handling is 3227 * contentious. Currently we do not distinguish between a signaled 3228 * fence and an expired fence, as once signaled it does not convey 3229 * any information about the previous execution. It may even be freed 3230 * and hence checking later it may not exist at all. Ergo we currently 3231 * do not apply the bonding constraint for an already signaled fence, 3232 * as our expectation is that it should not constrain the secondaries 3233 * and is outside of the scope of the bonded request API (i.e. all 3234 * userspace requests are meant to be running in parallel). As 3235 * it imposes no constraint, and is effectively a no-op, we do not 3236 * check below as normal execution flows are checked extensively above. 3237 * 3238 * XXX Is the degenerate handling of signaled submit fences the 3239 * expected behaviour for userpace? 
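 *
 * In outline, each pass below starts a master request (held back by an
 * onstack fence when BOND_SCHEDULE is set, or by a spinner on the GPU
 * otherwise), creates one bonded request per sibling on a fresh virtual
 * engine via intel_virtual_engine_attach_bond(), couples it to the master
 * with i915_request_await_execution(..., bond_execute), and finally checks
 * that every bonded request executed on its designated sibling.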
3240 */ 3241 3242 GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1); 3243 3244 if (igt_spinner_init(&spin, gt)) 3245 return -ENOMEM; 3246 3247 err = 0; 3248 rq[0] = ERR_PTR(-ENOMEM); 3249 for_each_engine(master, gt, id) { 3250 struct i915_sw_fence fence = {}; 3251 3252 if (master->class == class) 3253 continue; 3254 3255 memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq)); 3256 3257 rq[0] = igt_spinner_create_request(&spin, 3258 master->kernel_context, 3259 MI_NOOP); 3260 if (IS_ERR(rq[0])) { 3261 err = PTR_ERR(rq[0]); 3262 goto out; 3263 } 3264 i915_request_get(rq[0]); 3265 3266 if (flags & BOND_SCHEDULE) { 3267 onstack_fence_init(&fence); 3268 err = i915_sw_fence_await_sw_fence_gfp(&rq[0]->submit, 3269 &fence, 3270 GFP_KERNEL); 3271 } 3272 3273 i915_request_add(rq[0]); 3274 if (err < 0) 3275 goto out; 3276 3277 if (!(flags & BOND_SCHEDULE) && 3278 !igt_wait_for_spinner(&spin, rq[0])) { 3279 err = -EIO; 3280 goto out; 3281 } 3282 3283 for (n = 0; n < nsibling; n++) { 3284 struct intel_context *ve; 3285 3286 ve = intel_execlists_create_virtual(siblings, nsibling); 3287 if (IS_ERR(ve)) { 3288 err = PTR_ERR(ve); 3289 onstack_fence_fini(&fence); 3290 goto out; 3291 } 3292 3293 err = intel_virtual_engine_attach_bond(ve->engine, 3294 master, 3295 siblings[n]); 3296 if (err) { 3297 intel_context_put(ve); 3298 onstack_fence_fini(&fence); 3299 goto out; 3300 } 3301 3302 err = intel_context_pin(ve); 3303 intel_context_put(ve); 3304 if (err) { 3305 onstack_fence_fini(&fence); 3306 goto out; 3307 } 3308 3309 rq[n + 1] = i915_request_create(ve); 3310 intel_context_unpin(ve); 3311 if (IS_ERR(rq[n + 1])) { 3312 err = PTR_ERR(rq[n + 1]); 3313 onstack_fence_fini(&fence); 3314 goto out; 3315 } 3316 i915_request_get(rq[n + 1]); 3317 3318 err = i915_request_await_execution(rq[n + 1], 3319 &rq[0]->fence, 3320 ve->engine->bond_execute); 3321 i915_request_add(rq[n + 1]); 3322 if (err < 0) { 3323 onstack_fence_fini(&fence); 3324 goto out; 3325 } 3326 } 3327 onstack_fence_fini(&fence); 3328 intel_engine_flush_submission(master); 3329 igt_spinner_end(&spin); 3330 3331 if (i915_request_wait(rq[0], 0, HZ / 10) < 0) { 3332 pr_err("Master request did not execute (on %s)!\n", 3333 rq[0]->engine->name); 3334 err = -EIO; 3335 goto out; 3336 } 3337 3338 for (n = 0; n < nsibling; n++) { 3339 if (i915_request_wait(rq[n + 1], 0, 3340 MAX_SCHEDULE_TIMEOUT) < 0) { 3341 err = -EIO; 3342 goto out; 3343 } 3344 3345 if (rq[n + 1]->engine != siblings[n]) { 3346 pr_err("Bonded request did not execute on target engine: expected %s, used %s; master was %s\n", 3347 siblings[n]->name, 3348 rq[n + 1]->engine->name, 3349 rq[0]->engine->name); 3350 err = -EINVAL; 3351 goto out; 3352 } 3353 } 3354 3355 for (n = 0; !IS_ERR(rq[n]); n++) 3356 i915_request_put(rq[n]); 3357 rq[0] = ERR_PTR(-ENOMEM); 3358 } 3359 3360out: 3361 for (n = 0; !IS_ERR(rq[n]); n++) 3362 i915_request_put(rq[n]); 3363 if (igt_flush_test(gt->i915)) 3364 err = -EIO; 3365 3366 igt_spinner_fini(&spin); 3367 return err; 3368} 3369 3370static int live_virtual_bond(void *arg) 3371{ 3372 static const struct phase { 3373 const char *name; 3374 unsigned int flags; 3375 } phases[] = { 3376 { "", 0 }, 3377 { "schedule", BOND_SCHEDULE }, 3378 { }, 3379 }; 3380 struct intel_gt *gt = arg; 3381 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; 3382 unsigned int class, inst; 3383 int err; 3384 3385 if (USES_GUC_SUBMISSION(gt->i915)) 3386 return 0; 3387 3388 for (class = 0; class <= MAX_ENGINE_CLASS; class++) { 3389 const struct phase *p; 3390 int nsibling; 3391 3392 nsibling = 0; 
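		/* Collect the physical engines of this class to act as bonding siblings */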
3393 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) { 3394 if (!gt->engine_class[class][inst]) 3395 break; 3396 3397 GEM_BUG_ON(nsibling == ARRAY_SIZE(siblings)); 3398 siblings[nsibling++] = gt->engine_class[class][inst]; 3399 } 3400 if (nsibling < 2) 3401 continue; 3402 3403 for (p = phases; p->name; p++) { 3404 err = bond_virtual_engine(gt, 3405 class, siblings, nsibling, 3406 p->flags); 3407 if (err) { 3408 pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n", 3409 __func__, p->name, class, nsibling, err); 3410 return err; 3411 } 3412 } 3413 } 3414 3415 return 0; 3416} 3417 3418static int reset_virtual_engine(struct intel_gt *gt, 3419 struct intel_engine_cs **siblings, 3420 unsigned int nsibling) 3421{ 3422 struct intel_engine_cs *engine; 3423 struct intel_context *ve; 3424 unsigned long *heartbeat; 3425 struct igt_spinner spin; 3426 struct i915_request *rq; 3427 unsigned int n; 3428 int err = 0; 3429 3430 /* 3431 * In order to support offline error capture for fast preempt reset, 3432 * we need to decouple the guilty request and ensure that it and its 3433 * descendents are not executed while the capture is in progress. 3434 */ 3435 3436 heartbeat = kmalloc_array(nsibling, sizeof(*heartbeat), GFP_KERNEL); 3437 if (!heartbeat) 3438 return -ENOMEM; 3439 3440 if (igt_spinner_init(&spin, gt)) { 3441 err = -ENOMEM; 3442 goto out_free; 3443 } 3444 3445 ve = intel_execlists_create_virtual(siblings, nsibling); 3446 if (IS_ERR(ve)) { 3447 err = PTR_ERR(ve); 3448 goto out_spin; 3449 } 3450 3451 for (n = 0; n < nsibling; n++) 3452 engine_heartbeat_disable(siblings[n], &heartbeat[n]); 3453 3454 rq = igt_spinner_create_request(&spin, ve, MI_ARB_CHECK); 3455 if (IS_ERR(rq)) { 3456 err = PTR_ERR(rq); 3457 goto out_heartbeat; 3458 } 3459 i915_request_add(rq); 3460 3461 if (!igt_wait_for_spinner(&spin, rq)) { 3462 intel_gt_set_wedged(gt); 3463 err = -ETIME; 3464 goto out_heartbeat; 3465 } 3466 3467 engine = rq->engine; 3468 GEM_BUG_ON(engine == ve->engine); 3469 3470 /* Take ownership of the reset and tasklet */ 3471 if (test_and_set_bit(I915_RESET_ENGINE + engine->id, 3472 >->reset.flags)) { 3473 intel_gt_set_wedged(gt); 3474 err = -EBUSY; 3475 goto out_heartbeat; 3476 } 3477 tasklet_disable(&engine->execlists.tasklet); 3478 3479 engine->execlists.tasklet.func(engine->execlists.tasklet.data); 3480 GEM_BUG_ON(execlists_active(&engine->execlists) != rq); 3481 3482 /* Fake a preemption event; failed of course */ 3483 spin_lock_irq(&engine->active.lock); 3484 __unwind_incomplete_requests(engine); 3485 spin_unlock_irq(&engine->active.lock); 3486 GEM_BUG_ON(rq->engine != ve->engine); 3487 3488 /* Reset the engine while keeping our active request on hold */ 3489 execlists_hold(engine, rq); 3490 GEM_BUG_ON(!i915_request_on_hold(rq)); 3491 3492 intel_engine_reset(engine, NULL); 3493 GEM_BUG_ON(rq->fence.error != -EIO); 3494 3495 /* Release our grasp on the engine, letting CS flow again */ 3496 tasklet_enable(&engine->execlists.tasklet); 3497 clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id, >->reset.flags); 3498 3499 /* Check that we do not resubmit the held request */ 3500 i915_request_get(rq); 3501 if (!i915_request_wait(rq, 0, HZ / 5)) { 3502 pr_err("%s: on hold request completed!\n", 3503 engine->name); 3504 intel_gt_set_wedged(gt); 3505 err = -EIO; 3506 goto out_rq; 3507 } 3508 GEM_BUG_ON(!i915_request_on_hold(rq)); 3509 3510 /* But is resubmitted on release */ 3511 execlists_unhold(engine, rq); 3512 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 3513 pr_err("%s: held request did not 
complete!\n", 3514 engine->name); 3515 intel_gt_set_wedged(gt); 3516 err = -ETIME; 3517 } 3518 3519out_rq: 3520 i915_request_put(rq); 3521out_heartbeat: 3522 for (n = 0; n < nsibling; n++) 3523 engine_heartbeat_enable(siblings[n], heartbeat[n]); 3524 3525 intel_context_put(ve); 3526out_spin: 3527 igt_spinner_fini(&spin); 3528out_free: 3529 kfree(heartbeat); 3530 return err; 3531} 3532 3533static int live_virtual_reset(void *arg) 3534{ 3535 struct intel_gt *gt = arg; 3536 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; 3537 unsigned int class, inst; 3538 3539 /* 3540 * Check that we handle a reset event within a virtual engine. 3541 * Only the physical engine is reset, but we have to check the flow 3542 * of the virtual requests around the reset, and make sure it is not 3543 * forgotten. 3544 */ 3545 3546 if (USES_GUC_SUBMISSION(gt->i915)) 3547 return 0; 3548 3549 if (!intel_has_reset_engine(gt)) 3550 return 0; 3551 3552 for (class = 0; class <= MAX_ENGINE_CLASS; class++) { 3553 int nsibling, err; 3554 3555 nsibling = 0; 3556 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) { 3557 if (!gt->engine_class[class][inst]) 3558 continue; 3559 3560 siblings[nsibling++] = gt->engine_class[class][inst]; 3561 } 3562 if (nsibling < 2) 3563 continue; 3564 3565 err = reset_virtual_engine(gt, siblings, nsibling); 3566 if (err) 3567 return err; 3568 } 3569 3570 return 0; 3571} 3572 3573int intel_execlists_live_selftests(struct drm_i915_private *i915) 3574{ 3575 static const struct i915_subtest tests[] = { 3576 SUBTEST(live_sanitycheck), 3577 SUBTEST(live_unlite_switch), 3578 SUBTEST(live_unlite_preempt), 3579 SUBTEST(live_hold_reset), 3580 SUBTEST(live_timeslice_preempt), 3581 SUBTEST(live_timeslice_queue), 3582 SUBTEST(live_busywait_preempt), 3583 SUBTEST(live_preempt), 3584 SUBTEST(live_late_preempt), 3585 SUBTEST(live_nopreempt), 3586 SUBTEST(live_preempt_cancel), 3587 SUBTEST(live_suppress_self_preempt), 3588 SUBTEST(live_suppress_wait_preempt), 3589 SUBTEST(live_chain_preempt), 3590 SUBTEST(live_preempt_gang), 3591 SUBTEST(live_preempt_hang), 3592 SUBTEST(live_preempt_timeout), 3593 SUBTEST(live_preempt_smoke), 3594 SUBTEST(live_virtual_engine), 3595 SUBTEST(live_virtual_mask), 3596 SUBTEST(live_virtual_preserved), 3597 SUBTEST(live_virtual_bond), 3598 SUBTEST(live_virtual_reset), 3599 }; 3600 3601 if (!HAS_EXECLISTS(i915)) 3602 return 0; 3603 3604 if (intel_gt_is_wedged(&i915->gt)) 3605 return 0; 3606 3607 return intel_gt_live_subtests(tests, &i915->gt); 3608} 3609 3610static void hexdump(const void *buf, size_t len) 3611{ 3612 const size_t rowsize = 8 * sizeof(u32); 3613 const void *prev = NULL; 3614 bool skip = false; 3615 size_t pos; 3616 3617 for (pos = 0; pos < len; pos += rowsize) { 3618 char line[128]; 3619 3620 if (prev && !memcmp(prev, buf + pos, rowsize)) { 3621 if (!skip) { 3622 pr_info("*\n"); 3623 skip = true; 3624 } 3625 continue; 3626 } 3627 3628 WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos, 3629 rowsize, sizeof(u32), 3630 line, sizeof(line), 3631 false) >= sizeof(line)); 3632 pr_info("[%04zx] %s\n", pos, line); 3633 3634 prev = buf + pos; 3635 skip = false; 3636 } 3637} 3638 3639static int live_lrc_layout(void *arg) 3640{ 3641 struct intel_gt *gt = arg; 3642 struct intel_engine_cs *engine; 3643 enum intel_engine_id id; 3644 u32 *lrc; 3645 int err; 3646 3647 /* 3648 * Check the registers offsets we use to create the initial reg state 3649 * match the layout saved by HW. 
3650 */ 3651 3652 lrc = kmalloc(PAGE_SIZE, GFP_KERNEL); 3653 if (!lrc) 3654 return -ENOMEM; 3655 3656 err = 0; 3657 for_each_engine(engine, gt, id) { 3658 u32 *hw; 3659 int dw; 3660 3661 if (!engine->default_state) 3662 continue; 3663 3664 hw = i915_gem_object_pin_map(engine->default_state, 3665 I915_MAP_WB); 3666 if (IS_ERR(hw)) { 3667 err = PTR_ERR(hw); 3668 break; 3669 } 3670 hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw); 3671 3672 execlists_init_reg_state(memset(lrc, POISON_INUSE, PAGE_SIZE), 3673 engine->kernel_context, 3674 engine, 3675 engine->kernel_context->ring, 3676 true); 3677 3678 dw = 0; 3679 do { 3680 u32 lri = hw[dw]; 3681 3682 if (lri == 0) { 3683 dw++; 3684 continue; 3685 } 3686 3687 if (lrc[dw] == 0) { 3688 pr_debug("%s: skipped instruction %x at dword %d\n", 3689 engine->name, lri, dw); 3690 dw++; 3691 continue; 3692 } 3693 3694 if ((lri & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) { 3695 pr_err("%s: Expected LRI command at dword %d, found %08x\n", 3696 engine->name, dw, lri); 3697 err = -EINVAL; 3698 break; 3699 } 3700 3701 if (lrc[dw] != lri) { 3702 pr_err("%s: LRI command mismatch at dword %d, expected %08x found %08x\n", 3703 engine->name, dw, lri, lrc[dw]); 3704 err = -EINVAL; 3705 break; 3706 } 3707 3708 lri &= 0x7f; 3709 lri++; 3710 dw++; 3711 3712 while (lri) { 3713 if (hw[dw] != lrc[dw]) { 3714 pr_err("%s: Different registers found at dword %d, expected %x, found %x\n", 3715 engine->name, dw, hw[dw], lrc[dw]); 3716 err = -EINVAL; 3717 break; 3718 } 3719 3720 /* 3721 * Skip over the actual register value as we 3722 * expect that to differ. 3723 */ 3724 dw += 2; 3725 lri -= 2; 3726 } 3727 } while ((lrc[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END); 3728 3729 if (err) { 3730 pr_info("%s: HW register image:\n", engine->name); 3731 hexdump(hw, PAGE_SIZE); 3732 3733 pr_info("%s: SW register image:\n", engine->name); 3734 hexdump(lrc, PAGE_SIZE); 3735 } 3736 3737 i915_gem_object_unpin_map(engine->default_state); 3738 if (err) 3739 break; 3740 } 3741 3742 kfree(lrc); 3743 return err; 3744} 3745 3746static int find_offset(const u32 *lri, u32 offset) 3747{ 3748 int i; 3749 3750 for (i = 0; i < PAGE_SIZE / sizeof(u32); i++) 3751 if (lri[i] == offset) 3752 return i; 3753 3754 return -1; 3755} 3756 3757static int live_lrc_fixed(void *arg) 3758{ 3759 struct intel_gt *gt = arg; 3760 struct intel_engine_cs *engine; 3761 enum intel_engine_id id; 3762 int err = 0; 3763 3764 /* 3765 * Check the assumed register offsets match the actual locations in 3766 * the context image. 
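 *
 * For each engine, the table below pairs an mmio register with the dword
 * offset it is expected to occupy in the context image; find_offset() then
 * locates the register in the engine's default state for comparison.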
3767 */ 3768 3769 for_each_engine(engine, gt, id) { 3770 const struct { 3771 u32 reg; 3772 u32 offset; 3773 const char *name; 3774 } tbl[] = { 3775 { 3776 i915_mmio_reg_offset(RING_START(engine->mmio_base)), 3777 CTX_RING_START - 1, 3778 "RING_START" 3779 }, 3780 { 3781 i915_mmio_reg_offset(RING_CTL(engine->mmio_base)), 3782 CTX_RING_CTL - 1, 3783 "RING_CTL" 3784 }, 3785 { 3786 i915_mmio_reg_offset(RING_HEAD(engine->mmio_base)), 3787 CTX_RING_HEAD - 1, 3788 "RING_HEAD" 3789 }, 3790 { 3791 i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)), 3792 CTX_RING_TAIL - 1, 3793 "RING_TAIL" 3794 }, 3795 { 3796 i915_mmio_reg_offset(RING_MI_MODE(engine->mmio_base)), 3797 lrc_ring_mi_mode(engine), 3798 "RING_MI_MODE" 3799 }, 3800 { 3801 i915_mmio_reg_offset(RING_BBSTATE(engine->mmio_base)), 3802 CTX_BB_STATE - 1, 3803 "BB_STATE" 3804 }, 3805 { }, 3806 }, *t; 3807 u32 *hw; 3808 3809 if (!engine->default_state) 3810 continue; 3811 3812 hw = i915_gem_object_pin_map(engine->default_state, 3813 I915_MAP_WB); 3814 if (IS_ERR(hw)) { 3815 err = PTR_ERR(hw); 3816 break; 3817 } 3818 hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw); 3819 3820 for (t = tbl; t->name; t++) { 3821 int dw = find_offset(hw, t->reg); 3822 3823 if (dw != t->offset) { 3824 pr_err("%s: Offset for %s [0x%x] mismatch, found %x, expected %x\n", 3825 engine->name, 3826 t->name, 3827 t->reg, 3828 dw, 3829 t->offset); 3830 err = -EINVAL; 3831 } 3832 } 3833 3834 i915_gem_object_unpin_map(engine->default_state); 3835 } 3836 3837 return err; 3838} 3839 3840static int __live_lrc_state(struct intel_engine_cs *engine, 3841 struct i915_vma *scratch) 3842{ 3843 struct intel_context *ce; 3844 struct i915_request *rq; 3845 enum { 3846 RING_START_IDX = 0, 3847 RING_TAIL_IDX, 3848 MAX_IDX 3849 }; 3850 u32 expected[MAX_IDX]; 3851 u32 *cs; 3852 int err; 3853 int n; 3854 3855 ce = intel_context_create(engine); 3856 if (IS_ERR(ce)) 3857 return PTR_ERR(ce); 3858 3859 err = intel_context_pin(ce); 3860 if (err) 3861 goto err_put; 3862 3863 rq = i915_request_create(ce); 3864 if (IS_ERR(rq)) { 3865 err = PTR_ERR(rq); 3866 goto err_unpin; 3867 } 3868 3869 cs = intel_ring_begin(rq, 4 * MAX_IDX); 3870 if (IS_ERR(cs)) { 3871 err = PTR_ERR(cs); 3872 i915_request_add(rq); 3873 goto err_unpin; 3874 } 3875 3876 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; 3877 *cs++ = i915_mmio_reg_offset(RING_START(engine->mmio_base)); 3878 *cs++ = i915_ggtt_offset(scratch) + RING_START_IDX * sizeof(u32); 3879 *cs++ = 0; 3880 3881 expected[RING_START_IDX] = i915_ggtt_offset(ce->ring->vma); 3882 3883 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; 3884 *cs++ = i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)); 3885 *cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32); 3886 *cs++ = 0; 3887 3888 i915_request_get(rq); 3889 i915_request_add(rq); 3890 3891 intel_engine_flush_submission(engine); 3892 expected[RING_TAIL_IDX] = ce->ring->tail; 3893 3894 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 3895 err = -ETIME; 3896 goto err_rq; 3897 } 3898 3899 cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB); 3900 if (IS_ERR(cs)) { 3901 err = PTR_ERR(cs); 3902 goto err_rq; 3903 } 3904 3905 for (n = 0; n < MAX_IDX; n++) { 3906 if (cs[n] != expected[n]) { 3907 pr_err("%s: Stored register[%d] value[0x%x] did not match expected[0x%x]\n", 3908 engine->name, n, cs[n], expected[n]); 3909 err = -EINVAL; 3910 break; 3911 } 3912 } 3913 3914 i915_gem_object_unpin_map(scratch->obj); 3915 3916err_rq: 3917 i915_request_put(rq); 3918err_unpin: 3919 intel_context_unpin(ce); 3920err_put: 3921 
intel_context_put(ce); 3922 return err; 3923} 3924 3925static int live_lrc_state(void *arg) 3926{ 3927 struct intel_gt *gt = arg; 3928 struct intel_engine_cs *engine; 3929 struct i915_vma *scratch; 3930 enum intel_engine_id id; 3931 int err = 0; 3932 3933 /* 3934 * Check the live register state matches what we expect for this 3935 * intel_context. 3936 */ 3937 3938 scratch = create_scratch(gt); 3939 if (IS_ERR(scratch)) 3940 return PTR_ERR(scratch); 3941 3942 for_each_engine(engine, gt, id) { 3943 err = __live_lrc_state(engine, scratch); 3944 if (err) 3945 break; 3946 } 3947 3948 if (igt_flush_test(gt->i915)) 3949 err = -EIO; 3950 3951 i915_vma_unpin_and_release(&scratch, 0); 3952 return err; 3953} 3954 3955static int gpr_make_dirty(struct intel_engine_cs *engine) 3956{ 3957 struct i915_request *rq; 3958 u32 *cs; 3959 int n; 3960 3961 rq = intel_engine_create_kernel_request(engine); 3962 if (IS_ERR(rq)) 3963 return PTR_ERR(rq); 3964 3965 cs = intel_ring_begin(rq, 2 * NUM_GPR_DW + 2); 3966 if (IS_ERR(cs)) { 3967 i915_request_add(rq); 3968 return PTR_ERR(cs); 3969 } 3970 3971 *cs++ = MI_LOAD_REGISTER_IMM(NUM_GPR_DW); 3972 for (n = 0; n < NUM_GPR_DW; n++) { 3973 *cs++ = CS_GPR(engine, n); 3974 *cs++ = STACK_MAGIC; 3975 } 3976 *cs++ = MI_NOOP; 3977 3978 intel_ring_advance(rq, cs); 3979 i915_request_add(rq); 3980 3981 return 0; 3982} 3983 3984static int __live_gpr_clear(struct intel_engine_cs *engine, 3985 struct i915_vma *scratch) 3986{ 3987 struct intel_context *ce; 3988 struct i915_request *rq; 3989 u32 *cs; 3990 int err; 3991 int n; 3992 3993 if (INTEL_GEN(engine->i915) < 9 && engine->class != RENDER_CLASS) 3994 return 0; /* GPR only on rcs0 for gen8 */ 3995 3996 err = gpr_make_dirty(engine); 3997 if (err) 3998 return err; 3999 4000 ce = intel_context_create(engine); 4001 if (IS_ERR(ce)) 4002 return PTR_ERR(ce); 4003 4004 rq = intel_context_create_request(ce); 4005 if (IS_ERR(rq)) { 4006 err = PTR_ERR(rq); 4007 goto err_put; 4008 } 4009 4010 cs = intel_ring_begin(rq, 4 * NUM_GPR_DW); 4011 if (IS_ERR(cs)) { 4012 err = PTR_ERR(cs); 4013 i915_request_add(rq); 4014 goto err_put; 4015 } 4016 4017 for (n = 0; n < NUM_GPR_DW; n++) { 4018 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; 4019 *cs++ = CS_GPR(engine, n); 4020 *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32); 4021 *cs++ = 0; 4022 } 4023 4024 i915_request_get(rq); 4025 i915_request_add(rq); 4026 4027 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 4028 err = -ETIME; 4029 goto err_rq; 4030 } 4031 4032 cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB); 4033 if (IS_ERR(cs)) { 4034 err = PTR_ERR(cs); 4035 goto err_rq; 4036 } 4037 4038 for (n = 0; n < NUM_GPR_DW; n++) { 4039 if (cs[n]) { 4040 pr_err("%s: GPR[%d].%s was not zero, found 0x%08x!\n", 4041 engine->name, 4042 n / 2, n & 1 ? "udw" : "ldw", 4043 cs[n]); 4044 err = -EINVAL; 4045 break; 4046 } 4047 } 4048 4049 i915_gem_object_unpin_map(scratch->obj); 4050 4051err_rq: 4052 i915_request_put(rq); 4053err_put: 4054 intel_context_put(ce); 4055 return err; 4056} 4057 4058static int live_gpr_clear(void *arg) 4059{ 4060 struct intel_gt *gt = arg; 4061 struct intel_engine_cs *engine; 4062 struct i915_vma *scratch; 4063 enum intel_engine_id id; 4064 int err = 0; 4065 4066 /* 4067 * Check that GPR registers are cleared in new contexts as we need 4068 * to avoid leaking any information from previous contexts. 
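 *
 * gpr_make_dirty() first fills the GPRs with STACK_MAGIC from the kernel
 * context; __live_gpr_clear() then samples them from a brand new context
 * and expects to read back zeroes.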
4069 */ 4070 4071 scratch = create_scratch(gt); 4072 if (IS_ERR(scratch)) 4073 return PTR_ERR(scratch); 4074 4075 for_each_engine(engine, gt, id) { 4076 err = __live_gpr_clear(engine, scratch); 4077 if (err) 4078 break; 4079 } 4080 4081 if (igt_flush_test(gt->i915)) 4082 err = -EIO; 4083 4084 i915_vma_unpin_and_release(&scratch, 0); 4085 return err; 4086} 4087 4088int intel_lrc_live_selftests(struct drm_i915_private *i915) 4089{ 4090 static const struct i915_subtest tests[] = { 4091 SUBTEST(live_lrc_layout), 4092 SUBTEST(live_lrc_fixed), 4093 SUBTEST(live_lrc_state), 4094 SUBTEST(live_gpr_clear), 4095 }; 4096 4097 if (!HAS_LOGICAL_RING_CONTEXTS(i915)) 4098 return 0; 4099 4100 return intel_gt_live_subtests(tests, &i915->gt); 4101} 4102
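
/*
 * Usage sketch, assuming the standard i915 selftest harness: these live
 * selftests are only compiled in with CONFIG_DRM_I915_SELFTEST and are
 * expected to be registered in the live selftest table
 * (selftests/i915_live_selftests.h), from where they run at module load
 * when the i915.live_selftests module parameter requests the live tests.
 * intel_execlists_live_selftests() exercises the execlists scheduling paths
 * (preemption, timeslicing, virtual engines) and is skipped on
 * non-execlists or already-wedged hardware, while intel_lrc_live_selftests()
 * validates only the logical ring context image itself.
 */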