/*	$NetBSD: i915_gem_context.c,v 1.2 2021/12/18 23:45:30 riastradh Exp $	*/

/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2017 Intel Corporation
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: i915_gem_context.c,v 1.2 2021/12/18 23:45:30 riastradh Exp $");

#include <linux/prime_numbers.h>

#include "gem/i915_gem_pm.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_requests.h"
#include "gt/intel_reset.h"
#include "i915_selftest.h"

#include "gem/selftests/igt_gem_utils.h"
#include "selftests/i915_random.h"
#include "selftests/igt_flush_test.h"
#include "selftests/igt_live_test.h"
#include "selftests/igt_reset.h"
#include "selftests/igt_spinner.h"
#include "selftests/mock_drm.h"
#include "selftests/mock_gem_device.h"

#include "huge_gem_object.h"
#include "igt_gem_utils.h"

#define DW_PER_PAGE (PAGE_SIZE / sizeof(u32))

static inline struct i915_address_space *ctx_vm(struct i915_gem_context *ctx)
{
	/* single threaded, private ctx */
	return rcu_dereference_protected(ctx->vm, true);
}

static int live_nop_switch(void *arg)
{
	const unsigned int nctx = 1024;
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	struct i915_gem_context **ctx;
	struct igt_live_test t;
	struct file *file;
	unsigned long n;
	int err = -ENODEV;

	/*
	 * Create as many contexts as we can feasibly get away with
	 * and check we can switch between them rapidly.
	 *
	 * Serves as a very simple stress test for submission and HW switching
	 * between contexts.
	 */

	if (!DRIVER_CAPS(i915)->has_logical_contexts)
		return 0;

	file = mock_file(i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	ctx = kcalloc(nctx, sizeof(*ctx), GFP_KERNEL);
	if (!ctx) {
		err = -ENOMEM;
		goto out_file;
	}

	for (n = 0; n < nctx; n++) {
		ctx[n] = live_context(i915, file);
		if (IS_ERR(ctx[n])) {
			err = PTR_ERR(ctx[n]);
			goto out_file;
		}
	}

	for_each_uabi_engine(engine, i915) {
		struct i915_request *rq = NULL;
		unsigned long end_time, prime;
		ktime_t times[2] = {};

		times[0] = ktime_get_raw();
		for (n = 0; n < nctx; n++) {
			struct i915_request *this;

			this = igt_request_alloc(ctx[n], engine);
			if (IS_ERR(this)) {
				err = PTR_ERR(this);
				goto out_file;
			}
			if (rq) {
				i915_request_await_dma_fence(this, &rq->fence);
				i915_request_put(rq);
			}
			rq = i915_request_get(this);
			i915_request_add(this);
		}
		if (i915_request_wait(rq, 0, HZ / 5) < 0) {
			pr_err("Failed to populate %d contexts\n", nctx);
			intel_gt_set_wedged(&i915->gt);
			i915_request_put(rq);
			err = -EIO;
			goto out_file;
		}
		i915_request_put(rq);

		times[1] = ktime_get_raw();

		pr_info("Populated %d contexts on %s in %lluns\n",
			nctx, engine->name, ktime_to_ns(times[1] - times[0]));

		err = igt_live_test_begin(&t, i915, __func__, engine->name);
		if (err)
			goto out_file;

		end_time = jiffies + i915_selftest.timeout_jiffies;
		for_each_prime_number_from(prime, 2, 8192) {
			times[1] = ktime_get_raw();

			rq = NULL;
			for (n = 0; n < prime; n++) {
				struct i915_request *this;

				this = igt_request_alloc(ctx[n % nctx], engine);
				if (IS_ERR(this)) {
					err = PTR_ERR(this);
					goto out_file;
				}

				if (rq) { /* Force submission order */
					i915_request_await_dma_fence(this, &rq->fence);
					i915_request_put(rq);
				}

				/*
				 * This space is left intentionally blank.
				 *
				 * We do not actually want to perform any
				 * action with this request, we just want
				 * to measure the latency in allocation
				 * and submission of our breadcrumbs -
				 * ensuring that the bare request is sufficient
				 * for the system to work (i.e. proper HEAD
				 * tracking of the rings, interrupt handling,
				 * etc). It also gives us the lowest bounds
				 * for latency.
				 */

				rq = i915_request_get(this);
				i915_request_add(this);
			}
			GEM_BUG_ON(!rq);
			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
				pr_err("Switching between %ld contexts timed out\n",
				       prime);
				intel_gt_set_wedged(&i915->gt);
				i915_request_put(rq);
				break;
			}
			i915_request_put(rq);

			times[1] = ktime_sub(ktime_get_raw(), times[1]);
			if (prime == 2)
				times[0] = times[1];

			if (__igt_timeout(end_time, NULL))
				break;
		}

		err = igt_live_test_end(&t);
		if (err)
			goto out_file;

		pr_info("Switch latencies on %s: 1 = %lluns, %lu = %lluns\n",
			engine->name,
			ktime_to_ns(times[0]),
			prime - 1, div64_u64(ktime_to_ns(times[1]), prime - 1));
	}

out_file:
	fput(file);
	return err;
}

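/*
 * State for one worker in live_parallel_switch(): a kthread that
 * bounces requests between two contexts, ce[0] and ce[1], on the
 * same engine.
 */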
struct parallel_switch {
	struct task_struct *tsk;
	struct intel_context *ce[2];
};

static int __live_parallel_switch1(void *data)
{
	struct parallel_switch *arg = data;
	IGT_TIMEOUT(end_time);
	unsigned long count;

	count = 0;
	do {
		struct i915_request *rq = NULL;
		int err, n;

		err = 0;
		for (n = 0; !err && n < ARRAY_SIZE(arg->ce); n++) {
			struct i915_request *prev = rq;

			rq = i915_request_create(arg->ce[n]);
			if (IS_ERR(rq)) {
				i915_request_put(prev);
				return PTR_ERR(rq);
			}

			i915_request_get(rq);
			if (prev) {
				err = i915_request_await_dma_fence(rq, &prev->fence);
				i915_request_put(prev);
			}

			i915_request_add(rq);
		}
		if (i915_request_wait(rq, 0, HZ / 5) < 0)
			err = -ETIME;
		i915_request_put(rq);
		if (err)
			return err;

		count++;
	} while (!__igt_timeout(end_time, NULL));

	pr_info("%s: %lu switches (sync)\n", arg->ce[0]->engine->name, count);
	return 0;
}

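/*
 * As __live_parallel_switch1, but chain requests across both contexts
 * without ever waiting on them, counting how many switches we can
 * submit before the timeout expires.
 */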
static int __live_parallel_switchN(void *data)
{
	struct parallel_switch *arg = data;
	struct i915_request *rq = NULL;
	IGT_TIMEOUT(end_time);
	unsigned long count;
	int n;

	count = 0;
	do {
		for (n = 0; n < ARRAY_SIZE(arg->ce); n++) {
			struct i915_request *prev = rq;
			int err = 0;

			rq = i915_request_create(arg->ce[n]);
			if (IS_ERR(rq)) {
				i915_request_put(prev);
				return PTR_ERR(rq);
			}

			i915_request_get(rq);
			if (prev) {
				err = i915_request_await_dma_fence(rq, &prev->fence);
				i915_request_put(prev);
			}

			i915_request_add(rq);
			if (err) {
				i915_request_put(rq);
				return err;
			}
		}

		count++;
	} while (!__igt_timeout(end_time, NULL));
	i915_request_put(rq);

	pr_info("%s: %lu switches (many)\n", arg->ce[0]->engine->name, count);
	return 0;
}

static int live_parallel_switch(void *arg)
{
	struct drm_i915_private *i915 = arg;
	static int (* const func[])(void *arg) = {
		__live_parallel_switch1,
		__live_parallel_switchN,
		NULL,
	};
	struct parallel_switch *data = NULL;
	struct i915_gem_engines *engines;
	struct i915_gem_engines_iter it;
	int (* const *fn)(void *arg);
	struct i915_gem_context *ctx;
	struct intel_context *ce;
	struct file *file;
	int n, m, count;
	int err = 0;

	/*
	 * Check we can process switches on all engines simultaneously.
	 */

	if (!DRIVER_CAPS(i915)->has_logical_contexts)
		return 0;

	file = mock_file(i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	ctx = live_context(i915, file);
	if (IS_ERR(ctx)) {
		err = PTR_ERR(ctx);
		goto out_file;
	}

	engines = i915_gem_context_lock_engines(ctx);
	count = engines->num_engines;

	data = kcalloc(count, sizeof(*data), GFP_KERNEL);
	if (!data) {
		i915_gem_context_unlock_engines(ctx);
		err = -ENOMEM;
		goto out_file;
	}

	m = 0; /* Use the first context as our template for the engines */
	for_each_gem_engine(ce, engines, it) {
		err = intel_context_pin(ce);
		if (err) {
			i915_gem_context_unlock_engines(ctx);
			goto out;
		}
		data[m++].ce[0] = intel_context_get(ce);
	}
	i915_gem_context_unlock_engines(ctx);

	/* Clone the same set of engines into the other contexts */
	for (n = 1; n < ARRAY_SIZE(data->ce); n++) {
		ctx = live_context(i915, file);
		if (IS_ERR(ctx)) {
			err = PTR_ERR(ctx);
			goto out;
		}

		for (m = 0; m < count; m++) {
			if (!data[m].ce[0])
				continue;

			ce = intel_context_create(data[m].ce[0]->engine);
			if (IS_ERR(ce)) {
				err = PTR_ERR(ce);
				goto out;
			}

			err = intel_context_pin(ce);
			if (err) {
				intel_context_put(ce);
				goto out;
			}

			data[m].ce[n] = ce;
		}
	}

	for (fn = func; !err && *fn; fn++) {
		struct igt_live_test t;
		int n;

		err = igt_live_test_begin(&t, i915, __func__, "");
		if (err)
			break;

		for (n = 0; n < count; n++) {
			if (!data[n].ce[0])
				continue;

			data[n].tsk = kthread_run(*fn, &data[n],
						  "igt/parallel:%s",
						  data[n].ce[0]->engine->name);
			if (IS_ERR(data[n].tsk)) {
				err = PTR_ERR(data[n].tsk);
				break;
			}
			get_task_struct(data[n].tsk);
		}

		yield(); /* start all threads before we kthread_stop() */

		for (n = 0; n < count; n++) {
			int status;

			if (IS_ERR_OR_NULL(data[n].tsk))
				continue;

			status = kthread_stop(data[n].tsk);
			if (status && !err)
				err = status;

			put_task_struct(data[n].tsk);
			data[n].tsk = NULL;
		}

		if (igt_live_test_end(&t))
			err = -EIO;
	}

out:
	for (n = 0; n < count; n++) {
		for (m = 0; m < ARRAY_SIZE(data->ce); m++) {
			if (!data[n].ce[m])
				continue;

			intel_context_unpin(data[n].ce[m]);
			intel_context_put(data[n].ce[m]);
		}
	}
	kfree(data);
out_file:
	fput(file);
	return err;
}

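/*
 * A huge_gem_object backs its nominal (dma) size with a much smaller
 * set of physical pages: real_page_count() is the number of pages that
 * actually exist, fake_page_count() the number the GTT pretends to map.
 */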
static unsigned long real_page_count(struct drm_i915_gem_object *obj)
{
	return huge_gem_object_phys_size(obj) >> PAGE_SHIFT;
}

static unsigned long fake_page_count(struct drm_i915_gem_object *obj)
{
	return huge_gem_object_dma_size(obj) >> PAGE_SHIFT;
}

static int gpu_fill(struct intel_context *ce,
		    struct drm_i915_gem_object *obj,
		    unsigned int dw)
{
	struct i915_vma *vma;
	int err;

	GEM_BUG_ON(obj->base.size > ce->vm->total);
	GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine));

	vma = i915_vma_instance(obj, ce->vm, NULL);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	err = i915_vma_pin(vma, 0, 0, PIN_HIGH | PIN_USER);
	if (err)
		return err;

	/*
	 * Within the GTT the huge object maps every page onto
	 * its 1024 real pages (using phys_pfn = dma_pfn % 1024).
	 * We set the nth dword within the page using the nth
	 * mapping via the GTT - this should exercise the GTT mapping
	 * whilst checking that each context provides a unique view
	 * into the object.
	 */
	err = igt_gpu_fill_dw(ce, vma,
			      (dw * real_page_count(obj)) << PAGE_SHIFT |
			      (dw * sizeof(u32)),
			      real_page_count(obj),
			      dw);
	i915_vma_unpin(vma);

	return err;
}

static int cpu_fill(struct drm_i915_gem_object *obj, u32 value)
{
	const bool has_llc = HAS_LLC(to_i915(obj->base.dev));
	unsigned int n, m, need_flush;
	int err;

	err = i915_gem_object_prepare_write(obj, &need_flush);
	if (err)
		return err;

	for (n = 0; n < real_page_count(obj); n++) {
		u32 *map;

		map = kmap_atomic(i915_gem_object_get_page(obj, n));
		for (m = 0; m < DW_PER_PAGE; m++)
			map[m] = value;
		if (!has_llc)
			drm_clflush_virt_range(map, PAGE_SIZE);
		kunmap_atomic(map);
	}

	i915_gem_object_finish_access(obj);
	obj->read_domains = I915_GEM_DOMAIN_GTT | I915_GEM_DOMAIN_CPU;
	obj->write_domain = 0;
	return 0;
}

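/*
 * Check that the first max dwords of each page hold their dword index
 * (as written by the GPU) and that the remainder still hold the
 * STACK_MAGIC fill from cpu_fill().
 */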
static noinline int cpu_check(struct drm_i915_gem_object *obj,
			      unsigned int idx, unsigned int max)
{
	unsigned int n, m, needs_flush;
	int err;

	err = i915_gem_object_prepare_read(obj, &needs_flush);
	if (err)
		return err;

	for (n = 0; n < real_page_count(obj); n++) {
		u32 *map;

		map = kmap_atomic(i915_gem_object_get_page(obj, n));
		if (needs_flush & CLFLUSH_BEFORE)
			drm_clflush_virt_range(map, PAGE_SIZE);

		for (m = 0; m < max; m++) {
			if (map[m] != m) {
				pr_err("%pS: Invalid value at object %d page %d/%ld, offset %d/%d: found %x expected %x\n",
				       __builtin_return_address(0), idx,
				       n, real_page_count(obj), m, max,
				       map[m], m);
				err = -EINVAL;
				goto out_unmap;
			}
		}

		for (; m < DW_PER_PAGE; m++) {
			if (map[m] != STACK_MAGIC) {
				pr_err("%pS: Invalid value at object %d page %d, offset %d: found %x expected %x (uninitialised)\n",
				       __builtin_return_address(0), idx, n, m,
				       map[m], STACK_MAGIC);
				err = -EINVAL;
				goto out_unmap;
			}
		}

out_unmap:
		kunmap_atomic(map);
		if (err)
			break;
	}

	i915_gem_object_finish_access(obj);
	return err;
}

static int file_add_object(struct file *file, struct drm_i915_gem_object *obj)
{
	int err;

	GEM_BUG_ON(obj->base.handle_count);

	/* tie the object to the drm_file for easy reaping */
	err = idr_alloc(&to_drm_file(file)->object_idr,
			&obj->base, 1, 0, GFP_KERNEL);
	if (err < 0)
		return err;

	i915_gem_object_get(obj);
	obj->base.handle_count++;
	return 0;
}

static struct drm_i915_gem_object *
create_test_object(struct i915_address_space *vm,
		   struct file *file,
		   struct list_head *objects)
{
	struct drm_i915_gem_object *obj;
	u64 size;
	int err;

	/* Keep in GEM's good graces */
	intel_gt_retire_requests(vm->gt);

	size = min(vm->total / 2, 1024ull * DW_PER_PAGE * PAGE_SIZE);
	size = round_down(size, DW_PER_PAGE * PAGE_SIZE);

	obj = huge_gem_object(vm->i915, DW_PER_PAGE * PAGE_SIZE, size);
	if (IS_ERR(obj))
		return obj;

	err = file_add_object(file, obj);
	i915_gem_object_put(obj);
	if (err)
		return ERR_PTR(err);

	err = cpu_fill(obj, STACK_MAGIC);
	if (err) {
		pr_err("Failed to fill object with cpu, err=%d\n",
		       err);
		return ERR_PTR(err);
	}

	list_add_tail(&obj->st_link, objects);
	return obj;
}

static unsigned long max_dwords(struct drm_i915_gem_object *obj)
{
	unsigned long npages = fake_page_count(obj);

	GEM_BUG_ON(!IS_ALIGNED(npages, DW_PER_PAGE));
	return npages / DW_PER_PAGE;
}

static void throttle_release(struct i915_request **q, int count)
{
	int i;

	for (i = 0; i < count; i++) {
		if (IS_ERR_OR_NULL(q[i]))
			continue;

		i915_request_put(fetch_and_zero(&q[i]));
	}
}

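/*
 * Limit the amount of outstanding work: wait for the oldest request in
 * the queue, shuffle the queue down, then append a fresh request for
 * this context at the tail.
 */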
static int throttle(struct intel_context *ce,
		    struct i915_request **q, int count)
{
	int i;

	if (!IS_ERR_OR_NULL(q[0])) {
		if (i915_request_wait(q[0],
				      I915_WAIT_INTERRUPTIBLE,
				      MAX_SCHEDULE_TIMEOUT) < 0)
			return -EINTR;

		i915_request_put(q[0]);
	}

	for (i = 0; i < count - 1; i++)
		q[i] = q[i + 1];

	q[i] = intel_context_create_request(ce);
	if (IS_ERR(q[i]))
		return PTR_ERR(q[i]);

	i915_request_get(q[i]);
	i915_request_add(q[i]);

	return 0;
}

static int igt_ctx_exec(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	int err = -ENODEV;

	/*
	 * Create a few different contexts (with different mm) and write
	 * through each ctx/mm using the GPU making sure those writes end
	 * up in the expected pages of our obj.
	 */

	if (!DRIVER_CAPS(i915)->has_logical_contexts)
		return 0;

	for_each_uabi_engine(engine, i915) {
		struct drm_i915_gem_object *obj = NULL;
		unsigned long ncontexts, ndwords, dw;
		struct i915_request *tq[5] = {};
		struct igt_live_test t;
		IGT_TIMEOUT(end_time);
		LIST_HEAD(objects);
		struct file *file;

		if (!intel_engine_can_store_dword(engine))
			continue;

		if (!engine->context_size)
			continue; /* No logical context support in HW */

		file = mock_file(i915);
		if (IS_ERR(file))
			return PTR_ERR(file);

		err = igt_live_test_begin(&t, i915, __func__, engine->name);
		if (err)
			goto out_file;

		ncontexts = 0;
		ndwords = 0;
		dw = 0;
		while (!time_after(jiffies, end_time)) {
			struct i915_gem_context *ctx;
			struct intel_context *ce;

			ctx = kernel_context(i915);
			if (IS_ERR(ctx)) {
				err = PTR_ERR(ctx);
				goto out_file;
			}

			ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
			GEM_BUG_ON(IS_ERR(ce));

			if (!obj) {
				obj = create_test_object(ce->vm, file, &objects);
				if (IS_ERR(obj)) {
					err = PTR_ERR(obj);
					intel_context_put(ce);
					kernel_context_close(ctx);
					goto out_file;
				}
			}

			err = gpu_fill(ce, obj, dw);
			if (err) {
				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
				       ndwords, dw, max_dwords(obj),
				       engine->name,
				       yesno(!!rcu_access_pointer(ctx->vm)),
				       err);
				intel_context_put(ce);
				kernel_context_close(ctx);
				goto out_file;
			}

			err = throttle(ce, tq, ARRAY_SIZE(tq));
			if (err) {
				intel_context_put(ce);
				kernel_context_close(ctx);
				goto out_file;
			}

			if (++dw == max_dwords(obj)) {
				obj = NULL;
				dw = 0;
			}

			ndwords++;
			ncontexts++;

			intel_context_put(ce);
			kernel_context_close(ctx);
		}

		pr_info("Submitted %lu contexts to %s, filling %lu dwords\n",
			ncontexts, engine->name, ndwords);

		ncontexts = dw = 0;
		list_for_each_entry(obj, &objects, st_link) {
			unsigned int rem =
				min_t(unsigned int, ndwords - dw, max_dwords(obj));

			err = cpu_check(obj, ncontexts++, rem);
			if (err)
				break;

			dw += rem;
		}

out_file:
		throttle_release(tq, ARRAY_SIZE(tq));
		if (igt_live_test_end(&t))
			err = -EIO;

		fput(file);
		if (err)
			return err;

		i915_gem_drain_freed_objects(i915);
	}

	return 0;
}

static int igt_shared_ctx_exec(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_request *tq[5] = {};
	struct i915_gem_context *parent;
	struct intel_engine_cs *engine;
	struct igt_live_test t;
	struct file *file;
	int err = 0;

	/*
	 * Create a few different contexts with the same mm and write
	 * through each ctx using the GPU making sure those writes end
	 * up in the expected pages of our obj.
	 */
	if (!DRIVER_CAPS(i915)->has_logical_contexts)
		return 0;

	file = mock_file(i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	parent = live_context(i915, file);
	if (IS_ERR(parent)) {
		err = PTR_ERR(parent);
		goto out_file;
	}

	if (!parent->vm) { /* not full-ppgtt; nothing to share */
		err = 0;
		goto out_file;
	}

	err = igt_live_test_begin(&t, i915, __func__, "");
	if (err)
		goto out_file;

	for_each_uabi_engine(engine, i915) {
		unsigned long ncontexts, ndwords, dw;
		struct drm_i915_gem_object *obj = NULL;
		IGT_TIMEOUT(end_time);
		LIST_HEAD(objects);

		if (!intel_engine_can_store_dword(engine))
			continue;

		dw = 0;
		ndwords = 0;
		ncontexts = 0;
		while (!time_after(jiffies, end_time)) {
			struct i915_gem_context *ctx;
			struct intel_context *ce;

			ctx = kernel_context(i915);
			if (IS_ERR(ctx)) {
				err = PTR_ERR(ctx);
				goto out_test;
			}

			mutex_lock(&ctx->mutex);
			__assign_ppgtt(ctx, ctx_vm(parent));
			mutex_unlock(&ctx->mutex);

			ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
			GEM_BUG_ON(IS_ERR(ce));

			if (!obj) {
				obj = create_test_object(ctx_vm(parent),
							 file, &objects);
				if (IS_ERR(obj)) {
					err = PTR_ERR(obj);
					intel_context_put(ce);
					kernel_context_close(ctx);
					goto out_test;
				}
			}

			err = gpu_fill(ce, obj, dw);
			if (err) {
				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
				       ndwords, dw, max_dwords(obj),
				       engine->name,
				       yesno(!!rcu_access_pointer(ctx->vm)),
				       err);
				intel_context_put(ce);
				kernel_context_close(ctx);
				goto out_test;
			}

			err = throttle(ce, tq, ARRAY_SIZE(tq));
			if (err) {
				intel_context_put(ce);
				kernel_context_close(ctx);
				goto out_test;
			}

			if (++dw == max_dwords(obj)) {
				obj = NULL;
				dw = 0;
			}

			ndwords++;
			ncontexts++;

			intel_context_put(ce);
			kernel_context_close(ctx);
		}
		pr_info("Submitted %lu contexts to %s, filling %lu dwords\n",
			ncontexts, engine->name, ndwords);

		ncontexts = dw = 0;
		list_for_each_entry(obj, &objects, st_link) {
			unsigned int rem =
				min_t(unsigned int, ndwords - dw, max_dwords(obj));

			err = cpu_check(obj, ncontexts++, rem);
			if (err)
				goto out_test;

			dw += rem;
		}

		i915_gem_drain_freed_objects(i915);
	}
out_test:
	throttle_release(tq, ARRAY_SIZE(tq));
	if (igt_live_test_end(&t))
		err = -EIO;
out_file:
	fput(file);
	return err;
}

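/*
 * Build a batch that stores GEN8_R_PWR_CLK_STATE into the first dword
 * of the target vma, so we can read back the RPCS configuration the
 * context actually ran with.
 */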
static struct i915_vma *rpcs_query_batch(struct i915_vma *vma)
{
	struct drm_i915_gem_object *obj;
	u32 *cmd;
	int err;

	if (INTEL_GEN(vma->vm->i915) < 8)
		return ERR_PTR(-EINVAL);

	obj = i915_gem_object_create_internal(vma->vm->i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto err;
	}

	*cmd++ = MI_STORE_REGISTER_MEM_GEN8;
	*cmd++ = i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE);
	*cmd++ = lower_32_bits(vma->node.start);
	*cmd++ = upper_32_bits(vma->node.start);
	*cmd = MI_BATCH_BUFFER_END;

	__i915_gem_object_flush_map(obj, 0, 64);
	i915_gem_object_unpin_map(obj);

	intel_gt_chipset_flush(vma->vm->gt);

	vma = i915_vma_instance(obj, vma->vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		goto err;

	return vma;

err:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}

static int
emit_rpcs_query(struct drm_i915_gem_object *obj,
		struct intel_context *ce,
		struct i915_request **rq_out)
{
	struct i915_request *rq;
	struct i915_vma *batch;
	struct i915_vma *vma;
	int err;

	GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine));

	vma = i915_vma_instance(obj, ce->vm, NULL);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	i915_gem_object_lock(obj);
	err = i915_gem_object_set_to_gtt_domain(obj, false);
	i915_gem_object_unlock(obj);
	if (err)
		return err;

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		return err;

	batch = rpcs_query_batch(vma);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto err_vma;
	}

	rq = i915_request_create(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_batch;
	}

	err = rq->engine->emit_bb_start(rq,
					batch->node.start, batch->node.size,
					0);
	if (err)
		goto err_request;

	i915_vma_lock(batch);
	err = i915_request_await_object(rq, batch->obj, false);
	if (err == 0)
		err = i915_vma_move_to_active(batch, rq, 0);
	i915_vma_unlock(batch);
	if (err)
		goto skip_request;

	i915_vma_lock(vma);
	err = i915_request_await_object(rq, vma->obj, true);
	if (err == 0)
		err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
	i915_vma_unlock(vma);
	if (err)
		goto skip_request;

	i915_vma_unpin_and_release(&batch, 0);
	i915_vma_unpin(vma);

	*rq_out = i915_request_get(rq);

	i915_request_add(rq);

	return 0;

skip_request:
	i915_request_skip(rq, err);
err_request:
	i915_request_add(rq);
err_batch:
	i915_vma_unpin_and_release(&batch, 0);
err_vma:
	i915_vma_unpin(vma);

	return err;
}

#define TEST_IDLE BIT(0)
#define TEST_BUSY BIT(1)
#define TEST_RESET BIT(2)

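/*
 * For the busy and reset variants, keep the engine busy with a spinner
 * while the sseu configuration is changed underneath it.
 */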
static int
__sseu_prepare(const char *name,
	       unsigned int flags,
	       struct intel_context *ce,
	       struct igt_spinner **spin)
{
	struct i915_request *rq;
	int ret;

	*spin = NULL;
	if (!(flags & (TEST_BUSY | TEST_RESET)))
		return 0;

	*spin = kzalloc(sizeof(**spin), GFP_KERNEL);
	if (!*spin)
		return -ENOMEM;

	ret = igt_spinner_init(*spin, ce->engine->gt);
	if (ret)
		goto err_free;

	rq = igt_spinner_create_request(*spin, ce, MI_NOOP);
	if (IS_ERR(rq)) {
		ret = PTR_ERR(rq);
		goto err_fini;
	}

	i915_request_add(rq);

	if (!igt_wait_for_spinner(*spin, rq)) {
		pr_err("%s: Spinner failed to start!\n", name);
		ret = -ETIMEDOUT;
		goto err_end;
	}

	return 0;

err_end:
	igt_spinner_end(*spin);
err_fini:
	igt_spinner_fini(*spin);
err_free:
	kfree(fetch_and_zero(spin));
	return ret;
}

static int
__read_slice_count(struct intel_context *ce,
		   struct drm_i915_gem_object *obj,
		   struct igt_spinner *spin,
		   u32 *rpcs)
{
	struct i915_request *rq = NULL;
	u32 s_mask, s_shift;
	unsigned int cnt;
	u32 *buf, val;
	long ret;

	ret = emit_rpcs_query(obj, ce, &rq);
	if (ret)
		return ret;

	if (spin)
		igt_spinner_end(spin);

	ret = i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
	i915_request_put(rq);
	if (ret < 0)
		return ret;

	buf = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(buf)) {
		ret = PTR_ERR(buf);
		return ret;
	}

	if (INTEL_GEN(ce->engine->i915) >= 11) {
		s_mask = GEN11_RPCS_S_CNT_MASK;
		s_shift = GEN11_RPCS_S_CNT_SHIFT;
	} else {
		s_mask = GEN8_RPCS_S_CNT_MASK;
		s_shift = GEN8_RPCS_S_CNT_SHIFT;
	}

	val = *buf;
	cnt = (val & s_mask) >> s_shift;
	*rpcs = val;

	i915_gem_object_unpin_map(obj);

	return cnt;
}

static int
__check_rpcs(const char *name, u32 rpcs, int slices, unsigned int expected,
	     const char *prefix, const char *suffix)
{
	if (slices == expected)
		return 0;

	if (slices < 0) {
		pr_err("%s: %s read slice count failed with %d%s\n",
		       name, prefix, slices, suffix);
		return slices;
	}

	pr_err("%s: %s slice count %d is not %u%s\n",
	       name, prefix, slices, expected, suffix);

	pr_info("RPCS=0x%x; %u%sx%u%s\n",
		rpcs, slices,
		(rpcs & GEN8_RPCS_S_CNT_ENABLE) ? "*" : "",
		(rpcs & GEN8_RPCS_SS_CNT_MASK) >> GEN8_RPCS_SS_CNT_SHIFT,
		(rpcs & GEN8_RPCS_SS_CNT_ENABLE) ? "*" : "");

	return -EINVAL;
}

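/*
 * Read the RPCS register back through both the reconfigured context and
 * the kernel context, and check the slice counts against expectations,
 * optionally after an engine reset or after idling.
 */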
"*" : ""); 1141 1142 return -EINVAL; 1143} 1144 1145static int 1146__sseu_finish(const char *name, 1147 unsigned int flags, 1148 struct intel_context *ce, 1149 struct drm_i915_gem_object *obj, 1150 unsigned int expected, 1151 struct igt_spinner *spin) 1152{ 1153 unsigned int slices = hweight32(ce->engine->sseu.slice_mask); 1154 u32 rpcs = 0; 1155 int ret = 0; 1156 1157 if (flags & TEST_RESET) { 1158 ret = intel_engine_reset(ce->engine, "sseu"); 1159 if (ret) 1160 goto out; 1161 } 1162 1163 ret = __read_slice_count(ce, obj, 1164 flags & TEST_RESET ? NULL : spin, &rpcs); 1165 ret = __check_rpcs(name, rpcs, ret, expected, "Context", "!"); 1166 if (ret) 1167 goto out; 1168 1169 ret = __read_slice_count(ce->engine->kernel_context, obj, NULL, &rpcs); 1170 ret = __check_rpcs(name, rpcs, ret, slices, "Kernel context", "!"); 1171 1172out: 1173 if (spin) 1174 igt_spinner_end(spin); 1175 1176 if ((flags & TEST_IDLE) && ret == 0) { 1177 ret = igt_flush_test(ce->engine->i915); 1178 if (ret) 1179 return ret; 1180 1181 ret = __read_slice_count(ce, obj, NULL, &rpcs); 1182 ret = __check_rpcs(name, rpcs, ret, expected, 1183 "Context", " after idle!"); 1184 } 1185 1186 return ret; 1187} 1188 1189static int 1190__sseu_test(const char *name, 1191 unsigned int flags, 1192 struct intel_context *ce, 1193 struct drm_i915_gem_object *obj, 1194 struct intel_sseu sseu) 1195{ 1196 struct igt_spinner *spin = NULL; 1197 int ret; 1198 1199 intel_engine_pm_get(ce->engine); 1200 1201 ret = __sseu_prepare(name, flags, ce, &spin); 1202 if (ret) 1203 goto out_pm; 1204 1205 ret = intel_context_reconfigure_sseu(ce, sseu); 1206 if (ret) 1207 goto out_spin; 1208 1209 ret = __sseu_finish(name, flags, ce, obj, 1210 hweight32(sseu.slice_mask), spin); 1211 1212out_spin: 1213 if (spin) { 1214 igt_spinner_end(spin); 1215 igt_spinner_fini(spin); 1216 kfree(spin); 1217 } 1218out_pm: 1219 intel_engine_pm_put(ce->engine); 1220 return ret; 1221} 1222 1223static int 1224__igt_ctx_sseu(struct drm_i915_private *i915, 1225 const char *name, 1226 unsigned int flags) 1227{ 1228 struct drm_i915_gem_object *obj; 1229 int inst = 0; 1230 int ret = 0; 1231 1232 if (INTEL_GEN(i915) < 9 || !RUNTIME_INFO(i915)->sseu.has_slice_pg) 1233 return 0; 1234 1235 if (flags & TEST_RESET) 1236 igt_global_reset_lock(&i915->gt); 1237 1238 obj = i915_gem_object_create_internal(i915, PAGE_SIZE); 1239 if (IS_ERR(obj)) { 1240 ret = PTR_ERR(obj); 1241 goto out_unlock; 1242 } 1243 1244 do { 1245 struct intel_engine_cs *engine; 1246 struct intel_context *ce; 1247 struct intel_sseu pg_sseu; 1248 1249 engine = intel_engine_lookup_user(i915, 1250 I915_ENGINE_CLASS_RENDER, 1251 inst++); 1252 if (!engine) 1253 break; 1254 1255 if (hweight32(engine->sseu.slice_mask) < 2) 1256 continue; 1257 1258 /* 1259 * Gen11 VME friendly power-gated configuration with 1260 * half enabled sub-slices. 1261 */ 1262 pg_sseu = engine->sseu; 1263 pg_sseu.slice_mask = 1; 1264 pg_sseu.subslice_mask = 1265 ~(~0 << (hweight32(engine->sseu.subslice_mask) / 2)); 1266 1267 pr_info("%s: SSEU subtest '%s', flags=%x, def_slices=%u, pg_slices=%u\n", 1268 engine->name, name, flags, 1269 hweight32(engine->sseu.slice_mask), 1270 hweight32(pg_sseu.slice_mask)); 1271 1272 ce = intel_context_create(engine); 1273 if (IS_ERR(ce)) { 1274 ret = PTR_ERR(ce); 1275 goto out_put; 1276 } 1277 1278 ret = intel_context_pin(ce); 1279 if (ret) 1280 goto out_ce; 1281 1282 /* First set the default mask. 
static int
__igt_ctx_sseu(struct drm_i915_private *i915,
	       const char *name,
	       unsigned int flags)
{
	struct drm_i915_gem_object *obj;
	int inst = 0;
	int ret = 0;

	if (INTEL_GEN(i915) < 9 || !RUNTIME_INFO(i915)->sseu.has_slice_pg)
		return 0;

	if (flags & TEST_RESET)
		igt_global_reset_lock(&i915->gt);

	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj)) {
		ret = PTR_ERR(obj);
		goto out_unlock;
	}

	do {
		struct intel_engine_cs *engine;
		struct intel_context *ce;
		struct intel_sseu pg_sseu;

		engine = intel_engine_lookup_user(i915,
						  I915_ENGINE_CLASS_RENDER,
						  inst++);
		if (!engine)
			break;

		if (hweight32(engine->sseu.slice_mask) < 2)
			continue;

		/*
		 * Gen11 VME friendly power-gated configuration with
		 * half enabled sub-slices.
		 */
		pg_sseu = engine->sseu;
		pg_sseu.slice_mask = 1;
		pg_sseu.subslice_mask =
			~(~0 << (hweight32(engine->sseu.subslice_mask) / 2));

		pr_info("%s: SSEU subtest '%s', flags=%x, def_slices=%u, pg_slices=%u\n",
			engine->name, name, flags,
			hweight32(engine->sseu.slice_mask),
			hweight32(pg_sseu.slice_mask));

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			ret = PTR_ERR(ce);
			goto out_put;
		}

		ret = intel_context_pin(ce);
		if (ret)
			goto out_ce;

		/* First set the default mask. */
		ret = __sseu_test(name, flags, ce, obj, engine->sseu);
		if (ret)
			goto out_unpin;

		/* Then set a power-gated configuration. */
		ret = __sseu_test(name, flags, ce, obj, pg_sseu);
		if (ret)
			goto out_unpin;

		/* Back to defaults. */
		ret = __sseu_test(name, flags, ce, obj, engine->sseu);
		if (ret)
			goto out_unpin;

		/* One last power-gated configuration for the road. */
		ret = __sseu_test(name, flags, ce, obj, pg_sseu);
		if (ret)
			goto out_unpin;

out_unpin:
		intel_context_unpin(ce);
out_ce:
		intel_context_put(ce);
	} while (!ret);

	if (igt_flush_test(i915))
		ret = -EIO;

out_put:
	i915_gem_object_put(obj);

out_unlock:
	if (flags & TEST_RESET)
		igt_global_reset_unlock(&i915->gt);

	if (ret)
		pr_err("%s: Failed with %d!\n", name, ret);

	return ret;
}

static int igt_ctx_sseu(void *arg)
{
	struct {
		const char *name;
		unsigned int flags;
	} *phase, phases[] = {
		{ .name = "basic", .flags = 0 },
		{ .name = "idle", .flags = TEST_IDLE },
		{ .name = "busy", .flags = TEST_BUSY },
		{ .name = "busy-reset", .flags = TEST_BUSY | TEST_RESET },
		{ .name = "busy-idle", .flags = TEST_BUSY | TEST_IDLE },
		{ .name = "reset-idle", .flags = TEST_RESET | TEST_IDLE },
	};
	unsigned int i;
	int ret = 0;

	for (i = 0, phase = phases; ret == 0 && i < ARRAY_SIZE(phases);
	     i++, phase++)
		ret = __igt_ctx_sseu(arg, phase->name, phase->flags);

	return ret;
}

static int igt_ctx_readonly(void *arg)
{
	struct drm_i915_private *i915 = arg;
	unsigned long idx, ndwords, dw, num_engines;
	struct drm_i915_gem_object *obj = NULL;
	struct i915_request *tq[5] = {};
	struct i915_gem_engines_iter it;
	struct i915_address_space *vm;
	struct i915_gem_context *ctx;
	struct intel_context *ce;
	struct igt_live_test t;
	I915_RND_STATE(prng);
	IGT_TIMEOUT(end_time);
	LIST_HEAD(objects);
	struct file *file;
	int err = -ENODEV;

	/*
	 * Create a few read-only objects (with the occasional writable object)
	 * and try to write into these objects, checking that the GPU discards
	 * any write to a read-only object.
	 */

	file = mock_file(i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	err = igt_live_test_begin(&t, i915, __func__, "");
	if (err)
		goto out_file;

	ctx = live_context(i915, file);
	if (IS_ERR(ctx)) {
		err = PTR_ERR(ctx);
		goto out_file;
	}

	vm = ctx_vm(ctx) ?: &i915->ggtt.alias->vm;
	if (!vm || !vm->has_read_only) {
		err = 0;
		goto out_file;
	}

	num_engines = 0;
	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it)
		if (intel_engine_can_store_dword(ce->engine))
			num_engines++;
	i915_gem_context_unlock_engines(ctx);

	ndwords = 0;
	dw = 0;
	while (!time_after(jiffies, end_time)) {
		for_each_gem_engine(ce,
				    i915_gem_context_lock_engines(ctx), it) {
			if (!intel_engine_can_store_dword(ce->engine))
				continue;

			if (!obj) {
				obj = create_test_object(ce->vm, file, &objects);
				if (IS_ERR(obj)) {
					err = PTR_ERR(obj);
					i915_gem_context_unlock_engines(ctx);
					goto out_file;
				}

				if (prandom_u32_state(&prng) & 1)
					i915_gem_object_set_readonly(obj);
			}

			err = gpu_fill(ce, obj, dw);
			if (err) {
				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
				       ndwords, dw, max_dwords(obj),
				       ce->engine->name,
				       yesno(!!ctx_vm(ctx)),
				       err);
				i915_gem_context_unlock_engines(ctx);
				goto out_file;
			}

			err = throttle(ce, tq, ARRAY_SIZE(tq));
			if (err) {
				i915_gem_context_unlock_engines(ctx);
				goto out_file;
			}

			if (++dw == max_dwords(obj)) {
				obj = NULL;
				dw = 0;
			}
			ndwords++;
		}
		i915_gem_context_unlock_engines(ctx);
	}
	pr_info("Submitted %lu dwords (across %lu engines)\n",
		ndwords, num_engines);

	dw = 0;
	idx = 0;
	list_for_each_entry(obj, &objects, st_link) {
		unsigned int rem =
			min_t(unsigned int, ndwords - dw, max_dwords(obj));
		unsigned int num_writes;

		num_writes = rem;
		if (i915_gem_object_is_readonly(obj))
			num_writes = 0;

		err = cpu_check(obj, idx++, num_writes);
		if (err)
			break;

		dw += rem;
	}

out_file:
	throttle_release(tq, ARRAY_SIZE(tq));
	if (igt_live_test_end(&t))
		err = -EIO;

	fput(file);
	return err;
}

static int check_scratch(struct i915_address_space *vm, u64 offset)
{
	struct drm_mm_node *node =
		__drm_mm_interval_first(&vm->mm,
					offset, offset + sizeof(u32) - 1);
	if (!node || node->start > offset)
		return 0;

	GEM_BUG_ON(offset >= node->start + node->size);

	pr_err("Target offset 0x%08x_%08x overlaps with a node in the mm!\n",
	       upper_32_bits(offset), lower_32_bits(offset));
	return -EINVAL;
}

static int write_to_scratch(struct i915_gem_context *ctx,
			    struct intel_engine_cs *engine,
			    u64 offset, u32 value)
{
	struct drm_i915_private *i915 = ctx->i915;
	struct drm_i915_gem_object *obj;
	struct i915_address_space *vm;
	struct i915_request *rq;
	struct i915_vma *vma;
	u32 *cmd;
	int err;

	GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE);

	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto out;
	}

	*cmd++ = MI_STORE_DWORD_IMM_GEN4;
	if (INTEL_GEN(i915) >= 8) {
		*cmd++ = lower_32_bits(offset);
		*cmd++ = upper_32_bits(offset);
	} else {
		*cmd++ = 0;
		*cmd++ = offset;
	}
	*cmd++ = value;
	*cmd = MI_BATCH_BUFFER_END;
	__i915_gem_object_flush_map(obj, 0, 64);
	i915_gem_object_unpin_map(obj);

	intel_gt_chipset_flush(engine->gt);

	vm = i915_gem_context_get_vm_rcu(ctx);
	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto out_vm;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED);
	if (err)
		goto out_vm;

	err = check_scratch(vm, offset);
	if (err)
		goto err_unpin;

	rq = igt_request_alloc(ctx, engine);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_unpin;
	}

	err = engine->emit_bb_start(rq, vma->node.start, vma->node.size, 0);
	if (err)
		goto err_request;

	i915_vma_lock(vma);
	err = i915_request_await_object(rq, vma->obj, false);
	if (err == 0)
		err = i915_vma_move_to_active(vma, rq, 0);
	i915_vma_unlock(vma);
	if (err)
		goto skip_request;

	i915_vma_unpin(vma);

	i915_request_add(rq);

	goto out_vm;
skip_request:
	i915_request_skip(rq, err);
err_request:
	i915_request_add(rq);
err_unpin:
	i915_vma_unpin(vma);
out_vm:
	i915_vm_put(vm);
out:
	i915_gem_object_put(obj);
	return err;
}

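/*
 * Load the dword at the scratch offset into a GPR and store the GPR
 * back into our results page, then read the result back on the CPU.
 */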
static int read_from_scratch(struct i915_gem_context *ctx,
			     struct intel_engine_cs *engine,
			     u64 offset, u32 *value)
{
	struct drm_i915_private *i915 = ctx->i915;
	struct drm_i915_gem_object *obj;
	struct i915_address_space *vm;
	const u32 RCS_GPR0 = 0x2600; /* not all engines have their own GPR! */
	const u32 result = 0x100;
	struct i915_request *rq;
	struct i915_vma *vma;
	u32 *cmd;
	int err;

	GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE);

	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto out;
	}

	memset(cmd, POISON_INUSE, PAGE_SIZE);
	if (INTEL_GEN(i915) >= 8) {
		*cmd++ = MI_LOAD_REGISTER_MEM_GEN8;
		*cmd++ = RCS_GPR0;
		*cmd++ = lower_32_bits(offset);
		*cmd++ = upper_32_bits(offset);
		*cmd++ = MI_STORE_REGISTER_MEM_GEN8;
		*cmd++ = RCS_GPR0;
		*cmd++ = result;
		*cmd++ = 0;
	} else {
		*cmd++ = MI_LOAD_REGISTER_MEM;
		*cmd++ = RCS_GPR0;
		*cmd++ = offset;
		*cmd++ = MI_STORE_REGISTER_MEM;
		*cmd++ = RCS_GPR0;
		*cmd++ = result;
	}
	*cmd = MI_BATCH_BUFFER_END;

	i915_gem_object_flush_map(obj);
	i915_gem_object_unpin_map(obj);

	intel_gt_chipset_flush(engine->gt);

	vm = i915_gem_context_get_vm_rcu(ctx);
	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto out_vm;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED);
	if (err)
		goto out_vm;

	err = check_scratch(vm, offset);
	if (err)
		goto err_unpin;

	rq = igt_request_alloc(ctx, engine);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_unpin;
	}

	err = engine->emit_bb_start(rq, vma->node.start, vma->node.size, 0);
	if (err)
		goto err_request;

	i915_vma_lock(vma);
	err = i915_request_await_object(rq, vma->obj, true);
	if (err == 0)
		err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
	i915_vma_unlock(vma);
	if (err)
		goto skip_request;

	i915_vma_unpin(vma);
	i915_vma_close(vma);

	i915_request_add(rq);

	i915_gem_object_lock(obj);
	err = i915_gem_object_set_to_cpu_domain(obj, false);
	i915_gem_object_unlock(obj);
	if (err)
		goto out_vm;

	cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto out_vm;
	}

	*value = cmd[result / sizeof(*cmd)];
	i915_gem_object_unpin_map(obj);

	goto out_vm;
skip_request:
	i915_request_skip(rq, err);
err_request:
	i915_request_add(rq);
err_unpin:
	i915_vma_unpin(vma);
out_vm:
	i915_vm_put(vm);
out:
	i915_gem_object_put(obj);
	return err;
}

1712 */ 1713 1714 file = mock_file(i915); 1715 if (IS_ERR(file)) 1716 return PTR_ERR(file); 1717 1718 err = igt_live_test_begin(&t, i915, __func__, ""); 1719 if (err) 1720 goto out_file; 1721 1722 ctx_a = live_context(i915, file); 1723 if (IS_ERR(ctx_a)) { 1724 err = PTR_ERR(ctx_a); 1725 goto out_file; 1726 } 1727 1728 ctx_b = live_context(i915, file); 1729 if (IS_ERR(ctx_b)) { 1730 err = PTR_ERR(ctx_b); 1731 goto out_file; 1732 } 1733 1734 /* We can only test vm isolation, if the vm are distinct */ 1735 if (ctx_vm(ctx_a) == ctx_vm(ctx_b)) 1736 goto out_file; 1737 1738 vm_total = ctx_vm(ctx_a)->total; 1739 GEM_BUG_ON(ctx_vm(ctx_b)->total != vm_total); 1740 vm_total -= I915_GTT_PAGE_SIZE; 1741 1742 count = 0; 1743 num_engines = 0; 1744 for_each_uabi_engine(engine, i915) { 1745 IGT_TIMEOUT(end_time); 1746 unsigned long this = 0; 1747 1748 if (!intel_engine_can_store_dword(engine)) 1749 continue; 1750 1751 while (!__igt_timeout(end_time, NULL)) { 1752 u32 value = 0xc5c5c5c5; 1753 u64 offset; 1754 1755 div64_u64_rem(i915_prandom_u64_state(&prng), 1756 vm_total, &offset); 1757 offset = round_down(offset, alignof_dword); 1758 offset += I915_GTT_PAGE_SIZE; 1759 1760 err = write_to_scratch(ctx_a, engine, 1761 offset, 0xdeadbeef); 1762 if (err == 0) 1763 err = read_from_scratch(ctx_b, engine, 1764 offset, &value); 1765 if (err) 1766 goto out_file; 1767 1768 if (value) { 1769 pr_err("%s: Read %08x from scratch (offset 0x%08x_%08x), after %lu reads!\n", 1770 engine->name, value, 1771 upper_32_bits(offset), 1772 lower_32_bits(offset), 1773 this); 1774 err = -EINVAL; 1775 goto out_file; 1776 } 1777 1778 this++; 1779 } 1780 count += this; 1781 num_engines++; 1782 } 1783 pr_info("Checked %lu scratch offsets across %lu engines\n", 1784 count, num_engines); 1785 1786out_file: 1787 if (igt_live_test_end(&t)) 1788 err = -EIO; 1789 fput(file); 1790 return err; 1791} 1792 1793static bool skip_unused_engines(struct intel_context *ce, void *data) 1794{ 1795 return !ce->state; 1796} 1797 1798static void mock_barrier_task(void *data) 1799{ 1800 unsigned int *counter = data; 1801 1802 ++*counter; 1803} 1804 1805static int mock_context_barrier(void *arg) 1806{ 1807#undef pr_fmt 1808#define pr_fmt(x) "context_barrier_task():" # x 1809 struct drm_i915_private *i915 = arg; 1810 struct i915_gem_context *ctx; 1811 struct i915_request *rq; 1812 unsigned int counter; 1813 int err; 1814 1815 /* 1816 * The context barrier provides us with a callback after it emits 1817 * a request; useful for retiring old state after loading new. 
1818 */ 1819 1820 ctx = mock_context(i915, "mock"); 1821 if (!ctx) 1822 return -ENOMEM; 1823 1824 counter = 0; 1825 err = context_barrier_task(ctx, 0, 1826 NULL, NULL, mock_barrier_task, &counter); 1827 if (err) { 1828 pr_err("Failed at line %d, err=%d\n", __LINE__, err); 1829 goto out; 1830 } 1831 if (counter == 0) { 1832 pr_err("Did not retire immediately with 0 engines\n"); 1833 err = -EINVAL; 1834 goto out; 1835 } 1836 1837 counter = 0; 1838 err = context_barrier_task(ctx, ALL_ENGINES, 1839 skip_unused_engines, 1840 NULL, 1841 mock_barrier_task, 1842 &counter); 1843 if (err) { 1844 pr_err("Failed at line %d, err=%d\n", __LINE__, err); 1845 goto out; 1846 } 1847 if (counter == 0) { 1848 pr_err("Did not retire immediately for all unused engines\n"); 1849 err = -EINVAL; 1850 goto out; 1851 } 1852 1853 rq = igt_request_alloc(ctx, i915->engine[RCS0]); 1854 if (IS_ERR(rq)) { 1855 pr_err("Request allocation failed!\n"); 1856 goto out; 1857 } 1858 i915_request_add(rq); 1859 1860 counter = 0; 1861 context_barrier_inject_fault = BIT(RCS0); 1862 err = context_barrier_task(ctx, ALL_ENGINES, 1863 NULL, NULL, mock_barrier_task, &counter); 1864 context_barrier_inject_fault = 0; 1865 if (err == -ENXIO) 1866 err = 0; 1867 else 1868 pr_err("Did not hit fault injection!\n"); 1869 if (counter != 0) { 1870 pr_err("Invoked callback on error!\n"); 1871 err = -EIO; 1872 } 1873 if (err) 1874 goto out; 1875 1876 counter = 0; 1877 err = context_barrier_task(ctx, ALL_ENGINES, 1878 skip_unused_engines, 1879 NULL, 1880 mock_barrier_task, 1881 &counter); 1882 if (err) { 1883 pr_err("Failed at line %d, err=%d\n", __LINE__, err); 1884 goto out; 1885 } 1886 mock_device_flush(i915); 1887 if (counter == 0) { 1888 pr_err("Did not retire on each active engines\n"); 1889 err = -EINVAL; 1890 goto out; 1891 } 1892 1893out: 1894 mock_context_close(ctx); 1895 return err; 1896#undef pr_fmt 1897#define pr_fmt(x) x 1898} 1899 1900int i915_gem_context_mock_selftests(void) 1901{ 1902 static const struct i915_subtest tests[] = { 1903 SUBTEST(mock_context_barrier), 1904 }; 1905 struct drm_i915_private *i915; 1906 int err; 1907 1908 i915 = mock_gem_device(); 1909 if (!i915) 1910 return -ENOMEM; 1911 1912 err = i915_subtests(tests, i915); 1913 1914 drm_dev_put(&i915->drm); 1915 return err; 1916} 1917 1918int i915_gem_context_live_selftests(struct drm_i915_private *i915) 1919{ 1920 static const struct i915_subtest tests[] = { 1921 SUBTEST(live_nop_switch), 1922 SUBTEST(live_parallel_switch), 1923 SUBTEST(igt_ctx_exec), 1924 SUBTEST(igt_ctx_readonly), 1925 SUBTEST(igt_ctx_sseu), 1926 SUBTEST(igt_shared_ctx_exec), 1927 SUBTEST(igt_vm_isolation), 1928 }; 1929 1930 if (intel_gt_is_wedged(&i915->gt)) 1931 return 0; 1932 1933 return i915_live_subtests(tests, i915); 1934} 1935