/*	$NetBSD: amdgpu_debugfs.c,v 1.2 2021/12/18 23:44:58 riastradh Exp $	*/

/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: amdgpu_debugfs.c,v 1.2 2021/12/18 23:44:58 riastradh Exp $");

#include <linux/kthread.h>
#include <linux/pci.h>
#include <linux/uaccess.h>
#include <linux/pm_runtime.h>

#include <drm/drm_debugfs.h>

#include "amdgpu.h"
/**
 * amdgpu_debugfs_add_files - Add simple debugfs entries
 *
 * @adev: Device to attach debugfs entries to
 * @files: Array of function callbacks that respond to reads
 * @nfiles: Number of callbacks to register
 *
 */
int amdgpu_debugfs_add_files(struct amdgpu_device *adev,
			     const struct drm_info_list *files,
			     unsigned nfiles)
{
	unsigned i;

	for (i = 0; i < adev->debugfs_count; i++) {
		if (adev->debugfs[i].files == files) {
			/* Already registered */
			return 0;
		}
	}

	i = adev->debugfs_count + 1;
	if (i > AMDGPU_DEBUGFS_MAX_COMPONENTS) {
		DRM_ERROR("Reached maximum number of debugfs components.\n");
		DRM_ERROR("Report this so we can increase "
			  "AMDGPU_DEBUGFS_MAX_COMPONENTS.\n");
		return -EINVAL;
	}
	adev->debugfs[adev->debugfs_count].files = files;
	adev->debugfs[adev->debugfs_count].num_files = nfiles;
	adev->debugfs_count = i;
#if defined(CONFIG_DEBUG_FS)
	drm_debugfs_create_files(files, nfiles,
				 adev->ddev->primary->debugfs_root,
				 adev->ddev->primary);
#endif
	return 0;
}
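
/*
 * Illustrative usage sketch (not part of this file): a component
 * registers read-only entries by handing this function a static
 * drm_info_list array.  The names "my_debugfs_list" and
 * "my_show_callback" below are hypothetical.
 *
 *	static const struct drm_info_list my_debugfs_list[] = {
 *		{"amdgpu_my_info", &my_show_callback},
 *	};
 *	amdgpu_debugfs_add_files(adev, my_debugfs_list,
 *				 ARRAY_SIZE(my_debugfs_list));
 *
 * The array must have static lifetime: only the pointer is stored in
 * adev->debugfs[] above.
 */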

#if defined(CONFIG_DEBUG_FS)

/**
 * amdgpu_debugfs_process_reg_op - Handle MMIO register reads/writes
 *
 * @read: True if reading
 * @f: open file handle
 * @buf: User buffer to read from or write to
 * @size: Number of bytes to write/read
 * @pos:  Offset to seek to
 *
 * The offset sought in this debugfs entry encodes more than a register
 * address.  Various bits have special meanings:
 *
 * Bit 62:  Indicates a GRBM bank switch is needed
 * Bit 61:  Indicates a SRBM bank switch is needed (implies bit 62 is
 * 			zero)
 * Bits 24..33: The SE or ME selector if needed
 * Bits 34..43: The SH (or SA) or PIPE selector if needed
 * Bits 44..53: The INSTANCE (or CU/WGP) or QUEUE selector if needed
 *
 * Bit 23:  Indicates that the PM power gating lock should be held.
 * 			This is necessary to read registers that might be
 * 			unreliable during a power gating transition.
 *
 * The lower bits are the BYTE offset of the register to read.  This
 * allows reading multiple registers in a single call and having
 * the returned size reflect that.
 */
static int amdgpu_debugfs_process_reg_op(bool read, struct file *f,
		char __user *buf, size_t size, loff_t *pos)
{
	struct amdgpu_device *adev = file_inode(f)->i_private;
	ssize_t result = 0;
	int r;
	bool pm_pg_lock, use_bank, use_ring;
	unsigned instance_bank, sh_bank, se_bank, me, pipe, queue, vmid;

	pm_pg_lock = use_bank = use_ring = false;
	instance_bank = sh_bank = se_bank = me = pipe = queue = vmid = 0;

	if (size & 0x3 || *pos & 0x3 ||
			((*pos & (1ULL << 62)) && (*pos & (1ULL << 61))))
		return -EINVAL;

	/* are we reading registers for which a PG lock is necessary? */
	pm_pg_lock = (*pos >> 23) & 1;

	if (*pos & (1ULL << 62)) {
		se_bank = (*pos & GENMASK_ULL(33, 24)) >> 24;
		sh_bank = (*pos & GENMASK_ULL(43, 34)) >> 34;
		instance_bank = (*pos & GENMASK_ULL(53, 44)) >> 44;

		if (se_bank == 0x3FF)
			se_bank = 0xFFFFFFFF;
		if (sh_bank == 0x3FF)
			sh_bank = 0xFFFFFFFF;
		if (instance_bank == 0x3FF)
			instance_bank = 0xFFFFFFFF;
		use_bank = true;
	} else if (*pos & (1ULL << 61)) {

		me = (*pos & GENMASK_ULL(33, 24)) >> 24;
		pipe = (*pos & GENMASK_ULL(43, 34)) >> 34;
		queue = (*pos & GENMASK_ULL(53, 44)) >> 44;
		vmid = (*pos & GENMASK_ULL(58, 54)) >> 54;

		use_ring = true;
	} else {
		use_bank = use_ring = false;
	}

	*pos &= (1UL << 22) - 1;

	r = pm_runtime_get_sync(adev->ddev->dev);
	if (r < 0)
		return r;

	if (use_bank) {
		if ((sh_bank != 0xFFFFFFFF && sh_bank >= adev->gfx.config.max_sh_per_se) ||
		    (se_bank != 0xFFFFFFFF && se_bank >= adev->gfx.config.max_shader_engines)) {
			pm_runtime_mark_last_busy(adev->ddev->dev);
			pm_runtime_put_autosuspend(adev->ddev->dev);
			return -EINVAL;
		}
		mutex_lock(&adev->grbm_idx_mutex);
		amdgpu_gfx_select_se_sh(adev, se_bank,
					sh_bank, instance_bank);
	} else if (use_ring) {
		mutex_lock(&adev->srbm_mutex);
		amdgpu_gfx_select_me_pipe_q(adev, me, pipe, queue, vmid);
	}

	if (pm_pg_lock)
		mutex_lock(&adev->pm.mutex);

	while (size) {
		uint32_t value;

		if (read) {
			value = RREG32(*pos >> 2);
			r = put_user(value, (uint32_t *)buf);
		} else {
			r = get_user(value, (uint32_t *)buf);
			if (!r)
				WREG32(*pos >> 2, value);
		}
		if (r) {
			result = r;
			goto end;
		}

		result += 4;
		buf += 4;
		*pos += 4;
		size -= 4;
	}

end:
	if (use_bank) {
		amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
		mutex_unlock(&adev->grbm_idx_mutex);
	} else if (use_ring) {
		amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);
	}

	if (pm_pg_lock)
		mutex_unlock(&adev->pm.mutex);

	pm_runtime_mark_last_busy(adev->ddev->dev);
	pm_runtime_put_autosuspend(adev->ddev->dev);

	return result;
}
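
/*
 * Illustrative example of the offset encoding documented above (a
 * sketch, not a guaranteed ABI): to read the register at byte offset
 * 0x100 on SE 1, SH 0, instance 2 with a GRBM bank switch, userspace
 * would seek to
 *
 *	loff_t pos = (1ULL << 62) |	request a GRBM bank switch
 *		     (1ULL << 24) |	SE 1 (bits 24..33)
 *		     (0ULL << 34) |	SH 0 (bits 34..43)
 *		     (2ULL << 44) |	instance 2 (bits 44..53)
 *		     0x100;		register byte offset
 *
 * and then read(2) a multiple of 4 bytes from the amdgpu_regs file
 * (on Linux typically under /sys/kernel/debug/dri/<minor>/).
 */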

/**
 * amdgpu_debugfs_regs_read - Callback for reading MMIO registers
 */
static ssize_t amdgpu_debugfs_regs_read(struct file *f, char __user *buf,
					size_t size, loff_t *pos)
{
	return amdgpu_debugfs_process_reg_op(true, f, buf, size, pos);
}

/**
 * amdgpu_debugfs_regs_write - Callback for writing MMIO registers
 */
static ssize_t amdgpu_debugfs_regs_write(struct file *f, const char __user *buf,
					 size_t size, loff_t *pos)
{
	return amdgpu_debugfs_process_reg_op(false, f, (char __user *)buf, size, pos);
}


/**
 * amdgpu_debugfs_regs_pcie_read - Read from a PCIE register
 *
 * @f: open file handle
 * @buf: User buffer to store read data in
 * @size: Number of bytes to read
 * @pos:  Offset to seek to
 *
 * The lower bits are the BYTE offset of the register to read.  This
 * allows reading multiple registers in a single call and having
 * the returned size reflect that.
 */
static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
					size_t size, loff_t *pos)
{
	struct amdgpu_device *adev = file_inode(f)->i_private;
	ssize_t result = 0;
	int r;

	if (size & 0x3 || *pos & 0x3)
		return -EINVAL;

	r = pm_runtime_get_sync(adev->ddev->dev);
	if (r < 0)
		return r;

	while (size) {
		uint32_t value;

		value = RREG32_PCIE(*pos >> 2);
		r = put_user(value, (uint32_t *)buf);
		if (r) {
			pm_runtime_mark_last_busy(adev->ddev->dev);
			pm_runtime_put_autosuspend(adev->ddev->dev);
			return r;
		}

		result += 4;
		buf += 4;
		*pos += 4;
		size -= 4;
	}

	pm_runtime_mark_last_busy(adev->ddev->dev);
	pm_runtime_put_autosuspend(adev->ddev->dev);

	return result;
}

/**
 * amdgpu_debugfs_regs_pcie_write - Write to a PCIE register
 *
 * @f: open file handle
 * @buf: User buffer to write data from
 * @size: Number of bytes to write
 * @pos:  Offset to seek to
 *
 * The lower bits are the BYTE offset of the register to write.  This
 * allows writing multiple registers in a single call and having
 * the returned size reflect that.
 */
static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user *buf,
					 size_t size, loff_t *pos)
{
	struct amdgpu_device *adev = file_inode(f)->i_private;
	ssize_t result = 0;
	int r;

	if (size & 0x3 || *pos & 0x3)
		return -EINVAL;

	r = pm_runtime_get_sync(adev->ddev->dev);
	if (r < 0)
		return r;

	while (size) {
		uint32_t value;

		r = get_user(value, (uint32_t *)buf);
		if (r) {
			pm_runtime_mark_last_busy(adev->ddev->dev);
			pm_runtime_put_autosuspend(adev->ddev->dev);
			return r;
		}

		WREG32_PCIE(*pos >> 2, value);

		result += 4;
		buf += 4;
		*pos += 4;
		size -= 4;
	}

	pm_runtime_mark_last_busy(adev->ddev->dev);
	pm_runtime_put_autosuspend(adev->ddev->dev);

	return result;
}

/**
 * amdgpu_debugfs_regs_didt_read - Read from a DIDT register
 *
 * @f: open file handle
 * @buf: User buffer to store read data in
 * @size: Number of bytes to read
 * @pos:  Offset to seek to
 *
 * The lower bits are the BYTE offset of the register to read.  This
 * allows reading multiple registers in a single call and having
 * the returned size reflect that.
 */
static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
					size_t size, loff_t *pos)
{
	struct amdgpu_device *adev = file_inode(f)->i_private;
	ssize_t result = 0;
	int r;

	if (size & 0x3 || *pos & 0x3)
		return -EINVAL;

	r = pm_runtime_get_sync(adev->ddev->dev);
	if (r < 0)
		return r;

	while (size) {
		uint32_t value;

		value = RREG32_DIDT(*pos >> 2);
		r = put_user(value, (uint32_t *)buf);
		if (r) {
			pm_runtime_mark_last_busy(adev->ddev->dev);
			pm_runtime_put_autosuspend(adev->ddev->dev);
			return r;
		}

		result += 4;
		buf += 4;
		*pos += 4;
		size -= 4;
	}

	pm_runtime_mark_last_busy(adev->ddev->dev);
	pm_runtime_put_autosuspend(adev->ddev->dev);

	return result;
}

/**
 * amdgpu_debugfs_regs_didt_write - Write to a DIDT register
 *
 * @f: open file handle
 * @buf: User buffer to write data from
 * @size: Number of bytes to write
 * @pos:  Offset to seek to
 *
 * The lower bits are the BYTE offset of the register to write.  This
 * allows writing multiple registers in a single call and having
 * the returned size reflect that.
 */
static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user *buf,
					 size_t size, loff_t *pos)
{
	struct amdgpu_device *adev = file_inode(f)->i_private;
	ssize_t result = 0;
	int r;

	if (size & 0x3 || *pos & 0x3)
		return -EINVAL;

	r = pm_runtime_get_sync(adev->ddev->dev);
	if (r < 0)
		return r;

	while (size) {
		uint32_t value;

		r = get_user(value, (uint32_t *)buf);
		if (r) {
			pm_runtime_mark_last_busy(adev->ddev->dev);
			pm_runtime_put_autosuspend(adev->ddev->dev);
			return r;
		}

		WREG32_DIDT(*pos >> 2, value);

		result += 4;
		buf += 4;
		*pos += 4;
		size -= 4;
	}

	pm_runtime_mark_last_busy(adev->ddev->dev);
	pm_runtime_put_autosuspend(adev->ddev->dev);

	return result;
}

/**
 * amdgpu_debugfs_regs_smc_read - Read from a SMC register
 *
 * @f: open file handle
 * @buf: User buffer to store read data in
 * @size: Number of bytes to read
 * @pos:  Offset to seek to
 *
 * The lower bits are the BYTE offset of the register to read.  This
 * allows reading multiple registers in a single call and having
 * the returned size reflect that.
 */
static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
					size_t size, loff_t *pos)
{
	struct amdgpu_device *adev = file_inode(f)->i_private;
	ssize_t result = 0;
	int r;

	if (size & 0x3 || *pos & 0x3)
		return -EINVAL;

	r = pm_runtime_get_sync(adev->ddev->dev);
	if (r < 0)
		return r;

	while (size) {
		uint32_t value;

		value = RREG32_SMC(*pos);
		r = put_user(value, (uint32_t *)buf);
		if (r) {
			pm_runtime_mark_last_busy(adev->ddev->dev);
			pm_runtime_put_autosuspend(adev->ddev->dev);
			return r;
		}

		result += 4;
		buf += 4;
		*pos += 4;
		size -= 4;
	}

	pm_runtime_mark_last_busy(adev->ddev->dev);
	pm_runtime_put_autosuspend(adev->ddev->dev);

	return result;
}

/**
 * amdgpu_debugfs_regs_smc_write - Write to a SMC register
 *
 * @f: open file handle
 * @buf: User buffer to write data from
 * @size: Number of bytes to write
 * @pos:  Offset to seek to
 *
 * The lower bits are the BYTE offset of the register to write.  This
 * allows writing multiple registers in a single call and having
 * the returned size reflect that.
 */
static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *buf,
					 size_t size, loff_t *pos)
{
	struct amdgpu_device *adev = file_inode(f)->i_private;
	ssize_t result = 0;
	int r;

	if (size & 0x3 || *pos & 0x3)
		return -EINVAL;

	r = pm_runtime_get_sync(adev->ddev->dev);
	if (r < 0)
		return r;

	while (size) {
		uint32_t value;

		r = get_user(value, (uint32_t *)buf);
		if (r) {
			pm_runtime_mark_last_busy(adev->ddev->dev);
			pm_runtime_put_autosuspend(adev->ddev->dev);
			return r;
		}

		WREG32_SMC(*pos, value);

		result += 4;
		buf += 4;
		*pos += 4;
		size -= 4;
	}

	pm_runtime_mark_last_busy(adev->ddev->dev);
	pm_runtime_put_autosuspend(adev->ddev->dev);

	return result;
}

/**
 * amdgpu_debugfs_gca_config_read - Read from gfx config data
 *
 * @f: open file handle
 * @buf: User buffer to store read data in
 * @size: Number of bytes to read
 * @pos:  Offset to seek to
 *
 * This file is used to access configuration data in a somewhat
 * stable fashion.  The format is a series of DWORDs with the first
 * indicating which revision it is.  New content is appended to the
 * end so that older software can still read the data.
 */
static ssize_t amdgpu_debugfs_gca_config_read(struct file *f, char __user *buf,
					size_t size, loff_t *pos)
{
	struct amdgpu_device *adev = file_inode(f)->i_private;
	ssize_t result = 0;
	int r;
	uint32_t *config, no_regs = 0;

	if (size & 0x3 || *pos & 0x3)
		return -EINVAL;

	config = kmalloc_array(256, sizeof(*config), GFP_KERNEL);
	if (!config)
		return -ENOMEM;

	/* version, increment each time something is added */
	config[no_regs++] = 3;
	config[no_regs++] = adev->gfx.config.max_shader_engines;
	config[no_regs++] = adev->gfx.config.max_tile_pipes;
	config[no_regs++] = adev->gfx.config.max_cu_per_sh;
	config[no_regs++] = adev->gfx.config.max_sh_per_se;
	config[no_regs++] = adev->gfx.config.max_backends_per_se;
	config[no_regs++] = adev->gfx.config.max_texture_channel_caches;
	config[no_regs++] = adev->gfx.config.max_gprs;
	config[no_regs++] = adev->gfx.config.max_gs_threads;
	config[no_regs++] = adev->gfx.config.max_hw_contexts;
	config[no_regs++] = adev->gfx.config.sc_prim_fifo_size_frontend;
	config[no_regs++] = adev->gfx.config.sc_prim_fifo_size_backend;
	config[no_regs++] = adev->gfx.config.sc_hiz_tile_fifo_size;
	config[no_regs++] = adev->gfx.config.sc_earlyz_tile_fifo_size;
	config[no_regs++] = adev->gfx.config.num_tile_pipes;
	config[no_regs++] = adev->gfx.config.backend_enable_mask;
	config[no_regs++] = adev->gfx.config.mem_max_burst_length_bytes;
	config[no_regs++] = adev->gfx.config.mem_row_size_in_kb;
	config[no_regs++] = adev->gfx.config.shader_engine_tile_size;
	config[no_regs++] = adev->gfx.config.num_gpus;
	config[no_regs++] = adev->gfx.config.multi_gpu_tile_size;
	config[no_regs++] = adev->gfx.config.mc_arb_ramcfg;
	config[no_regs++] = adev->gfx.config.gb_addr_config;
	config[no_regs++] = adev->gfx.config.num_rbs;

	/* rev==1 */
	config[no_regs++] = adev->rev_id;
	config[no_regs++] = adev->pg_flags;
	config[no_regs++] = adev->cg_flags;

	/* rev==2 */
	config[no_regs++] = adev->family;
	config[no_regs++] = adev->external_rev_id;

	/* rev==3 */
	config[no_regs++] = adev->pdev->device;
	config[no_regs++] = adev->pdev->revision;
	config[no_regs++] = adev->pdev->subsystem_device;
	config[no_regs++] = adev->pdev->subsystem_vendor;

	while (size && (*pos < no_regs * 4)) {
		uint32_t value;

		value = config[*pos >> 2];
		r = put_user(value, (uint32_t *)buf);
		if (r) {
			kfree(config);
			return r;
		}

		result += 4;
		buf += 4;
		*pos += 4;
		size -= 4;
	}

	kfree(config);
	return result;
}
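
/*
 * A minimal userspace parsing sketch for the stream above, with a
 * hypothetical fd opened on amdgpu_gca_config: because new fields are
 * only ever appended, readers check the version word and stop early.
 *
 *	uint32_t cfg[64];
 *	ssize_t n = pread(fd, cfg, sizeof(cfg), 0);
 *	if (n >= 4 && cfg[0] >= 3)
 *		printf("pci device id: 0x%x\n", cfg[29]);
 *
 * The index 29 assumes the rev==3 layout built above: 24 base words,
 * 3 rev==1 words, 2 rev==2 words, then pdev->device.
 */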

/**
 * amdgpu_debugfs_sensor_read - Read from the powerplay sensors
 *
 * @f: open file handle
 * @buf: User buffer to store read data in
 * @size: Number of bytes to read
 * @pos:  Offset to seek to
 *
 * The offset is treated as the BYTE address of one of the sensors
 * enumerated in amd/include/kgd_pp_interface.h under the
 * 'amd_pp_sensors' enumeration.  For instance, to read the UVD VCLK
 * you would use the offset 3 * 4 = 12.
 */
static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf,
					size_t size, loff_t *pos)
{
	struct amdgpu_device *adev = file_inode(f)->i_private;
	int idx, x, outsize, r, valuesize;
	uint32_t values[16];

	if (size & 3 || *pos & 0x3)
		return -EINVAL;

	if (!adev->pm.dpm_enabled)
		return -EINVAL;

	/* convert offset to sensor number */
	idx = *pos >> 2;

	valuesize = sizeof(values);

	r = pm_runtime_get_sync(adev->ddev->dev);
	if (r < 0)
		return r;

	r = amdgpu_dpm_read_sensor(adev, idx, &values[0], &valuesize);

	pm_runtime_mark_last_busy(adev->ddev->dev);
	pm_runtime_put_autosuspend(adev->ddev->dev);

	if (r)
		return r;

	if (size > valuesize)
		return -EINVAL;

	outsize = 0;
	x = 0;
	if (!r) {
		while (size) {
			r = put_user(values[x++], (int32_t *)buf);
			buf += 4;
			size -= 4;
			outsize += 4;
		}
	}

	return !r ? outsize : r;
}
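
/*
 * Illustrative userspace sketch (fd assumed open on the amdgpu_sensors
 * file): using the UVD VCLK example from the comment above, sensor
 * index 3 lives at byte offset 3 * 4 = 12:
 *
 *	uint32_t vclk;
 *	if (pread(fd, &vclk, sizeof(vclk), 3 * 4) == sizeof(vclk))
 *		printf("UVD VCLK: %u\n", vclk);
 */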

/**
 * amdgpu_debugfs_wave_read - Read WAVE STATUS data
 *
 * @f: open file handle
 * @buf: User buffer to store read data in
 * @size: Number of bytes to read
 * @pos:  Offset to seek to
 *
 * The offset being sought changes which wave the status data
 * will be returned for.  The bits are used as follows:
 *
 * Bits 0..6: 	Byte offset into data
 * Bits 7..14:	SE selector
 * Bits 15..22:	SH/SA selector
 * Bits 23..30: CU/{WGP+SIMD} selector
 * Bits 31..36: WAVE ID selector
 * Bits 37..44: SIMD ID selector
 *
 * The returned data begins with one DWORD of version information,
 * followed by the WAVE STATUS registers relevant to the GFX IP version
 * being used.  See gfx_v8_0_read_wave_data() for an example output.
 */
static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf,
					size_t size, loff_t *pos)
{
	struct amdgpu_device *adev = file_inode(f)->i_private;
	int r, x;
	ssize_t result = 0;
	uint32_t offset, se, sh, cu, wave, simd, data[32];

	if (size & 3 || *pos & 3)
		return -EINVAL;

	/* decode offset */
	offset = (*pos & GENMASK_ULL(6, 0));
	se = (*pos & GENMASK_ULL(14, 7)) >> 7;
	sh = (*pos & GENMASK_ULL(22, 15)) >> 15;
	cu = (*pos & GENMASK_ULL(30, 23)) >> 23;
	wave = (*pos & GENMASK_ULL(36, 31)) >> 31;
	simd = (*pos & GENMASK_ULL(44, 37)) >> 37;

	r = pm_runtime_get_sync(adev->ddev->dev);
	if (r < 0)
		return r;

	/* switch to the specific se/sh/cu */
	mutex_lock(&adev->grbm_idx_mutex);
	amdgpu_gfx_select_se_sh(adev, se, sh, cu);

	x = 0;
	if (adev->gfx.funcs->read_wave_data)
		adev->gfx.funcs->read_wave_data(adev, simd, wave, data, &x);

	amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
	mutex_unlock(&adev->grbm_idx_mutex);

	pm_runtime_mark_last_busy(adev->ddev->dev);
	pm_runtime_put_autosuspend(adev->ddev->dev);

	if (!x)
		return -EINVAL;

	while (size && (offset < x * 4)) {
		uint32_t value;

		value = data[offset >> 2];
		r = put_user(value, (uint32_t *)buf);
		if (r)
			return r;

		result += 4;
		buf += 4;
		offset += 4;
		size -= 4;
	}

	return result;
}
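
/*
 * Illustrative sketch of the wave-status offset encoding documented
 * above (the selector values are arbitrary examples): SE 0, SH 0,
 * CU 1, wave 2, SIMD 3, starting at byte 0 of the returned data:
 *
 *	loff_t pos = (0ULL << 7)  |	SE   (bits 7..14)
 *		     (0ULL << 15) |	SH   (bits 15..22)
 *		     (1ULL << 23) |	CU   (bits 23..30)
 *		     (2ULL << 31) |	wave (bits 31..36)
 *		     (3ULL << 37);	SIMD (bits 37..44)
 */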

/**
 * amdgpu_debugfs_gpr_read - Read wave gprs
 *
 * @f: open file handle
 * @buf: User buffer to store read data in
 * @size: Number of bytes to read
 * @pos:  Offset to seek to
 *
 * The offset being sought changes which wave the GPR data
 * will be returned for.  The bits are used as follows:
 *
 * Bits 0..11:	Byte offset into data
 * Bits 12..19:	SE selector
 * Bits 20..27:	SH/SA selector
 * Bits 28..35: CU/{WGP+SIMD} selector
 * Bits 36..43: WAVE ID selector
 * Bits 44..51: SIMD ID selector
 * Bits 52..59: Thread selector
 * Bits 60..61: Bank selector (VGPR=0,SGPR=1)
 *
 * The returned data comes from the SGPR or VGPR register bank for
 * the selected operational unit.
 */
static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf,
					size_t size, loff_t *pos)
{
	struct amdgpu_device *adev = file_inode(f)->i_private;
	int r;
	ssize_t result = 0;
	uint32_t offset, se, sh, cu, wave, simd, thread, bank, *data;

	if (size & 3 || *pos & 3)
		return -EINVAL;

	/* decode offset */
	offset = *pos & GENMASK_ULL(11, 0);
	se = (*pos & GENMASK_ULL(19, 12)) >> 12;
	sh = (*pos & GENMASK_ULL(27, 20)) >> 20;
	cu = (*pos & GENMASK_ULL(35, 28)) >> 28;
	wave = (*pos & GENMASK_ULL(43, 36)) >> 36;
	simd = (*pos & GENMASK_ULL(51, 44)) >> 44;
	thread = (*pos & GENMASK_ULL(59, 52)) >> 52;
	bank = (*pos & GENMASK_ULL(61, 60)) >> 60;

	data = kcalloc(1024, sizeof(*data), GFP_KERNEL);
	if (!data)
		return -ENOMEM;

	r = pm_runtime_get_sync(adev->ddev->dev);
	if (r < 0) {
		/* don't leak the dword buffer on runtime-PM failure */
		kfree(data);
		return r;
	}

	/* switch to the specific se/sh/cu */
	mutex_lock(&adev->grbm_idx_mutex);
	amdgpu_gfx_select_se_sh(adev, se, sh, cu);

	if (bank == 0) {
		if (adev->gfx.funcs->read_wave_vgprs)
			adev->gfx.funcs->read_wave_vgprs(adev, simd, wave, thread, offset, size >> 2, data);
	} else {
		if (adev->gfx.funcs->read_wave_sgprs)
			adev->gfx.funcs->read_wave_sgprs(adev, simd, wave, offset, size >> 2, data);
	}

	amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
	mutex_unlock(&adev->grbm_idx_mutex);

	pm_runtime_mark_last_busy(adev->ddev->dev);
	pm_runtime_put_autosuspend(adev->ddev->dev);

	while (size) {
		uint32_t value;

		value = data[offset++];
		r = put_user(value, (uint32_t *)buf);
		if (r) {
			result = r;
			goto err;
		}

		result += 4;
		buf += 4;
		size -= 4;
	}

err:
	kfree(data);
	return result;
}
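
/*
 * Illustrative sketch of the GPR offset encoding documented above: to
 * read SGPRs (bank 1) of SE 0, SH 0, CU 1, wave 0, SIMD 0 starting at
 * data byte 0, userspace would seek to
 *
 *	loff_t pos = (1ULL << 28) |	CU 1 (bits 28..35)
 *		     (1ULL << 60);	bank 1 = SGPR (bits 60..61)
 *
 * and read a multiple of 4 bytes; at most 1024 dwords (the size of the
 * kernel-side buffer allocated above) are meaningful per call.
 */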

static const struct file_operations amdgpu_debugfs_regs_fops = {
	.owner = THIS_MODULE,
	.read = amdgpu_debugfs_regs_read,
	.write = amdgpu_debugfs_regs_write,
	.llseek = default_llseek
};
static const struct file_operations amdgpu_debugfs_regs_didt_fops = {
	.owner = THIS_MODULE,
	.read = amdgpu_debugfs_regs_didt_read,
	.write = amdgpu_debugfs_regs_didt_write,
	.llseek = default_llseek
};
static const struct file_operations amdgpu_debugfs_regs_pcie_fops = {
	.owner = THIS_MODULE,
	.read = amdgpu_debugfs_regs_pcie_read,
	.write = amdgpu_debugfs_regs_pcie_write,
	.llseek = default_llseek
};
static const struct file_operations amdgpu_debugfs_regs_smc_fops = {
	.owner = THIS_MODULE,
	.read = amdgpu_debugfs_regs_smc_read,
	.write = amdgpu_debugfs_regs_smc_write,
	.llseek = default_llseek
};

static const struct file_operations amdgpu_debugfs_gca_config_fops = {
	.owner = THIS_MODULE,
	.read = amdgpu_debugfs_gca_config_read,
	.llseek = default_llseek
};

static const struct file_operations amdgpu_debugfs_sensors_fops = {
	.owner = THIS_MODULE,
	.read = amdgpu_debugfs_sensor_read,
	.llseek = default_llseek
};

static const struct file_operations amdgpu_debugfs_wave_fops = {
	.owner = THIS_MODULE,
	.read = amdgpu_debugfs_wave_read,
	.llseek = default_llseek
};
static const struct file_operations amdgpu_debugfs_gpr_fops = {
	.owner = THIS_MODULE,
	.read = amdgpu_debugfs_gpr_read,
	.llseek = default_llseek
};

static const struct file_operations *debugfs_regs[] = {
	&amdgpu_debugfs_regs_fops,
	&amdgpu_debugfs_regs_didt_fops,
	&amdgpu_debugfs_regs_pcie_fops,
	&amdgpu_debugfs_regs_smc_fops,
	&amdgpu_debugfs_gca_config_fops,
	&amdgpu_debugfs_sensors_fops,
	&amdgpu_debugfs_wave_fops,
	&amdgpu_debugfs_gpr_fops,
};

static const char *debugfs_regs_names[] = {
	"amdgpu_regs",
	"amdgpu_regs_didt",
	"amdgpu_regs_pcie",
	"amdgpu_regs_smc",
	"amdgpu_gca_config",
	"amdgpu_sensors",
	"amdgpu_wave",
	"amdgpu_gpr",
};

/**
 * amdgpu_debugfs_regs_init - Initialize debugfs entries that provide
 * register access.
 *
 * @adev: The device to attach the debugfs entries to
 */
int amdgpu_debugfs_regs_init(struct amdgpu_device *adev)
{
	struct drm_minor *minor = adev->ddev->primary;
	struct dentry *ent, *root = minor->debugfs_root;
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(debugfs_regs); i++) {
		ent = debugfs_create_file(debugfs_regs_names[i],
					  S_IFREG | S_IRUGO, root,
					  adev, debugfs_regs[i]);
		if (!i && !IS_ERR_OR_NULL(ent))
			i_size_write(ent->d_inode, adev->rmmio_size);
		adev->debugfs_regs[i] = ent;
	}

	return 0;
}

void amdgpu_debugfs_regs_cleanup(struct amdgpu_device *adev)
{
	unsigned i;

	for (i = 0; i < ARRAY_SIZE(debugfs_regs); i++) {
		if (adev->debugfs_regs[i]) {
			debugfs_remove(adev->debugfs_regs[i]);
			adev->debugfs_regs[i] = NULL;
		}
	}
}

static int amdgpu_debugfs_test_ib(struct seq_file *m, void *data)
{
	struct drm_info_node *node = (struct drm_info_node *) m->private;
	struct drm_device *dev = node->minor->dev;
	struct amdgpu_device *adev = dev->dev_private;
	int r = 0, i;

	r = pm_runtime_get_sync(dev->dev);
	if (r < 0)
		return r;

	/* Avoid accidentally unparking the sched thread during GPU reset */
	mutex_lock(&adev->lock_reset);

	/* hold on the scheduler */
	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
		struct amdgpu_ring *ring = adev->rings[i];

		if (!ring || !ring->sched.thread)
			continue;
		kthread_park(ring->sched.thread);
	}

	seq_printf(m, "run ib test:\n");
	r = amdgpu_ib_ring_tests(adev);
	if (r)
		seq_printf(m, "ib ring tests failed (%d).\n", r);
	else
		seq_printf(m, "ib ring tests passed.\n");

	/* go on the scheduler */
	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
		struct amdgpu_ring *ring = adev->rings[i];

		if (!ring || !ring->sched.thread)
			continue;
		kthread_unpark(ring->sched.thread);
	}

	mutex_unlock(&adev->lock_reset);

	pm_runtime_mark_last_busy(dev->dev);
	pm_runtime_put_autosuspend(dev->dev);

	return 0;
}

static int amdgpu_debugfs_get_vbios_dump(struct seq_file *m, void *data)
{
	struct drm_info_node *node = (struct drm_info_node *) m->private;
	struct drm_device *dev = node->minor->dev;
	struct amdgpu_device *adev = dev->dev_private;

	seq_write(m, adev->bios, adev->bios_size);
	return 0;
}

static int amdgpu_debugfs_evict_vram(struct seq_file *m, void *data)
{
	struct drm_info_node *node = (struct drm_info_node *)m->private;
	struct drm_device *dev = node->minor->dev;
	struct amdgpu_device *adev = dev->dev_private;
	int r;

	r = pm_runtime_get_sync(dev->dev);
	if (r < 0)
		return r;

	seq_printf(m, "(%d)\n", amdgpu_bo_evict_vram(adev));

	pm_runtime_mark_last_busy(dev->dev);
	pm_runtime_put_autosuspend(dev->dev);

	return 0;
}

static int amdgpu_debugfs_evict_gtt(struct seq_file *m, void *data)
{
	struct drm_info_node *node = (struct drm_info_node *)m->private;
	struct drm_device *dev = node->minor->dev;
	struct amdgpu_device *adev = dev->dev_private;
	int r;

	r = pm_runtime_get_sync(dev->dev);
	if (r < 0)
		return r;

	seq_printf(m, "(%d)\n", ttm_bo_evict_mm(&adev->mman.bdev, TTM_PL_TT));

	pm_runtime_mark_last_busy(dev->dev);
	pm_runtime_put_autosuspend(dev->dev);

	return 0;
}

static const struct drm_info_list amdgpu_debugfs_list[] = {
	{"amdgpu_vbios", &amdgpu_debugfs_get_vbios_dump},
	{"amdgpu_test_ib", &amdgpu_debugfs_test_ib},
	{"amdgpu_evict_vram", &amdgpu_debugfs_evict_vram},
	{"amdgpu_evict_gtt", &amdgpu_debugfs_evict_gtt},
};

static void amdgpu_ib_preempt_fences_swap(struct amdgpu_ring *ring,
					  struct dma_fence **fences)
{
	struct amdgpu_fence_driver *drv = &ring->fence_drv;
	uint32_t sync_seq, last_seq;

	last_seq = atomic_read(&ring->fence_drv.last_seq);
	sync_seq = ring->fence_drv.sync_seq;

	last_seq &= drv->num_fences_mask;
	sync_seq &= drv->num_fences_mask;

	do {
		struct dma_fence *fence, **ptr;

		++last_seq;
		last_seq &= drv->num_fences_mask;
		ptr = &drv->fences[last_seq];

		fence = rcu_dereference_protected(*ptr, 1);
		RCU_INIT_POINTER(*ptr, NULL);

		if (!fence)
			continue;

		fences[last_seq] = fence;

	} while (last_seq != sync_seq);
}

static void amdgpu_ib_preempt_signal_fences(struct dma_fence **fences,
					    int length)
{
	int i;
	struct dma_fence *fence;

	for (i = 0; i < length; i++) {
		fence = fences[i];
		if (!fence)
			continue;
		dma_fence_signal(fence);
		dma_fence_put(fence);
	}
}

static void amdgpu_ib_preempt_job_recovery(struct drm_gpu_scheduler *sched)
{
	struct drm_sched_job *s_job;
	struct dma_fence *fence;

	spin_lock(&sched->job_list_lock);
	list_for_each_entry(s_job, &sched->ring_mirror_list, node) {
		fence = sched->ops->run_job(s_job);
		dma_fence_put(fence);
	}
	spin_unlock(&sched->job_list_lock);
}

static void amdgpu_ib_preempt_mark_partial_job(struct amdgpu_ring *ring)
{
	struct amdgpu_job *job;
	struct drm_sched_job *s_job;
	uint32_t preempt_seq;
	struct dma_fence *fence, **ptr;
	struct amdgpu_fence_driver *drv = &ring->fence_drv;
	struct drm_gpu_scheduler *sched = &ring->sched;

	if (ring->funcs->type != AMDGPU_RING_TYPE_GFX)
		return;

	preempt_seq = le32_to_cpu(*(drv->cpu_addr + 2));
	if (preempt_seq <= atomic_read(&drv->last_seq))
		return;

	preempt_seq &= drv->num_fences_mask;
	ptr = &drv->fences[preempt_seq];
	fence = rcu_dereference_protected(*ptr, 1);

	spin_lock(&sched->job_list_lock);
	list_for_each_entry(s_job, &sched->ring_mirror_list, node) {
		job = to_amdgpu_job(s_job);
		if (job->fence == fence)
			/* mark the job as preempted */
			job->preemption_status |= AMDGPU_IB_PREEMPTED;
	}
	spin_unlock(&sched->job_list_lock);
}

static int amdgpu_debugfs_ib_preempt(void *data, u64 val)
{
	int r, resched, length;
	struct amdgpu_ring *ring;
	struct dma_fence **fences = NULL;
	struct amdgpu_device *adev = (struct amdgpu_device *)data;

	if (val >= AMDGPU_MAX_RINGS)
		return -EINVAL;

	ring = adev->rings[val];

	if (!ring || !ring->funcs->preempt_ib || !ring->sched.thread)
		return -EINVAL;

	/* the last preemption failed */
	if (ring->trail_seq != le32_to_cpu(*ring->trail_fence_cpu_addr))
		return -EBUSY;

	length = ring->fence_drv.num_fences_mask + 1;
	fences = kcalloc(length, sizeof(void *), GFP_KERNEL);
	if (!fences)
		return -ENOMEM;

	/* Avoid accidentally unparking the sched thread during GPU reset */
	mutex_lock(&adev->lock_reset);

	/* stop the scheduler */
	kthread_park(ring->sched.thread);

	resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);

	/* preempt the IB */
	r = amdgpu_ring_preempt_ib(ring);
	if (r) {
		DRM_WARN("failed to preempt ring %d\n", ring->idx);
		goto failure;
	}

	amdgpu_fence_process(ring);

	if (atomic_read(&ring->fence_drv.last_seq) !=
	    ring->fence_drv.sync_seq) {
		DRM_INFO("ring %d was preempted\n", ring->idx);

		amdgpu_ib_preempt_mark_partial_job(ring);

		/* swap out the old fences */
		amdgpu_ib_preempt_fences_swap(ring, fences);

		amdgpu_fence_driver_force_completion(ring);

		/* resubmit unfinished jobs */
		amdgpu_ib_preempt_job_recovery(&ring->sched);

		/* wait for the jobs to finish */
		amdgpu_fence_wait_empty(ring);

		/* signal the old fences */
		amdgpu_ib_preempt_signal_fences(fences, length);
	}

failure:
	/* restart the scheduler */
	kthread_unpark(ring->sched.thread);

	mutex_unlock(&adev->lock_reset);

	ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched);

	kfree(fences);

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_ib_preempt, NULL,
			amdgpu_debugfs_ib_preempt, "%llu\n");
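
/*
 * Usage sketch: writing a ring index to the amdgpu_preempt_ib file
 * triggers a preemption test on that ring, e.g. (path assumed to be
 * the usual Linux DRM debugfs location):
 *
 *	echo 0 > /sys/kernel/debug/dri/0/amdgpu_preempt_ib
 */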

int amdgpu_debugfs_init(struct amdgpu_device *adev)
{
	adev->debugfs_preempt =
		debugfs_create_file("amdgpu_preempt_ib", 0600,
				    adev->ddev->primary->debugfs_root, adev,
				    &fops_ib_preempt);
	if (!adev->debugfs_preempt) {
		DRM_ERROR("unable to create amdgpu_preempt_ib debugfs file\n");
		return -EIO;
	}

	return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_list,
					ARRAY_SIZE(amdgpu_debugfs_list));
}

void amdgpu_debugfs_preempt_cleanup(struct amdgpu_device *adev)
{
	debugfs_remove(adev->debugfs_preempt);
}

#else
int amdgpu_debugfs_init(struct amdgpu_device *adev)
{
	return 0;
}
void amdgpu_debugfs_preempt_cleanup(struct amdgpu_device *adev) { }
int amdgpu_debugfs_regs_init(struct amdgpu_device *adev)
{
	return 0;
}
void amdgpu_debugfs_regs_cleanup(struct amdgpu_device *adev) { }
#endif
