/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <dev/drm2/drmP.h>
#include <dev/drm2/radeon/radeon_drm.h>
#include "radeon_reg.h"
#include "radeon.h"
#include "radeon_asic.h"
#include "r100d.h"
#include "rs100d.h"
#include "rv200d.h"
#include "rv250d.h"
#include "atom.h"

#include "r100_reg_safe.h"
#include "rn50_reg_safe.h"

/* Firmware Names */
#define FIRMWARE_R100		"radeonkmsfw_R100_cp"
#define FIRMWARE_R200		"radeonkmsfw_R200_cp"
#define FIRMWARE_R300		"radeonkmsfw_R300_cp"
#define FIRMWARE_R420		"radeonkmsfw_R420_cp"
#define FIRMWARE_RS690		"radeonkmsfw_RS690_cp"
#define FIRMWARE_RS600		"radeonkmsfw_RS600_cp"
#define FIRMWARE_R520		"radeonkmsfw_R520_cp"

#ifdef __linux__
MODULE_FIRMWARE(FIRMWARE_R100);
MODULE_FIRMWARE(FIRMWARE_R200);
MODULE_FIRMWARE(FIRMWARE_R300);
MODULE_FIRMWARE(FIRMWARE_R420);
MODULE_FIRMWARE(FIRMWARE_RS690);
MODULE_FIRMWARE(FIRMWARE_RS600);
MODULE_FIRMWARE(FIRMWARE_R520);
#endif

#include "r100_track.h"

/* This file gathers functions specific to:
 * r100,rv100,rs100,rv200,rs200,r200,rv250,rs300,rv280
 * and others in some cases.
 */

static bool r100_is_in_vblank(struct radeon_device *rdev, int crtc)
{
	if (crtc == 0) {
		if (RREG32(RADEON_CRTC_STATUS) & RADEON_CRTC_VBLANK_CUR)
			return true;
		else
			return false;
	} else {
		if (RREG32(RADEON_CRTC2_STATUS) & RADEON_CRTC2_VBLANK_CUR)
			return true;
		else
			return false;
	}
}

static bool r100_is_counter_moving(struct radeon_device *rdev, int crtc)
{
	u32 vline1, vline2;

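	/* read the current scanline twice; if the value changes between the
	 * two reads, the vertical counter is still advancing and the CRTC
	 * is actively scanning out.
	 */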
	if (crtc == 0) {
		vline1 = (RREG32(RADEON_CRTC_VLINE_CRNT_VLINE) >> 16) & RADEON_CRTC_V_TOTAL;
		vline2 = (RREG32(RADEON_CRTC_VLINE_CRNT_VLINE) >> 16) & RADEON_CRTC_V_TOTAL;
	} else {
		vline1 = (RREG32(RADEON_CRTC2_VLINE_CRNT_VLINE) >> 16) & RADEON_CRTC_V_TOTAL;
		vline2 = (RREG32(RADEON_CRTC2_VLINE_CRNT_VLINE) >> 16) & RADEON_CRTC_V_TOTAL;
	}
	if (vline1 != vline2)
		return true;
	else
		return false;
}

/**
 * r100_wait_for_vblank - vblank wait asic callback.
 *
 * @rdev: radeon_device pointer
 * @crtc: crtc to wait for vblank on
 *
 * Wait for vblank on the requested crtc (r1xx-r4xx).
 */
void r100_wait_for_vblank(struct radeon_device *rdev, int crtc)
{
	unsigned i = 0;

	if (crtc >= rdev->num_crtc)
		return;

	if (crtc == 0) {
		if (!(RREG32(RADEON_CRTC_GEN_CNTL) & RADEON_CRTC_EN))
			return;
	} else {
		if (!(RREG32(RADEON_CRTC2_GEN_CNTL) & RADEON_CRTC2_EN))
			return;
	}

	/* depending on when we hit vblank, we may be close to active; if so,
	 * wait for another frame.
	 */
	while (r100_is_in_vblank(rdev, crtc)) {
		if (i++ % 100 == 0) {
			if (!r100_is_counter_moving(rdev, crtc))
				break;
		}
	}

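	/* now wait until the next vblank period actually begins */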
	while (!r100_is_in_vblank(rdev, crtc)) {
		if (i++ % 100 == 0) {
			if (!r100_is_counter_moving(rdev, crtc))
				break;
		}
	}
}

/**
 * r100_pre_page_flip - pre-pageflip callback.
 *
 * @rdev: radeon_device pointer
 * @crtc: crtc to prepare for pageflip on
 *
 * Pre-pageflip callback (r1xx-r4xx).
 * Enables the pageflip irq (vblank irq).
 */
void r100_pre_page_flip(struct radeon_device *rdev, int crtc)
{
	/* enable the pflip int */
	radeon_irq_kms_pflip_irq_get(rdev, crtc);
}

/**
 * r100_post_page_flip - post-pageflip callback.
 *
 * @rdev: radeon_device pointer
 * @crtc: crtc to cleanup pageflip on
 *
 * Post-pageflip callback (r1xx-r4xx).
 * Disables the pageflip irq (vblank irq).
 */
void r100_post_page_flip(struct radeon_device *rdev, int crtc)
{
	/* disable the pflip int */
	radeon_irq_kms_pflip_irq_put(rdev, crtc);
}

/**
 * r100_page_flip - pageflip callback.
 *
 * @rdev: radeon_device pointer
 * @crtc_id: crtc to flip on
 * @crtc_base: new address of the crtc (GPU MC address)
 *
 * Does the actual pageflip (r1xx-r4xx).
 * During vblank we take the crtc lock and wait for the update_pending
 * bit to go high; when it does, we release the lock and allow the
 * double-buffered update to take place.
 * Returns the current update pending status.
 */
u32 r100_page_flip(struct radeon_device *rdev, int crtc_id, u64 crtc_base)
{
	struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc_id];
	u32 tmp = ((u32)crtc_base) | RADEON_CRTC_OFFSET__OFFSET_LOCK;
	int i;

	/* Lock the graphics update lock */
	/* update the scanout addresses */
	WREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset, tmp);

	/* Wait for update_pending to go high. */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset) & RADEON_CRTC_OFFSET__GUI_TRIG_OFFSET)
			break;
		udelay(1);
	}
	DRM_DEBUG("Update pending now high. Unlocking vupdate_lock.\n");

	/* Unlock the lock, so double-buffering can take place inside vblank */
	tmp &= ~RADEON_CRTC_OFFSET__OFFSET_LOCK;
	WREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset, tmp);

	/* Return current update_pending status: */
	return RREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset) & RADEON_CRTC_OFFSET__GUI_TRIG_OFFSET;
}

/**
 * r100_pm_get_dynpm_state - look up dynpm power state callback.
 *
 * @rdev: radeon_device pointer
 *
 * Look up the optimal power state based on the
 * current state of the GPU (r1xx-r5xx).
 * Used for dynpm only.
 */
void r100_pm_get_dynpm_state(struct radeon_device *rdev)
{
	int i;
	rdev->pm.dynpm_can_upclock = true;
	rdev->pm.dynpm_can_downclock = true;

	switch (rdev->pm.dynpm_planned_action) {
	case DYNPM_ACTION_MINIMUM:
		rdev->pm.requested_power_state_index = 0;
		rdev->pm.dynpm_can_downclock = false;
		break;
	case DYNPM_ACTION_DOWNCLOCK:
		if (rdev->pm.current_power_state_index == 0) {
			rdev->pm.requested_power_state_index = rdev->pm.current_power_state_index;
			rdev->pm.dynpm_can_downclock = false;
		} else {
			if (rdev->pm.active_crtc_count > 1) {
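				/* with multiple heads active, skip power
				 * states flagged single-display-only */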
				for (i = 0; i < rdev->pm.num_power_states; i++) {
					if (rdev->pm.power_state[i].flags & RADEON_PM_STATE_SINGLE_DISPLAY_ONLY)
						continue;
					else if (i >= rdev->pm.current_power_state_index) {
						rdev->pm.requested_power_state_index = rdev->pm.current_power_state_index;
						break;
					} else {
						rdev->pm.requested_power_state_index = i;
						break;
					}
				}
			} else
				rdev->pm.requested_power_state_index =
					rdev->pm.current_power_state_index - 1;
		}
		/* don't use the power state if crtcs are active and no display flag is set */
		if ((rdev->pm.active_crtc_count > 0) &&
		    (rdev->pm.power_state[rdev->pm.requested_power_state_index].clock_info[0].flags &
		     RADEON_PM_MODE_NO_DISPLAY)) {
			rdev->pm.requested_power_state_index++;
		}
		break;
	case DYNPM_ACTION_UPCLOCK:
		if (rdev->pm.current_power_state_index == (rdev->pm.num_power_states - 1)) {
			rdev->pm.requested_power_state_index = rdev->pm.current_power_state_index;
			rdev->pm.dynpm_can_upclock = false;
		} else {
			if (rdev->pm.active_crtc_count > 1) {
				for (i = (rdev->pm.num_power_states - 1); i >= 0; i--) {
					if (rdev->pm.power_state[i].flags & RADEON_PM_STATE_SINGLE_DISPLAY_ONLY)
						continue;
					else if (i <= rdev->pm.current_power_state_index) {
						rdev->pm.requested_power_state_index = rdev->pm.current_power_state_index;
						break;
					} else {
						rdev->pm.requested_power_state_index = i;
						break;
					}
				}
			} else
				rdev->pm.requested_power_state_index =
					rdev->pm.current_power_state_index + 1;
		}
		break;
	case DYNPM_ACTION_DEFAULT:
		rdev->pm.requested_power_state_index = rdev->pm.default_power_state_index;
		rdev->pm.dynpm_can_upclock = false;
		break;
	case DYNPM_ACTION_NONE:
	default:
		DRM_ERROR("Requested mode for undefined action\n");
		return;
	}
	/* only one clock mode per power state */
	rdev->pm.requested_clock_mode_index = 0;

	DRM_DEBUG_DRIVER("Requested: e: %d m: %d p: %d\n",
		  rdev->pm.power_state[rdev->pm.requested_power_state_index].
		  clock_info[rdev->pm.requested_clock_mode_index].sclk,
		  rdev->pm.power_state[rdev->pm.requested_power_state_index].
		  clock_info[rdev->pm.requested_clock_mode_index].mclk,
		  rdev->pm.power_state[rdev->pm.requested_power_state_index].
		  pcie_lanes);
}

/**
 * r100_pm_init_profile - Initialize power profiles callback.
 *
 * @rdev: radeon_device pointer
 *
 * Initialize the power states used in profile mode
 * (r1xx-r3xx).
 * Used for profile mode only.
 */
void r100_pm_init_profile(struct radeon_device *rdev)
{
	/* default */
	rdev->pm.profiles[PM_PROFILE_DEFAULT_IDX].dpms_off_ps_idx = rdev->pm.default_power_state_index;
	rdev->pm.profiles[PM_PROFILE_DEFAULT_IDX].dpms_on_ps_idx = rdev->pm.default_power_state_index;
	rdev->pm.profiles[PM_PROFILE_DEFAULT_IDX].dpms_off_cm_idx = 0;
	rdev->pm.profiles[PM_PROFILE_DEFAULT_IDX].dpms_on_cm_idx = 0;
	/* low sh */
	rdev->pm.profiles[PM_PROFILE_LOW_SH_IDX].dpms_off_ps_idx = 0;
	rdev->pm.profiles[PM_PROFILE_LOW_SH_IDX].dpms_on_ps_idx = 0;
	rdev->pm.profiles[PM_PROFILE_LOW_SH_IDX].dpms_off_cm_idx = 0;
	rdev->pm.profiles[PM_PROFILE_LOW_SH_IDX].dpms_on_cm_idx = 0;
	/* mid sh */
	rdev->pm.profiles[PM_PROFILE_MID_SH_IDX].dpms_off_ps_idx = 0;
	rdev->pm.profiles[PM_PROFILE_MID_SH_IDX].dpms_on_ps_idx = 0;
	rdev->pm.profiles[PM_PROFILE_MID_SH_IDX].dpms_off_cm_idx = 0;
	rdev->pm.profiles[PM_PROFILE_MID_SH_IDX].dpms_on_cm_idx = 0;
	/* high sh */
	rdev->pm.profiles[PM_PROFILE_HIGH_SH_IDX].dpms_off_ps_idx = 0;
	rdev->pm.profiles[PM_PROFILE_HIGH_SH_IDX].dpms_on_ps_idx = rdev->pm.default_power_state_index;
	rdev->pm.profiles[PM_PROFILE_HIGH_SH_IDX].dpms_off_cm_idx = 0;
	rdev->pm.profiles[PM_PROFILE_HIGH_SH_IDX].dpms_on_cm_idx = 0;
	/* low mh */
	rdev->pm.profiles[PM_PROFILE_LOW_MH_IDX].dpms_off_ps_idx = 0;
	rdev->pm.profiles[PM_PROFILE_LOW_MH_IDX].dpms_on_ps_idx = rdev->pm.default_power_state_index;
	rdev->pm.profiles[PM_PROFILE_LOW_MH_IDX].dpms_off_cm_idx = 0;
	rdev->pm.profiles[PM_PROFILE_LOW_MH_IDX].dpms_on_cm_idx = 0;
	/* mid mh */
	rdev->pm.profiles[PM_PROFILE_MID_MH_IDX].dpms_off_ps_idx = 0;
	rdev->pm.profiles[PM_PROFILE_MID_MH_IDX].dpms_on_ps_idx = rdev->pm.default_power_state_index;
	rdev->pm.profiles[PM_PROFILE_MID_MH_IDX].dpms_off_cm_idx = 0;
	rdev->pm.profiles[PM_PROFILE_MID_MH_IDX].dpms_on_cm_idx = 0;
	/* high mh */
	rdev->pm.profiles[PM_PROFILE_HIGH_MH_IDX].dpms_off_ps_idx = 0;
	rdev->pm.profiles[PM_PROFILE_HIGH_MH_IDX].dpms_on_ps_idx = rdev->pm.default_power_state_index;
	rdev->pm.profiles[PM_PROFILE_HIGH_MH_IDX].dpms_off_cm_idx = 0;
	rdev->pm.profiles[PM_PROFILE_HIGH_MH_IDX].dpms_on_cm_idx = 0;
}

/**
 * r100_pm_misc - set additional pm hw parameters callback.
 *
 * @rdev: radeon_device pointer
 *
 * Set non-clock parameters associated with a power state
 * (voltage, pcie lanes, etc.) (r1xx-r4xx).
 */
void r100_pm_misc(struct radeon_device *rdev)
{
	int requested_index = rdev->pm.requested_power_state_index;
	struct radeon_power_state *ps = &rdev->pm.power_state[requested_index];
	struct radeon_voltage *voltage = &ps->clock_info[0].voltage;
	u32 tmp, sclk_cntl, sclk_cntl2, sclk_more_cntl;

	if ((voltage->type == VOLTAGE_GPIO) && (voltage->gpio.valid)) {
		if (ps->misc & ATOM_PM_MISCINFO_VOLTAGE_DROP_SUPPORT) {
			tmp = RREG32(voltage->gpio.reg);
			if (voltage->active_high)
				tmp |= voltage->gpio.mask;
			else
				tmp &= ~(voltage->gpio.mask);
			WREG32(voltage->gpio.reg, tmp);
			if (voltage->delay)
				udelay(voltage->delay);
		} else {
			tmp = RREG32(voltage->gpio.reg);
			if (voltage->active_high)
				tmp &= ~voltage->gpio.mask;
			else
				tmp |= voltage->gpio.mask;
			WREG32(voltage->gpio.reg, tmp);
			if (voltage->delay)
				udelay(voltage->delay);
		}
	}

	sclk_cntl = RREG32_PLL(SCLK_CNTL);
	sclk_cntl2 = RREG32_PLL(SCLK_CNTL2);
	sclk_cntl2 &= ~REDUCED_SPEED_SCLK_SEL(3);
	sclk_more_cntl = RREG32_PLL(SCLK_MORE_CNTL);
	sclk_more_cntl &= ~VOLTAGE_DELAY_SEL(3);
	if (ps->misc & ATOM_PM_MISCINFO_ASIC_REDUCED_SPEED_SCLK_EN) {
		sclk_more_cntl |= REDUCED_SPEED_SCLK_EN;
		if (ps->misc & ATOM_PM_MISCINFO_DYN_CLK_3D_IDLE)
			sclk_cntl2 |= REDUCED_SPEED_SCLK_MODE;
		else
			sclk_cntl2 &= ~REDUCED_SPEED_SCLK_MODE;
		if (ps->misc & ATOM_PM_MISCINFO_DYNAMIC_CLOCK_DIVIDER_BY_2)
			sclk_cntl2 |= REDUCED_SPEED_SCLK_SEL(0);
		else if (ps->misc & ATOM_PM_MISCINFO_DYNAMIC_CLOCK_DIVIDER_BY_4)
			sclk_cntl2 |= REDUCED_SPEED_SCLK_SEL(2);
	} else
		sclk_more_cntl &= ~REDUCED_SPEED_SCLK_EN;

	if (ps->misc & ATOM_PM_MISCINFO_ASIC_DYNAMIC_VOLTAGE_EN) {
		sclk_more_cntl |= IO_CG_VOLTAGE_DROP;
		if (voltage->delay) {
			sclk_more_cntl |= VOLTAGE_DROP_SYNC;
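			/* voltage->delay is in usec (cf. the udelay() calls
			 * above); map it onto the hw's fixed delay-select
			 * encodings */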
			switch (voltage->delay) {
			case 33:
				sclk_more_cntl |= VOLTAGE_DELAY_SEL(0);
				break;
			case 66:
				sclk_more_cntl |= VOLTAGE_DELAY_SEL(1);
				break;
			case 99:
				sclk_more_cntl |= VOLTAGE_DELAY_SEL(2);
				break;
			case 132:
				sclk_more_cntl |= VOLTAGE_DELAY_SEL(3);
				break;
			}
		} else
			sclk_more_cntl &= ~VOLTAGE_DROP_SYNC;
	} else
		sclk_more_cntl &= ~IO_CG_VOLTAGE_DROP;

	if (ps->misc & ATOM_PM_MISCINFO_DYNAMIC_HDP_BLOCK_EN)
		sclk_cntl &= ~FORCE_HDP;
	else
		sclk_cntl |= FORCE_HDP;

	WREG32_PLL(SCLK_CNTL, sclk_cntl);
	WREG32_PLL(SCLK_CNTL2, sclk_cntl2);
	WREG32_PLL(SCLK_MORE_CNTL, sclk_more_cntl);

	/* set pcie lanes */
	if ((rdev->flags & RADEON_IS_PCIE) &&
	    !(rdev->flags & RADEON_IS_IGP) &&
	    rdev->asic->pm.set_pcie_lanes &&
	    (ps->pcie_lanes !=
	     rdev->pm.power_state[rdev->pm.current_power_state_index].pcie_lanes)) {
		radeon_set_pcie_lanes(rdev,
				      ps->pcie_lanes);
		DRM_DEBUG_DRIVER("Setting: p: %d\n", ps->pcie_lanes);
	}
}

/**
 * r100_pm_prepare - pre-power state change callback.
 *
 * @rdev: radeon_device pointer
 *
 * Prepare for a power state change (r1xx-r4xx).
 */
void r100_pm_prepare(struct radeon_device *rdev)
{
	struct drm_device *ddev = rdev->ddev;
	struct drm_crtc *crtc;
	struct radeon_crtc *radeon_crtc;
	u32 tmp;

	/* disable any active CRTCs */
	list_for_each_entry(crtc, &ddev->mode_config.crtc_list, head) {
		radeon_crtc = to_radeon_crtc(crtc);
		if (radeon_crtc->enabled) {
			if (radeon_crtc->crtc_id) {
				tmp = RREG32(RADEON_CRTC2_GEN_CNTL);
				tmp |= RADEON_CRTC2_DISP_REQ_EN_B;
				WREG32(RADEON_CRTC2_GEN_CNTL, tmp);
			} else {
				tmp = RREG32(RADEON_CRTC_GEN_CNTL);
				tmp |= RADEON_CRTC_DISP_REQ_EN_B;
				WREG32(RADEON_CRTC_GEN_CNTL, tmp);
			}
		}
	}
}

/**
 * r100_pm_finish - post-power state change callback.
 *
 * @rdev: radeon_device pointer
 *
 * Clean up after a power state change (r1xx-r4xx).
 */
void r100_pm_finish(struct radeon_device *rdev)
{
	struct drm_device *ddev = rdev->ddev;
	struct drm_crtc *crtc;
	struct radeon_crtc *radeon_crtc;
	u32 tmp;

	/* enable any active CRTCs */
	list_for_each_entry(crtc, &ddev->mode_config.crtc_list, head) {
		radeon_crtc = to_radeon_crtc(crtc);
		if (radeon_crtc->enabled) {
			if (radeon_crtc->crtc_id) {
				tmp = RREG32(RADEON_CRTC2_GEN_CNTL);
				tmp &= ~RADEON_CRTC2_DISP_REQ_EN_B;
				WREG32(RADEON_CRTC2_GEN_CNTL, tmp);
			} else {
				tmp = RREG32(RADEON_CRTC_GEN_CNTL);
				tmp &= ~RADEON_CRTC_DISP_REQ_EN_B;
				WREG32(RADEON_CRTC_GEN_CNTL, tmp);
			}
		}
	}
}

/**
 * r100_gui_idle - gui idle callback.
 *
 * @rdev: radeon_device pointer
 *
 * Check if the GUI (2D/3D engines) is idle (r1xx-r5xx).
 * Returns true if idle, false if not.
 */
bool r100_gui_idle(struct radeon_device *rdev)
{
	if (RREG32(RADEON_RBBM_STATUS) & RADEON_RBBM_ACTIVE)
		return false;
	else
		return true;
}

/* hpd for digital panel detect/disconnect */
/**
 * r100_hpd_sense - hpd sense callback.
 *
 * @rdev: radeon_device pointer
 * @hpd: hpd (hotplug detect) pin
 *
 * Checks if a digital monitor is connected (r1xx-r4xx).
 * Returns true if connected, false if not connected.
 */
bool r100_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd)
{
	bool connected = false;

	switch (hpd) {
	case RADEON_HPD_1:
		if (RREG32(RADEON_FP_GEN_CNTL) & RADEON_FP_DETECT_SENSE)
			connected = true;
		break;
	case RADEON_HPD_2:
		if (RREG32(RADEON_FP2_GEN_CNTL) & RADEON_FP2_DETECT_SENSE)
			connected = true;
		break;
	default:
		break;
	}
	return connected;
}

/**
 * r100_hpd_set_polarity - hpd set polarity callback.
 *
 * @rdev: radeon_device pointer
 * @hpd: hpd (hotplug detect) pin
 *
 * Set the polarity of the hpd pin (r1xx-r4xx).
 */
void r100_hpd_set_polarity(struct radeon_device *rdev,
			   enum radeon_hpd_id hpd)
{
	u32 tmp;
	bool connected = r100_hpd_sense(rdev, hpd);

	switch (hpd) {
	case RADEON_HPD_1:
		tmp = RREG32(RADEON_FP_GEN_CNTL);
		if (connected)
			tmp &= ~RADEON_FP_DETECT_INT_POL;
		else
			tmp |= RADEON_FP_DETECT_INT_POL;
		WREG32(RADEON_FP_GEN_CNTL, tmp);
		break;
	case RADEON_HPD_2:
		tmp = RREG32(RADEON_FP2_GEN_CNTL);
		if (connected)
			tmp &= ~RADEON_FP2_DETECT_INT_POL;
		else
			tmp |= RADEON_FP2_DETECT_INT_POL;
		WREG32(RADEON_FP2_GEN_CNTL, tmp);
		break;
	default:
		break;
	}
}

/**
 * r100_hpd_init - hpd setup callback.
 *
 * @rdev: radeon_device pointer
 *
 * Setup the hpd pins used by the card (r1xx-r4xx).
 * Set the polarity, and enable the hpd interrupts.
 */
void r100_hpd_init(struct radeon_device *rdev)
{
	struct drm_device *dev = rdev->ddev;
	struct drm_connector *connector;
	unsigned enable = 0;

	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
		enable |= 1 << radeon_connector->hpd.hpd;
		radeon_hpd_set_polarity(rdev, radeon_connector->hpd.hpd);
	}
	radeon_irq_kms_enable_hpd(rdev, enable);
}

/**
 * r100_hpd_fini - hpd tear down callback.
 *
 * @rdev: radeon_device pointer
 *
 * Tear down the hpd pins used by the card (r1xx-r4xx).
 * Disable the hpd interrupts.
 */
void r100_hpd_fini(struct radeon_device *rdev)
{
	struct drm_device *dev = rdev->ddev;
	struct drm_connector *connector;
	unsigned disable = 0;

	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
		disable |= 1 << radeon_connector->hpd.hpd;
	}
	radeon_irq_kms_disable_hpd(rdev, disable);
}

/*
 * PCI GART
 */
void r100_pci_gart_tlb_flush(struct radeon_device *rdev)
{
	/* TODO: can we do something here? */
	/* It seems the hw only caches one entry, so we should discard this
	 * entry; otherwise, if the first GPU GART read hits this entry, it
	 * could end up at the wrong address. */
}

int r100_pci_gart_init(struct radeon_device *rdev)
{
	int r;

	if (rdev->gart.ptr) {
		DRM_ERROR("R100 PCI GART already initialized\n");
		return 0;
	}
	/* Initialize common gart structure */
	r = radeon_gart_init(rdev);
	if (r)
		return r;
	rdev->gart.table_size = rdev->gart.num_gpu_pages * 4;
	rdev->asic->gart.tlb_flush = &r100_pci_gart_tlb_flush;
	rdev->asic->gart.set_page = &r100_pci_gart_set_page;
	return radeon_gart_table_ram_alloc(rdev);
}

int r100_pci_gart_enable(struct radeon_device *rdev)
{
	uint32_t tmp;

	radeon_gart_restore(rdev);
	/* discard memory request outside of configured range */
	tmp = RREG32(RADEON_AIC_CNTL) | RADEON_DIS_OUT_OF_PCI_GART_ACCESS;
	WREG32(RADEON_AIC_CNTL, tmp);
	/* set address range for PCI address translate */
	WREG32(RADEON_AIC_LO_ADDR, rdev->mc.gtt_start);
	WREG32(RADEON_AIC_HI_ADDR, rdev->mc.gtt_end);
	/* set PCI GART page-table base address */
	WREG32(RADEON_AIC_PT_BASE, rdev->gart.table_addr);
	tmp = RREG32(RADEON_AIC_CNTL) | RADEON_PCIGART_TRANSLATE_EN;
	WREG32(RADEON_AIC_CNTL, tmp);
	r100_pci_gart_tlb_flush(rdev);
	DRM_INFO("PCI GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}

void r100_pci_gart_disable(struct radeon_device *rdev)
{
	uint32_t tmp;

	/* discard memory request outside of configured range */
	tmp = RREG32(RADEON_AIC_CNTL) | RADEON_DIS_OUT_OF_PCI_GART_ACCESS;
	WREG32(RADEON_AIC_CNTL, tmp & ~RADEON_PCIGART_TRANSLATE_EN);
	WREG32(RADEON_AIC_LO_ADDR, 0);
	WREG32(RADEON_AIC_HI_ADDR, 0);
}

int r100_pci_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr)
{
	u32 *gtt = rdev->gart.ptr;

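	/* each GART entry is one 32-bit little-endian DMA address per GPU page */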
	if (i < 0 || i >= rdev->gart.num_gpu_pages) {
		return -EINVAL;
	}
	gtt[i] = cpu_to_le32(lower_32_bits(addr));
	return 0;
}

void r100_pci_gart_fini(struct radeon_device *rdev)
{
	radeon_gart_fini(rdev);
	r100_pci_gart_disable(rdev);
	radeon_gart_table_ram_free(rdev);
}

int r100_irq_set(struct radeon_device *rdev)
{
	uint32_t tmp = 0;

	if (!rdev->irq.installed) {
		DRM_ERROR("Can't enable IRQ/MSI because no handler is installed\n");
		WREG32(R_000040_GEN_INT_CNTL, 0);
		return -EINVAL;
	}
	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
		tmp |= RADEON_SW_INT_ENABLE;
	}
	if (rdev->irq.crtc_vblank_int[0] ||
	    atomic_read(&rdev->irq.pflip[0])) {
		tmp |= RADEON_CRTC_VBLANK_MASK;
	}
	if (rdev->irq.crtc_vblank_int[1] ||
	    atomic_read(&rdev->irq.pflip[1])) {
		tmp |= RADEON_CRTC2_VBLANK_MASK;
	}
	if (rdev->irq.hpd[0]) {
		tmp |= RADEON_FP_DETECT_MASK;
	}
	if (rdev->irq.hpd[1]) {
		tmp |= RADEON_FP2_DETECT_MASK;
	}
	WREG32(RADEON_GEN_INT_CNTL, tmp);
	return 0;
}

void r100_irq_disable(struct radeon_device *rdev)
{
	u32 tmp;

	WREG32(R_000040_GEN_INT_CNTL, 0);
	/* Wait and acknowledge irq */
	mdelay(1);
	tmp = RREG32(R_000044_GEN_INT_STATUS);
	WREG32(R_000044_GEN_INT_STATUS, tmp);
}

static uint32_t r100_irq_ack(struct radeon_device *rdev)
{
	uint32_t irqs = RREG32(RADEON_GEN_INT_STATUS);
	uint32_t irq_mask = RADEON_SW_INT_TEST |
		RADEON_CRTC_VBLANK_STAT | RADEON_CRTC2_VBLANK_STAT |
		RADEON_FP_DETECT_STAT | RADEON_FP2_DETECT_STAT;

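	/* acknowledge whatever fired by writing the status bits back */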
	if (irqs) {
		WREG32(RADEON_GEN_INT_STATUS, irqs);
	}
	return irqs & irq_mask;
}

irqreturn_t r100_irq_process(struct radeon_device *rdev)
{
	uint32_t status, msi_rearm;
	bool queue_hotplug = false;

	status = r100_irq_ack(rdev);
	if (!status) {
		return IRQ_NONE;
	}
	if (rdev->shutdown) {
		return IRQ_NONE;
	}
	while (status) {
		/* SW interrupt */
		if (status & RADEON_SW_INT_TEST) {
			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
		}
		/* Vertical blank interrupts */
		if (status & RADEON_CRTC_VBLANK_STAT) {
			if (rdev->irq.crtc_vblank_int[0]) {
				drm_handle_vblank(rdev->ddev, 0);
				rdev->pm.vblank_sync = true;
				DRM_WAKEUP(&rdev->irq.vblank_queue);
			}
			if (atomic_read(&rdev->irq.pflip[0]))
				radeon_crtc_handle_flip(rdev, 0);
		}
		if (status & RADEON_CRTC2_VBLANK_STAT) {
			if (rdev->irq.crtc_vblank_int[1]) {
				drm_handle_vblank(rdev->ddev, 1);
				rdev->pm.vblank_sync = true;
				DRM_WAKEUP(&rdev->irq.vblank_queue);
			}
			if (atomic_read(&rdev->irq.pflip[1]))
				radeon_crtc_handle_flip(rdev, 1);
		}
		if (status & RADEON_FP_DETECT_STAT) {
			queue_hotplug = true;
			DRM_DEBUG("HPD1\n");
		}
		if (status & RADEON_FP2_DETECT_STAT) {
			queue_hotplug = true;
			DRM_DEBUG("HPD2\n");
		}
		status = r100_irq_ack(rdev);
	}
	if (queue_hotplug)
		taskqueue_enqueue(rdev->tq, &rdev->hotplug_work);
	if (rdev->msi_enabled) {
		switch (rdev->family) {
		case CHIP_RS400:
		case CHIP_RS480:
			msi_rearm = RREG32(RADEON_AIC_CNTL) & ~RS400_MSI_REARM;
			WREG32(RADEON_AIC_CNTL, msi_rearm);
			WREG32(RADEON_AIC_CNTL, msi_rearm | RS400_MSI_REARM);
			break;
		default:
			WREG32(RADEON_MSI_REARM_EN, RV370_MSI_REARM_EN);
			break;
		}
	}
	return IRQ_HANDLED;
}

u32 r100_get_vblank_counter(struct radeon_device *rdev, int crtc)
{
	if (crtc == 0)
		return RREG32(RADEON_CRTC_CRNT_FRAME);
	else
		return RREG32(RADEON_CRTC2_CRNT_FRAME);
}

/* Whoever calls radeon_fence_emit should call ring_lock and ask
 * for enough space (today the callers are ib schedule and buffer move) */
void r100_fence_ring_emit(struct radeon_device *rdev,
			  struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];

	/* We have to make sure that caches are flushed before
	 * CPU might read something from VRAM. */
	radeon_ring_write(ring, PACKET0(RADEON_RB3D_DSTCACHE_CTLSTAT, 0));
	radeon_ring_write(ring, RADEON_RB3D_DC_FLUSH_ALL);
	radeon_ring_write(ring, PACKET0(RADEON_RB3D_ZCACHE_CTLSTAT, 0));
	radeon_ring_write(ring, RADEON_RB3D_ZC_FLUSH_ALL);
	/* Wait until IDLE & CLEAN */
	radeon_ring_write(ring, PACKET0(RADEON_WAIT_UNTIL, 0));
	radeon_ring_write(ring, RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_3D_IDLECLEAN);
	radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0));
	radeon_ring_write(ring, rdev->config.r100.hdp_cntl |
				RADEON_HDP_READ_BUFFER_INVALIDATE);
	radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0));
	radeon_ring_write(ring, rdev->config.r100.hdp_cntl);
	/* Emit fence sequence & fire IRQ */
	radeon_ring_write(ring, PACKET0(rdev->fence_drv[fence->ring].scratch_reg, 0));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, PACKET0(RADEON_GEN_INT_STATUS, 0));
	radeon_ring_write(ring, RADEON_SW_INT_FIRE);
}

void r100_semaphore_ring_emit(struct radeon_device *rdev,
			      struct radeon_ring *ring,
			      struct radeon_semaphore *semaphore,
			      bool emit_wait)
{
	/* Unused on older asics, since we don't have semaphores or multiple rings */
	panic("%s: Unused on older asics", __func__);
}

int r100_copy_blit(struct radeon_device *rdev,
		   uint64_t src_offset,
		   uint64_t dst_offset,
		   unsigned num_gpu_pages,
		   struct radeon_fence **fence)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	uint32_t cur_pages;
	uint32_t stride_bytes = RADEON_GPU_PAGE_SIZE;
	uint32_t pitch;
	uint32_t stride_pixels;
	unsigned ndw;
	int num_loops;
	int r = 0;

	/* radeon limited to 16k stride */
	stride_bytes &= 0x3fff;
	/* radeon pitch is /64 */
	pitch = stride_bytes / 64;
	stride_pixels = stride_bytes / 4;
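	/* a single BITBLT_MULTI moves at most 8191 rows of one GPU page each,
	 * hence the loop count below */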
	num_loops = DIV_ROUND_UP(num_gpu_pages, 8191);

	/* Ask for enough room for blit + flush + fence */
	ndw = 64 + (10 * num_loops);
	r = radeon_ring_lock(rdev, ring, ndw);
	if (r) {
		DRM_ERROR("radeon: moving bo (%d) asking for %u dw.\n", r, ndw);
		return -EINVAL;
	}
	while (num_gpu_pages > 0) {
		cur_pages = num_gpu_pages;
		if (cur_pages > 8191) {
			cur_pages = 8191;
		}
		num_gpu_pages -= cur_pages;

		/* pages run in the Y direction (height);
		   page width runs in the X direction (width) */
		radeon_ring_write(ring, PACKET3(PACKET3_BITBLT_MULTI, 8));
		radeon_ring_write(ring,
				  RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
				  RADEON_GMC_DST_PITCH_OFFSET_CNTL |
				  RADEON_GMC_SRC_CLIPPING |
				  RADEON_GMC_DST_CLIPPING |
				  RADEON_GMC_BRUSH_NONE |
				  (RADEON_COLOR_FORMAT_ARGB8888 << 8) |
				  RADEON_GMC_SRC_DATATYPE_COLOR |
				  RADEON_ROP3_S |
				  RADEON_DP_SRC_SOURCE_MEMORY |
				  RADEON_GMC_CLR_CMP_CNTL_DIS |
				  RADEON_GMC_WR_MSK_DIS);
		radeon_ring_write(ring, (pitch << 22) | (src_offset >> 10));
		radeon_ring_write(ring, (pitch << 22) | (dst_offset >> 10));
		radeon_ring_write(ring, (0x1fff) | (0x1fff << 16));
		radeon_ring_write(ring, 0);
		radeon_ring_write(ring, (0x1fff) | (0x1fff << 16));
		radeon_ring_write(ring, num_gpu_pages);
		radeon_ring_write(ring, num_gpu_pages);
		radeon_ring_write(ring, cur_pages | (stride_pixels << 16));
	}
	radeon_ring_write(ring, PACKET0(RADEON_DSTCACHE_CTLSTAT, 0));
	radeon_ring_write(ring, RADEON_RB2D_DC_FLUSH_ALL);
	radeon_ring_write(ring, PACKET0(RADEON_WAIT_UNTIL, 0));
	radeon_ring_write(ring,
			  RADEON_WAIT_2D_IDLECLEAN |
			  RADEON_WAIT_HOST_IDLECLEAN |
			  RADEON_WAIT_DMA_GUI_IDLE);
	if (fence) {
		r = radeon_fence_emit(rdev, fence, RADEON_RING_TYPE_GFX_INDEX);
	}
	radeon_ring_unlock_commit(rdev, ring);
	return r;
}

static int r100_cp_wait_for_idle(struct radeon_device *rdev)
{
	unsigned i;
	u32 tmp;

	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(R_000E40_RBBM_STATUS);
		if (!G_000E40_CP_CMDSTRM_BUSY(tmp)) {
			return 0;
		}
		udelay(1);
	}
	return -1;
}

void r100_ring_start(struct radeon_device *rdev, struct radeon_ring *ring)
{
	int r;

	r = radeon_ring_lock(rdev, ring, 2);
	if (r) {
		return;
	}
	radeon_ring_write(ring, PACKET0(RADEON_ISYNC_CNTL, 0));
	radeon_ring_write(ring,
			  RADEON_ISYNC_ANY2D_IDLE3D |
			  RADEON_ISYNC_ANY3D_IDLE2D |
			  RADEON_ISYNC_WAIT_IDLEGUI |
			  RADEON_ISYNC_CPSCRATCH_IDLEGUI);
	radeon_ring_unlock_commit(rdev, ring);
}


/* Load the microcode for the CP */
static int r100_cp_init_microcode(struct radeon_device *rdev)
{
	const char *fw_name = NULL;
	int err;

	DRM_DEBUG_KMS("\n");

	if ((rdev->family == CHIP_R100) || (rdev->family == CHIP_RV100) ||
	    (rdev->family == CHIP_RV200) || (rdev->family == CHIP_RS100) ||
	    (rdev->family == CHIP_RS200)) {
		DRM_INFO("Loading R100 Microcode\n");
		fw_name = FIRMWARE_R100;
	} else if ((rdev->family == CHIP_R200) ||
		   (rdev->family == CHIP_RV250) ||
		   (rdev->family == CHIP_RV280) ||
		   (rdev->family == CHIP_RS300)) {
		DRM_INFO("Loading R200 Microcode\n");
		fw_name = FIRMWARE_R200;
	} else if ((rdev->family == CHIP_R300) ||
		   (rdev->family == CHIP_R350) ||
		   (rdev->family == CHIP_RV350) ||
		   (rdev->family == CHIP_RV380) ||
		   (rdev->family == CHIP_RS400) ||
		   (rdev->family == CHIP_RS480)) {
		DRM_INFO("Loading R300 Microcode\n");
		fw_name = FIRMWARE_R300;
	} else if ((rdev->family == CHIP_R420) ||
		   (rdev->family == CHIP_R423) ||
		   (rdev->family == CHIP_RV410)) {
		DRM_INFO("Loading R400 Microcode\n");
		fw_name = FIRMWARE_R420;
	} else if ((rdev->family == CHIP_RS690) ||
		   (rdev->family == CHIP_RS740)) {
		DRM_INFO("Loading RS690/RS740 Microcode\n");
		fw_name = FIRMWARE_RS690;
	} else if (rdev->family == CHIP_RS600) {
		DRM_INFO("Loading RS600 Microcode\n");
		fw_name = FIRMWARE_RS600;
	} else if ((rdev->family == CHIP_RV515) ||
		   (rdev->family == CHIP_R520) ||
		   (rdev->family == CHIP_RV530) ||
		   (rdev->family == CHIP_R580) ||
		   (rdev->family == CHIP_RV560) ||
		   (rdev->family == CHIP_RV570)) {
		DRM_INFO("Loading R500 Microcode\n");
		fw_name = FIRMWARE_R520;
	}

	err = 0;
	rdev->me_fw = firmware_get(fw_name);
	if (rdev->me_fw == NULL) {
		DRM_ERROR("radeon_cp: Failed to load firmware \"%s\"\n",
		       fw_name);
		err = -ENOENT;
	} else if (rdev->me_fw->datasize % 8) {
		DRM_ERROR(
		       "radeon_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->me_fw->datasize, fw_name);
		err = -EINVAL;
		firmware_put(rdev->me_fw, FIRMWARE_UNLOAD);
		rdev->me_fw = NULL;
	}
	return err;
}

/**
 * r100_cp_fini_microcode - drop the firmware image reference
 *
 * @rdev: radeon_device pointer
 *
 * Drop the me firmware image reference.
 * Called at driver shutdown.
 */
static void r100_cp_fini_microcode(struct radeon_device *rdev)
{

	if (rdev->me_fw != NULL) {
		firmware_put(rdev->me_fw, FIRMWARE_UNLOAD);
		rdev->me_fw = NULL;
	}
}

static void r100_cp_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	int i, size;

	if (r100_gui_wait_for_idle(rdev)) {
		DRM_ERROR("Failed to wait GUI idle while "
		       "programming pipes. Bad things might happen.\n");
	}

	if (rdev->me_fw) {
		size = rdev->me_fw->datasize / 4;
		fw_data = (const __be32 *)rdev->me_fw->data;
		WREG32(RADEON_CP_ME_RAM_ADDR, 0);
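		/* the microcode is stored as big-endian dword pairs; each
		 * pair is written to the ME RAM as a high/low dword */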
		for (i = 0; i < size; i += 2) {
			WREG32(RADEON_CP_ME_RAM_DATAH,
			       be32_to_cpup(&fw_data[i]));
			WREG32(RADEON_CP_ME_RAM_DATAL,
			       be32_to_cpup(&fw_data[i + 1]));
		}
	}
}

int r100_cp_init(struct radeon_device *rdev, unsigned ring_size)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	unsigned rb_bufsz;
	unsigned rb_blksz;
	unsigned max_fetch;
	unsigned pre_write_timer;
	unsigned pre_write_limit;
	unsigned indirect2_start;
	unsigned indirect1_start;
	uint32_t tmp;
	int r;

	if (r100_debugfs_cp_init(rdev)) {
		DRM_ERROR("Failed to register debugfs file for CP !\n");
	}
	if (!rdev->me_fw) {
		r = r100_cp_init_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load firmware!\n");
			return r;
		}
	}

	/* Align ring size */
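	/* RB_BUFSZ encodes the ring size as a power of two; recompute
	 * ring_size in bytes from the encoded value so the driver and the
	 * hw agree on the buffer length */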
	rb_bufsz = drm_order(ring_size / 8);
	ring_size = (1 << (rb_bufsz + 1)) * 4;
	r100_cp_load_microcode(rdev);
	r = radeon_ring_init(rdev, ring, ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     RADEON_CP_RB_RPTR, RADEON_CP_RB_WPTR,
			     0, 0x7fffff, RADEON_CP_PACKET2);
	if (r) {
		return r;
	}
	/* Each time the cp reads 1024 bytes (16 dword/quadword), update
	 * the rptr copy in system ram */
	rb_blksz = 9;
	/* cp will read 128 bytes at a time (4 dwords) */
	max_fetch = 1;
	ring->align_mask = 16 - 1;
	/* Write to CP_RB_WPTR will be delayed for pre_write_timer clocks */
	pre_write_timer = 64;
	/* Force CP_RB_WPTR write if written more than one time before the
	 * delay expire
	 */
	pre_write_limit = 0;
	/* Setup the cp cache like this (cache size is 96 dwords) :
	 *	RING		0  to 15
	 *	INDIRECT1	16 to 79
	 *	INDIRECT2	80 to 95
	 * So ring cache size is 16dwords (> (2 * max_fetch = 2 * 4dwords))
	 *    indirect1 cache size is 64dwords (> (2 * max_fetch = 2 * 4dwords))
	 *    indirect2 cache size is 16dwords (> (2 * max_fetch = 2 * 4dwords))
	 * Idea being that most of the gpu cmd will be through indirect1 buffer
	 * so it gets the bigger cache.
	 */
	indirect2_start = 80;
	indirect1_start = 16;
	/* cp setup */
	WREG32(0x718, pre_write_timer | (pre_write_limit << 28));
	tmp = (REG_SET(RADEON_RB_BUFSZ, rb_bufsz) |
	       REG_SET(RADEON_RB_BLKSZ, rb_blksz) |
	       REG_SET(RADEON_MAX_FETCH, max_fetch));
#ifdef __BIG_ENDIAN
	tmp |= RADEON_BUF_SWAP_32BIT;
#endif
	WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_NO_UPDATE);

	/* Set ring address */
	DRM_INFO("radeon: ring at 0x%016lX\n", (unsigned long)ring->gpu_addr);
	WREG32(RADEON_CP_RB_BASE, ring->gpu_addr);
	/* Force read & write ptr to 0 */
	WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_RPTR_WR_ENA | RADEON_RB_NO_UPDATE);
	WREG32(RADEON_CP_RB_RPTR_WR, 0);
	ring->wptr = 0;
	WREG32(RADEON_CP_RB_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(R_00070C_CP_RB_RPTR_ADDR,
		S_00070C_RB_RPTR_ADDR((rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) >> 2));
	WREG32(R_000774_SCRATCH_ADDR, rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET);

	if (rdev->wb.enabled)
		WREG32(R_000770_SCRATCH_UMSK, 0xff);
	else {
		tmp |= RADEON_RB_NO_UPDATE;
		WREG32(R_000770_SCRATCH_UMSK, 0);
	}

	WREG32(RADEON_CP_RB_CNTL, tmp);
	udelay(10);
	ring->rptr = RREG32(RADEON_CP_RB_RPTR);
	/* Set cp mode to bus mastering & enable cp*/
	WREG32(RADEON_CP_CSQ_MODE,
	       REG_SET(RADEON_INDIRECT2_START, indirect2_start) |
	       REG_SET(RADEON_INDIRECT1_START, indirect1_start));
	WREG32(RADEON_CP_RB_WPTR_DELAY, 0);
	WREG32(RADEON_CP_CSQ_MODE, 0x00004D4D);
	WREG32(RADEON_CP_CSQ_CNTL, RADEON_CSQ_PRIBM_INDBM);

	/* at this point everything should be setup correctly to enable master */
	pci_enable_busmaster(rdev->dev);

	radeon_ring_start(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, ring);
	if (r) {
		DRM_ERROR("radeon: cp isn't working (%d).\n", r);
		return r;
	}
	ring->ready = true;
	radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	if (!ring->rptr_save_reg /* not resuming from suspend */
	    && radeon_ring_supports_scratch_reg(rdev, ring)) {
		r = radeon_scratch_get(rdev, &ring->rptr_save_reg);
		if (r) {
			DRM_ERROR("failed to get scratch reg for rptr save (%d).\n", r);
			ring->rptr_save_reg = 0;
		}
	}
	return 0;
}

void r100_cp_fini(struct radeon_device *rdev)
{
	if (r100_cp_wait_for_idle(rdev)) {
		DRM_ERROR("Wait for CP idle timeout, shutting down CP.\n");
	}
	/* Disable ring */
	r100_cp_disable(rdev);
	radeon_scratch_free(rdev, rdev->ring[RADEON_RING_TYPE_GFX_INDEX].rptr_save_reg);
	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	DRM_INFO("radeon: cp finalized\n");
}

void r100_cp_disable(struct radeon_device *rdev)
{
	/* Disable ring */
	radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
	WREG32(RADEON_CP_CSQ_MODE, 0);
	WREG32(RADEON_CP_CSQ_CNTL, 0);
	WREG32(R_000770_SCRATCH_UMSK, 0);
	if (r100_gui_wait_for_idle(rdev)) {
		DRM_ERROR("Failed to wait GUI idle while "
		       "programming pipes. Bad things might happen.\n");
	}
}

/*
 * CS functions
 */
int r100_reloc_pitch_offset(struct radeon_cs_parser *p,
			    struct radeon_cs_packet *pkt,
			    unsigned idx,
			    unsigned reg)
{
	int r;
	u32 tile_flags = 0;
	u32 tmp;
	struct radeon_cs_reloc *reloc;
	u32 value;

	r = r100_cs_packet_next_reloc(p, &reloc);
	if (r) {
		DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
			  idx, reg);
		r100_cs_dump_packet(p, pkt);
		return r;
	}

	value = radeon_get_ib_value(p, idx);
	tmp = value & 0x003fffff;
	tmp += (((u32)reloc->lobj.gpu_offset) >> 10);

	if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
		if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
			tile_flags |= RADEON_DST_TILE_MACRO;
		if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) {
			if (reg == RADEON_SRC_PITCH_OFFSET) {
				DRM_ERROR("Cannot src blit from microtiled surface\n");
				r100_cs_dump_packet(p, pkt);
				return -EINVAL;
			}
			tile_flags |= RADEON_DST_TILE_MICRO;
		}

		tmp |= tile_flags;
		p->ib.ptr[idx] = (value & 0x3fc00000) | tmp;
	} else
		p->ib.ptr[idx] = (value & 0xffc00000) | tmp;
	return 0;
}

int r100_packet3_load_vbpntr(struct radeon_cs_parser *p,
			     struct radeon_cs_packet *pkt,
			     int idx)
{
	unsigned c, i;
	struct radeon_cs_reloc *reloc;
	struct r100_cs_track *track;
	int r = 0;
	volatile uint32_t *ib;
	u32 idx_value;

	ib = p->ib.ptr;
	track = (struct r100_cs_track *)p->track;
	c = radeon_get_ib_value(p, idx++) & 0x1F;
	if (c > 16) {
	    DRM_ERROR("Only 16 vertex buffers are allowed %d\n",
		      pkt->opcode);
	    r100_cs_dump_packet(p, pkt);
	    return -EINVAL;
	}
	track->num_arrays = c;
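	/* vertex arrays are packed in pairs: one dword carries both element
	 * sizes, followed by one relocated buffer address per array */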
	for (i = 0; i < (c - 1); i += 2, idx += 3) {
		r = r100_cs_packet_next_reloc(p, &reloc);
		if (r) {
			DRM_ERROR("No reloc for packet3 %d\n",
				  pkt->opcode);
			r100_cs_dump_packet(p, pkt);
			return r;
		}
		idx_value = radeon_get_ib_value(p, idx);
		ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset);

		track->arrays[i + 0].esize = idx_value >> 8;
		track->arrays[i + 0].robj = reloc->robj;
		track->arrays[i + 0].esize &= 0x7F;
		r = r100_cs_packet_next_reloc(p, &reloc);
		if (r) {
			DRM_ERROR("No reloc for packet3 %d\n",
				  pkt->opcode);
			r100_cs_dump_packet(p, pkt);
			return r;
		}
		ib[idx+2] = radeon_get_ib_value(p, idx + 2) + ((u32)reloc->lobj.gpu_offset);
		track->arrays[i + 1].robj = reloc->robj;
		track->arrays[i + 1].esize = idx_value >> 24;
		track->arrays[i + 1].esize &= 0x7F;
	}
	if (c & 1) {
		r = r100_cs_packet_next_reloc(p, &reloc);
		if (r) {
			DRM_ERROR("No reloc for packet3 %d\n",
				  pkt->opcode);
			r100_cs_dump_packet(p, pkt);
			return r;
		}
		idx_value = radeon_get_ib_value(p, idx);
		ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset);
		track->arrays[i + 0].robj = reloc->robj;
		track->arrays[i + 0].esize = idx_value >> 8;
		track->arrays[i + 0].esize &= 0x7F;
	}
	return r;
}

int r100_cs_parse_packet0(struct radeon_cs_parser *p,
			  struct radeon_cs_packet *pkt,
			  const unsigned *auth, unsigned n,
			  radeon_packet0_check_t check)
{
	unsigned reg;
	unsigned i, j, m;
	unsigned idx;
	int r;

	idx = pkt->idx + 1;
	reg = pkt->reg;
	/* Check that the register falls into the register range
	 * determined by the number of entries (n) in the
	 * safe register bitmap.
	 */
	if (pkt->one_reg_wr) {
		if ((reg >> 7) > n) {
			return -EINVAL;
		}
	} else {
		if (((reg + (pkt->count << 2)) >> 7) > n) {
			return -EINVAL;
		}
	}
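	/* auth[] is a bitmap with one bit per register dword:
	 * word index = reg >> 7, bit = (reg >> 2) & 31 */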
	for (i = 0; i <= pkt->count; i++, idx++) {
		j = (reg >> 7);
		m = 1 << ((reg >> 2) & 31);
		if (auth[j] & m) {
			r = check(p, pkt, idx, reg);
			if (r) {
				return r;
			}
		}
		if (pkt->one_reg_wr) {
			if (!(auth[j] & m)) {
				break;
			}
		} else {
			reg += 4;
		}
	}
	return 0;
}

void r100_cs_dump_packet(struct radeon_cs_parser *p,
			 struct radeon_cs_packet *pkt)
{
	volatile uint32_t *ib;
	unsigned i;
	unsigned idx;

	ib = p->ib.ptr;
	idx = pkt->idx;
	for (i = 0; i <= (pkt->count + 1); i++, idx++) {
		DRM_INFO("ib[%d]=0x%08X\n", idx, ib[idx]);
	}
}

/**
 * r100_cs_packet_parse() - parse cp packet and point ib index to next packet
 * @parser:	parser structure holding parsing context.
 * @pkt:	where to store packet information
 *
 * Assume that chunk_ib_index is properly set. Will return -EINVAL
 * if the packet is bigger than the remaining ib size, or if the packet is unknown.
 **/
int r100_cs_packet_parse(struct radeon_cs_parser *p,
			 struct radeon_cs_packet *pkt,
			 unsigned idx)
{
	struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
	uint32_t header;

	if (idx >= ib_chunk->length_dw) {
		DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
			  idx, ib_chunk->length_dw);
		return -EINVAL;
	}
	header = radeon_get_ib_value(p, idx);
	pkt->idx = idx;
	pkt->type = CP_PACKET_GET_TYPE(header);
	pkt->count = CP_PACKET_GET_COUNT(header);
	switch (pkt->type) {
	case PACKET_TYPE0:
		pkt->reg = CP_PACKET0_GET_REG(header);
		pkt->one_reg_wr = CP_PACKET0_GET_ONE_REG_WR(header);
		break;
	case PACKET_TYPE3:
		pkt->opcode = CP_PACKET3_GET_OPCODE(header);
		break;
	case PACKET_TYPE2:
		pkt->count = -1;
		break;
	default:
		DRM_ERROR("Unknown packet type %d at %d !\n", pkt->type, idx);
		return -EINVAL;
	}
	if ((pkt->count + 1 + pkt->idx) >= ib_chunk->length_dw) {
		DRM_ERROR("Packet (%d:%d:%d) end after CS buffer (%d) !\n",
			  pkt->idx, pkt->type, pkt->count, ib_chunk->length_dw);
		return -EINVAL;
	}
	return 0;
}

/**
 * r100_cs_packet_next_vline() - parse userspace VLINE packet
 * @parser:		parser structure holding parsing context.
 *
 * Userspace sends a special sequence for VLINE waits.
 * PACKET0 - VLINE_START_END + value
 * PACKET0 - WAIT_UNTIL + value
 * RELOC (P3) - crtc_id in reloc.
 *
 * This function parses this and relocates the VLINE START END
 * and WAIT UNTIL packets to the correct crtc.
 * It also detects a switched-off crtc and nulls out the
 * wait in that case.
 */
int r100_cs_packet_parse_vline(struct radeon_cs_parser *p)
{
	struct drm_mode_object *obj;
	struct drm_crtc *crtc;
	struct radeon_crtc *radeon_crtc;
	struct radeon_cs_packet p3reloc, waitreloc;
	int crtc_id;
	int r;
	uint32_t header, h_idx, reg;
	volatile uint32_t *ib;

	ib = p->ib.ptr;

	/* parse the wait until */
	r = r100_cs_packet_parse(p, &waitreloc, p->idx);
	if (r)
		return r;

	/* check it's a wait until and only 1 count */
	if (waitreloc.reg != RADEON_WAIT_UNTIL ||
	    waitreloc.count != 0) {
		DRM_ERROR("vline wait had illegal wait until segment\n");
		return -EINVAL;
	}

	if (radeon_get_ib_value(p, waitreloc.idx + 1) != RADEON_WAIT_CRTC_VLINE) {
		DRM_ERROR("vline wait had illegal wait until\n");
		return -EINVAL;
	}

	/* jump over the NOP */
	r = r100_cs_packet_parse(p, &p3reloc, p->idx + waitreloc.count + 2);
	if (r)
		return r;

	h_idx = p->idx - 2;
	p->idx += waitreloc.count + 2;
	p->idx += p3reloc.count + 2;
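	/* h_idx points back at the VLINE_START_END packet0 header; the
	 * wait-until value sits at h_idx + 3 and the crtc id at h_idx + 5 */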

	header = radeon_get_ib_value(p, h_idx);
	crtc_id = radeon_get_ib_value(p, h_idx + 5);
	reg = CP_PACKET0_GET_REG(header);
	obj = drm_mode_object_find(p->rdev->ddev, crtc_id, DRM_MODE_OBJECT_CRTC);
	if (!obj) {
		DRM_ERROR("cannot find crtc %d\n", crtc_id);
		return -EINVAL;
	}
	crtc = obj_to_crtc(obj);
	radeon_crtc = to_radeon_crtc(crtc);
	crtc_id = radeon_crtc->crtc_id;

	if (!crtc->enabled) {
		/* if the CRTC isn't enabled - we need to nop out the wait until */
		ib[h_idx + 2] = PACKET2(0);
		ib[h_idx + 3] = PACKET2(0);
	} else if (crtc_id == 1) {
		switch (reg) {
		case AVIVO_D1MODE_VLINE_START_END:
			header &= ~R300_CP_PACKET0_REG_MASK;
			header |= AVIVO_D2MODE_VLINE_START_END >> 2;
			break;
		case RADEON_CRTC_GUI_TRIG_VLINE:
			header &= ~R300_CP_PACKET0_REG_MASK;
			header |= RADEON_CRTC2_GUI_TRIG_VLINE >> 2;
			break;
		default:
			DRM_ERROR("unknown crtc reloc\n");
			return -EINVAL;
		}
		ib[h_idx] = header;
		ib[h_idx + 3] |= RADEON_ENG_DISPLAY_SELECT_CRTC1;
	}

	return 0;
}

/**
 * r100_cs_packet_next_reloc() - parse next packet which should be a reloc packet3
 * @parser:		parser structure holding parsing context.
 * @data:		pointer to relocation data
 * @offset_start:	starting offset
 * @offset_mask:	offset mask (to align start offset on)
 * @reloc:		reloc information
 *
 * Check that the next packet is a relocation packet3, do bo validation and
 * compute the GPU offset using the provided start.
 **/
int r100_cs_packet_next_reloc(struct radeon_cs_parser *p,
			      struct radeon_cs_reloc **cs_reloc)
{
	struct radeon_cs_chunk *relocs_chunk;
	struct radeon_cs_packet p3reloc;
	unsigned idx;
	int r;

	if (p->chunk_relocs_idx == -1) {
		DRM_ERROR("No relocation chunk !\n");
		return -EINVAL;
	}
	*cs_reloc = NULL;
	relocs_chunk = &p->chunks[p->chunk_relocs_idx];
	r = r100_cs_packet_parse(p, &p3reloc, p->idx);
	if (r) {
		return r;
	}
	p->idx += p3reloc.count + 2;
	if (p3reloc.type != PACKET_TYPE3 || p3reloc.opcode != PACKET3_NOP) {
		DRM_ERROR("No packet3 for relocation for packet at %d.\n",
			  p3reloc.idx);
		r100_cs_dump_packet(p, &p3reloc);
		return -EINVAL;
	}
	idx = radeon_get_ib_value(p, p3reloc.idx + 1);
	if (idx >= relocs_chunk->length_dw) {
		DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
			  idx, relocs_chunk->length_dw);
		r100_cs_dump_packet(p, &p3reloc);
		return -EINVAL;
	}
	/* FIXME: we assume reloc size is 4 dwords */
	*cs_reloc = p->relocs_ptr[(idx / 4)];
	return 0;
}

static int r100_get_vtx_size(uint32_t vtx_fmt)
{
	int vtx_size;
	vtx_size = 2;
	/* ordered according to bits in spec */
	if (vtx_fmt & RADEON_SE_VTX_FMT_W0)
		vtx_size++;
	if (vtx_fmt & RADEON_SE_VTX_FMT_FPCOLOR)
		vtx_size += 3;
	if (vtx_fmt & RADEON_SE_VTX_FMT_FPALPHA)
		vtx_size++;
	if (vtx_fmt & RADEON_SE_VTX_FMT_PKCOLOR)
		vtx_size++;
	if (vtx_fmt & RADEON_SE_VTX_FMT_FPSPEC)
		vtx_size += 3;
	if (vtx_fmt & RADEON_SE_VTX_FMT_FPFOG)
		vtx_size++;
	if (vtx_fmt & RADEON_SE_VTX_FMT_PKSPEC)
		vtx_size++;
	if (vtx_fmt & RADEON_SE_VTX_FMT_ST0)
		vtx_size += 2;
	if (vtx_fmt & RADEON_SE_VTX_FMT_ST1)
		vtx_size += 2;
	if (vtx_fmt & RADEON_SE_VTX_FMT_Q1)
		vtx_size++;
	if (vtx_fmt & RADEON_SE_VTX_FMT_ST2)
		vtx_size += 2;
	if (vtx_fmt & RADEON_SE_VTX_FMT_Q2)
		vtx_size++;
	if (vtx_fmt & RADEON_SE_VTX_FMT_ST3)
		vtx_size += 2;
	if (vtx_fmt & RADEON_SE_VTX_FMT_Q3)
		vtx_size++;
	if (vtx_fmt & RADEON_SE_VTX_FMT_Q0)
		vtx_size++;
	/* blend weight */
	if (vtx_fmt & (0x7 << 15))
		vtx_size += (vtx_fmt >> 15) & 0x7;
	if (vtx_fmt & RADEON_SE_VTX_FMT_N0)
		vtx_size += 3;
	if (vtx_fmt & RADEON_SE_VTX_FMT_XY1)
		vtx_size += 2;
	if (vtx_fmt & RADEON_SE_VTX_FMT_Z1)
		vtx_size++;
	if (vtx_fmt & RADEON_SE_VTX_FMT_W1)
		vtx_size++;
	if (vtx_fmt & RADEON_SE_VTX_FMT_N1)
		vtx_size++;
	if (vtx_fmt & RADEON_SE_VTX_FMT_Z)
		vtx_size++;
	return vtx_size;
}

static int r100_packet0_check(struct radeon_cs_parser *p,
			      struct radeon_cs_packet *pkt,
			      unsigned idx, unsigned reg)
{
	struct radeon_cs_reloc *reloc;
	struct r100_cs_track *track;
	volatile uint32_t *ib;
	uint32_t tmp;
	int r;
	int i, face;
	u32 tile_flags = 0;
	u32 idx_value;

	ib = p->ib.ptr;
	track = (struct r100_cs_track *)p->track;

	idx_value = radeon_get_ib_value(p, idx);

	switch (reg) {
	case RADEON_CRTC_GUI_TRIG_VLINE:
		r = r100_cs_packet_parse_vline(p);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
				  idx, reg);
			r100_cs_dump_packet(p, pkt);
			return r;
		}
		break;
		/* FIXME: only allow PACKET3 blit? easier to check for out of
		 * range access */
	case RADEON_DST_PITCH_OFFSET:
	case RADEON_SRC_PITCH_OFFSET:
		r = r100_reloc_pitch_offset(p, pkt, idx, reg);
		if (r)
			return r;
		break;
	case RADEON_RB3D_DEPTHOFFSET:
		r = r100_cs_packet_next_reloc(p, &reloc);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
				  idx, reg);
			r100_cs_dump_packet(p, pkt);
			return r;
		}
		track->zb.robj = reloc->robj;
		track->zb.offset = idx_value;
		track->zb_dirty = true;
		ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
		break;
	case RADEON_RB3D_COLOROFFSET:
		r = r100_cs_packet_next_reloc(p, &reloc);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
				  idx, reg);
			r100_cs_dump_packet(p, pkt);
			return r;
		}
		track->cb[0].robj = reloc->robj;
		track->cb[0].offset = idx_value;
		track->cb_dirty = true;
		ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
		break;
	case RADEON_PP_TXOFFSET_0:
	case RADEON_PP_TXOFFSET_1:
	case RADEON_PP_TXOFFSET_2:
		i = (reg - RADEON_PP_TXOFFSET_0) / 24;
		r = r100_cs_packet_next_reloc(p, &reloc);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
				  idx, reg);
			r100_cs_dump_packet(p, pkt);
			return r;
		}
		if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
			if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
				tile_flags |= RADEON_TXO_MACRO_TILE;
			if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO)
				tile_flags |= RADEON_TXO_MICRO_TILE_X2;

			tmp = idx_value & ~(0x7 << 2);
			tmp |= tile_flags;
			ib[idx] = tmp + ((u32)reloc->lobj.gpu_offset);
		} else
			ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
		track->textures[i].robj = reloc->robj;
		track->tex_dirty = true;
		break;
	case RADEON_PP_CUBIC_OFFSET_T0_0:
	case RADEON_PP_CUBIC_OFFSET_T0_1:
	case RADEON_PP_CUBIC_OFFSET_T0_2:
	case RADEON_PP_CUBIC_OFFSET_T0_3:
	case RADEON_PP_CUBIC_OFFSET_T0_4:
		i = (reg - RADEON_PP_CUBIC_OFFSET_T0_0) / 4;
		r = r100_cs_packet_next_reloc(p, &reloc);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
				  idx, reg);
			r100_cs_dump_packet(p, pkt);
			return r;
		}
		track->textures[0].cube_info[i].offset = idx_value;
		ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
		track->textures[0].cube_info[i].robj = reloc->robj;
		track->tex_dirty = true;
		break;
	case RADEON_PP_CUBIC_OFFSET_T1_0:
	case RADEON_PP_CUBIC_OFFSET_T1_1:
	case RADEON_PP_CUBIC_OFFSET_T1_2:
	case RADEON_PP_CUBIC_OFFSET_T1_3:
	case RADEON_PP_CUBIC_OFFSET_T1_4:
		i = (reg - RADEON_PP_CUBIC_OFFSET_T1_0) / 4;
		r = r100_cs_packet_next_reloc(p, &reloc);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
				  idx, reg);
			r100_cs_dump_packet(p, pkt);
			return r;
		}
		track->textures[1].cube_info[i].offset = idx_value;
		ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
		track->textures[1].cube_info[i].robj = reloc->robj;
		track->tex_dirty = true;
		break;
	case RADEON_PP_CUBIC_OFFSET_T2_0:
	case RADEON_PP_CUBIC_OFFSET_T2_1:
	case RADEON_PP_CUBIC_OFFSET_T2_2:
	case RADEON_PP_CUBIC_OFFSET_T2_3:
	case RADEON_PP_CUBIC_OFFSET_T2_4:
		i = (reg - RADEON_PP_CUBIC_OFFSET_T2_0) / 4;
		r = r100_cs_packet_next_reloc(p, &reloc);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
				  idx, reg);
			r100_cs_dump_packet(p, pkt);
			return r;
		}
		track->textures[2].cube_info[i].offset = idx_value;
		ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
		track->textures[2].cube_info[i].robj = reloc->robj;
		track->tex_dirty = true;
		break;
1791	case RADEON_RE_WIDTH_HEIGHT:
1792		track->maxy = ((idx_value >> 16) & 0x7FF);
1793		track->cb_dirty = true;
1794		track->zb_dirty = true;
1795		break;
1796	case RADEON_RB3D_COLORPITCH:
1797		r = r100_cs_packet_next_reloc(p, &reloc);
1798		if (r) {
1799			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1800				  idx, reg);
1801			r100_cs_dump_packet(p, pkt);
1802			return r;
1803		}
1804		if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1805			if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
1806				tile_flags |= RADEON_COLOR_TILE_ENABLE;
1807			if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO)
1808				tile_flags |= RADEON_COLOR_MICROTILE_ENABLE;
1809
1810			tmp = idx_value & ~(0x7 << 16);
1811			tmp |= tile_flags;
1812			ib[idx] = tmp;
1813		} else
1814			ib[idx] = idx_value;
1815
1816		track->cb[0].pitch = idx_value & RADEON_COLORPITCH_MASK;
1817		track->cb_dirty = true;
1818		break;
1819	case RADEON_RB3D_DEPTHPITCH:
1820		track->zb.pitch = idx_value & RADEON_DEPTHPITCH_MASK;
1821		track->zb_dirty = true;
1822		break;
1823	case RADEON_RB3D_CNTL:
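		/* decode the RB3D color format field into bytes per pixel so
		 * the color buffer size checks below have a valid cpp */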
		switch ((idx_value >> RADEON_RB3D_COLOR_FORMAT_SHIFT) & 0x1f) {
		case 7:
		case 8:
		case 9:
		case 11:
		case 12:
			track->cb[0].cpp = 1;
			break;
		case 3:
		case 4:
		case 15:
			track->cb[0].cpp = 2;
			break;
		case 6:
			track->cb[0].cpp = 4;
			break;
		default:
			DRM_ERROR("Invalid color buffer format (%d) !\n",
				  ((idx_value >> RADEON_RB3D_COLOR_FORMAT_SHIFT) & 0x1f));
			return -EINVAL;
		}
		track->z_enabled = !!(idx_value & RADEON_Z_ENABLE);
		track->cb_dirty = true;
		track->zb_dirty = true;
		break;
	case RADEON_RB3D_ZSTENCILCNTL:
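		/* zbuffer format 0 is 16 bits per pixel; the other formats
		 * accepted here all take 4 bytes per pixel */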
		switch (idx_value & 0xf) {
		case 0:
			track->zb.cpp = 2;
			break;
		case 2:
		case 3:
		case 4:
		case 5:
		case 9:
		case 11:
			track->zb.cpp = 4;
			break;
		default:
			break;
		}
		track->zb_dirty = true;
		break;
	case RADEON_RB3D_ZPASS_ADDR:
		r = r100_cs_packet_next_reloc(p, &reloc);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
				  idx, reg);
			r100_cs_dump_packet(p, pkt);
			return r;
		}
		ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
		break;
	case RADEON_PP_CNTL:
		{
			uint32_t temp = idx_value >> 4;
			for (i = 0; i < track->num_texture; i++)
				track->textures[i].enabled = !!(temp & (1 << i));
			track->tex_dirty = true;
		}
		break;
	case RADEON_SE_VF_CNTL:
		track->vap_vf_cntl = idx_value;
		break;
	case RADEON_SE_VTX_FMT:
		track->vtx_size = r100_get_vtx_size(idx_value);
		break;
	case RADEON_PP_TEX_SIZE_0:
	case RADEON_PP_TEX_SIZE_1:
	case RADEON_PP_TEX_SIZE_2:
		i = (reg - RADEON_PP_TEX_SIZE_0) / 8;
		track->textures[i].width = (idx_value & RADEON_TEX_USIZE_MASK) + 1;
		track->textures[i].height = ((idx_value & RADEON_TEX_VSIZE_MASK) >> RADEON_TEX_VSIZE_SHIFT) + 1;
		track->tex_dirty = true;
		break;
	case RADEON_PP_TEX_PITCH_0:
	case RADEON_PP_TEX_PITCH_1:
	case RADEON_PP_TEX_PITCH_2:
		i = (reg - RADEON_PP_TEX_PITCH_0) / 8;
		track->textures[i].pitch = idx_value + 32;
		track->tex_dirty = true;
		break;
	case RADEON_PP_TXFILTER_0:
	case RADEON_PP_TXFILTER_1:
	case RADEON_PP_TXFILTER_2:
		i = (reg - RADEON_PP_TXFILTER_0) / 24;
		track->textures[i].num_levels = ((idx_value & RADEON_MAX_MIP_LEVEL_MASK)
						 >> RADEON_MAX_MIP_LEVEL_SHIFT);
		tmp = (idx_value >> 23) & 0x7;
		if (tmp == 2 || tmp == 6)
			track->textures[i].roundup_w = false;
		tmp = (idx_value >> 27) & 0x7;
		if (tmp == 2 || tmp == 6)
			track->textures[i].roundup_h = false;
		track->tex_dirty = true;
		break;
	case RADEON_PP_TXFORMAT_0:
	case RADEON_PP_TXFORMAT_1:
	case RADEON_PP_TXFORMAT_2:
		i = (reg - RADEON_PP_TXFORMAT_0) / 24;
		if (idx_value & RADEON_TXFORMAT_NON_POWER2) {
			track->textures[i].use_pitch = 1;
		} else {
			track->textures[i].use_pitch = 0;
			track->textures[i].width = 1 << ((idx_value >> RADEON_TXFORMAT_WIDTH_SHIFT) & RADEON_TXFORMAT_WIDTH_MASK);
			track->textures[i].height = 1 << ((idx_value >> RADEON_TXFORMAT_HEIGHT_SHIFT) & RADEON_TXFORMAT_HEIGHT_MASK);
		}
		if (idx_value & RADEON_TXFORMAT_CUBIC_MAP_ENABLE)
			track->textures[i].tex_coord_type = 2;
		switch ((idx_value & RADEON_TXFORMAT_FORMAT_MASK)) {
		case RADEON_TXFORMAT_I8:
		case RADEON_TXFORMAT_RGB332:
		case RADEON_TXFORMAT_Y8:
			track->textures[i].cpp = 1;
			track->textures[i].compress_format = R100_TRACK_COMP_NONE;
			break;
		case RADEON_TXFORMAT_AI88:
		case RADEON_TXFORMAT_ARGB1555:
		case RADEON_TXFORMAT_RGB565:
		case RADEON_TXFORMAT_ARGB4444:
		case RADEON_TXFORMAT_VYUY422:
		case RADEON_TXFORMAT_YVYU422:
		case RADEON_TXFORMAT_SHADOW16:
		case RADEON_TXFORMAT_LDUDV655:
		case RADEON_TXFORMAT_DUDV88:
			track->textures[i].cpp = 2;
			track->textures[i].compress_format = R100_TRACK_COMP_NONE;
			break;
		case RADEON_TXFORMAT_ARGB8888:
		case RADEON_TXFORMAT_RGBA8888:
		case RADEON_TXFORMAT_SHADOW32:
		case RADEON_TXFORMAT_LDUDUV8888:
			track->textures[i].cpp = 4;
			track->textures[i].compress_format = R100_TRACK_COMP_NONE;
			break;
		case RADEON_TXFORMAT_DXT1:
			track->textures[i].cpp = 1;
			track->textures[i].compress_format = R100_TRACK_COMP_DXT1;
			break;
		case RADEON_TXFORMAT_DXT23:
		case RADEON_TXFORMAT_DXT45:
			track->textures[i].cpp = 1;
			track->textures[i].compress_format = R100_TRACK_COMP_DXT35;
			break;
		}
		track->textures[i].cube_info[4].width = 1 << ((idx_value >> 16) & 0xf);
		track->textures[i].cube_info[4].height = 1 << ((idx_value >> 20) & 0xf);
		track->tex_dirty = true;
		break;
	case RADEON_PP_CUBIC_FACES_0:
	case RADEON_PP_CUBIC_FACES_1:
	case RADEON_PP_CUBIC_FACES_2:
		tmp = idx_value;
		i = (reg - RADEON_PP_CUBIC_FACES_0) / 4;
		for (face = 0; face < 4; face++) {
			track->textures[i].cube_info[face].width = 1 << ((tmp >> (face * 8)) & 0xf);
			track->textures[i].cube_info[face].height = 1 << ((tmp >> ((face * 8) + 4)) & 0xf);
		}
		track->tex_dirty = true;
		break;
	default:
		DRM_ERROR("Forbidden register 0x%04X in cs at %d\n",
		       reg, idx);
		return -EINVAL;
	}
	return 0;
}

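/**
 * r100_cs_track_check_pkt3_indx_buffer - validate a PACKET3 INDX_BUFFER.
 *
 * @p: parser structure holding parsing context.
 * @pkt: the packet being checked.
 * @robj: buffer object the indices are fetched from.
 *
 * The third dword of the packet body is treated as the highest byte
 * offset read from the index buffer, so the packet is rejected when that
 * offset falls outside the bound buffer object.
 */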
int r100_cs_track_check_pkt3_indx_buffer(struct radeon_cs_parser *p,
					 struct radeon_cs_packet *pkt,
					 struct radeon_bo *robj)
{
	unsigned idx;
	u32 value;
	idx = pkt->idx + 1;
	value = radeon_get_ib_value(p, idx + 2);
	if ((value + 1) > radeon_bo_size(robj)) {
		DRM_ERROR("[drm] Buffer too small for PACKET3 INDX_BUFFER "
			  "(need %u have %lu) !\n",
			  value + 1,
			  radeon_bo_size(robj));
		return -EINVAL;
	}
	return 0;
}

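/* Verify a type-3 (command) packet: patch buffer addresses with the
 * relocation information and record the draw state (vertex format,
 * vertex count, arrays) in the CS tracker so r100_cs_track_check() can
 * validate buffer sizes before each draw is allowed through.
 */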
static int r100_packet3_check(struct radeon_cs_parser *p,
			      struct radeon_cs_packet *pkt)
{
	struct radeon_cs_reloc *reloc;
	struct r100_cs_track *track;
	unsigned idx;
	volatile uint32_t *ib;
	int r;

	ib = p->ib.ptr;
	idx = pkt->idx + 1;
	track = (struct r100_cs_track *)p->track;
	switch (pkt->opcode) {
	case PACKET3_3D_LOAD_VBPNTR:
		r = r100_packet3_load_vbpntr(p, pkt, idx);
		if (r)
			return r;
		break;
	case PACKET3_INDX_BUFFER:
		r = r100_cs_packet_next_reloc(p, &reloc);
		if (r) {
			DRM_ERROR("No reloc for packet3 %d\n", pkt->opcode);
			r100_cs_dump_packet(p, pkt);
			return r;
		}
		ib[idx+1] = radeon_get_ib_value(p, idx+1) + ((u32)reloc->lobj.gpu_offset);
		r = r100_cs_track_check_pkt3_indx_buffer(p, pkt, reloc->robj);
		if (r) {
			return r;
		}
		break;
	case 0x23:
		/* 3D_RNDR_GEN_INDX_PRIM on r100/r200 */
		r = r100_cs_packet_next_reloc(p, &reloc);
		if (r) {
			DRM_ERROR("No reloc for packet3 %d\n", pkt->opcode);
			r100_cs_dump_packet(p, pkt);
			return r;
		}
		ib[idx] = radeon_get_ib_value(p, idx) + ((u32)reloc->lobj.gpu_offset);
		track->num_arrays = 1;
		track->vtx_size = r100_get_vtx_size(radeon_get_ib_value(p, idx + 2));

		track->arrays[0].robj = reloc->robj;
		track->arrays[0].esize = track->vtx_size;

		track->max_indx = radeon_get_ib_value(p, idx+1);

		track->vap_vf_cntl = radeon_get_ib_value(p, idx+3);
		track->immd_dwords = pkt->count - 1;
		r = r100_cs_track_check(p->rdev, track);
		if (r)
			return r;
		break;
	case PACKET3_3D_DRAW_IMMD:
		if (((radeon_get_ib_value(p, idx + 1) >> 4) & 0x3) != 3) {
			DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n");
			return -EINVAL;
		}
		track->vtx_size = r100_get_vtx_size(radeon_get_ib_value(p, idx + 0));
		track->vap_vf_cntl = radeon_get_ib_value(p, idx + 1);
		track->immd_dwords = pkt->count - 1;
		r = r100_cs_track_check(p->rdev, track);
		if (r)
			return r;
		break;
		/* triggers drawing using in-packet vertex data */
	case PACKET3_3D_DRAW_IMMD_2:
		if (((radeon_get_ib_value(p, idx) >> 4) & 0x3) != 3) {
			DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n");
			return -EINVAL;
		}
		track->vap_vf_cntl = radeon_get_ib_value(p, idx);
		track->immd_dwords = pkt->count;
		r = r100_cs_track_check(p->rdev, track);
		if (r)
			return r;
		break;
		/* triggers drawing using in-packet vertex data */
	case PACKET3_3D_DRAW_VBUF_2:
		track->vap_vf_cntl = radeon_get_ib_value(p, idx);
		r = r100_cs_track_check(p->rdev, track);
		if (r)
			return r;
		break;
		/* triggers drawing of vertex buffers setup elsewhere */
	case PACKET3_3D_DRAW_INDX_2:
		track->vap_vf_cntl = radeon_get_ib_value(p, idx);
		r = r100_cs_track_check(p->rdev, track);
		if (r)
			return r;
		break;
		/* triggers drawing using indices to vertex buffer */
	case PACKET3_3D_DRAW_VBUF:
		track->vap_vf_cntl = radeon_get_ib_value(p, idx + 1);
		r = r100_cs_track_check(p->rdev, track);
		if (r)
			return r;
		break;
		/* triggers drawing of vertex buffers setup elsewhere */
	case PACKET3_3D_DRAW_INDX:
		track->vap_vf_cntl = radeon_get_ib_value(p, idx + 1);
		r = r100_cs_track_check(p->rdev, track);
		if (r)
			return r;
		break;
		/* triggers drawing using indices to vertex buffer */
	case PACKET3_3D_CLEAR_HIZ:
	case PACKET3_3D_CLEAR_ZMASK:
		if (p->rdev->hyperz_filp != p->filp)
			return -EINVAL;
		break;
	case PACKET3_NOP:
		break;
	default:
		DRM_ERROR("Packet3 opcode %x not supported\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}

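/**
 * r100_cs_parse - main command stream checker entry point for r1xx/r2xx.
 *
 * @p: parser structure holding parsing context.
 *
 * Walks every packet in the IB chunk: type-0 packets are checked against
 * the per-family safe register bitmap, type-2 packets are padding, and
 * type-3 packets go through r100_packet3_check(). The tracker allocated
 * here is freed on every exit path.
 */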
int r100_cs_parse(struct radeon_cs_parser *p)
{
	struct radeon_cs_packet pkt;
	struct r100_cs_track *track;
	int r;

	track = malloc(sizeof(*track), DRM_MEM_DRIVER, M_ZERO | M_WAITOK);
	if (!track)
		return -ENOMEM;
	r100_cs_track_clear(p->rdev, track);
	p->track = track;
	do {
		r = r100_cs_packet_parse(p, &pkt, p->idx);
		if (r) {
			free(p->track, DRM_MEM_DRIVER);
			p->track = NULL;
			return r;
		}
		p->idx += pkt.count + 2;
		switch (pkt.type) {
		case PACKET_TYPE0:
			if (p->rdev->family >= CHIP_R200)
				r = r100_cs_parse_packet0(p, &pkt,
							  p->rdev->config.r100.reg_safe_bm,
							  p->rdev->config.r100.reg_safe_bm_size,
							  &r200_packet0_check);
			else
				r = r100_cs_parse_packet0(p, &pkt,
							  p->rdev->config.r100.reg_safe_bm,
							  p->rdev->config.r100.reg_safe_bm_size,
							  &r100_packet0_check);
			break;
		case PACKET_TYPE2:
			break;
		case PACKET_TYPE3:
			r = r100_packet3_check(p, &pkt);
			break;
		default:
			DRM_ERROR("Unknown packet type %d !\n",
				  pkt.type);
			free(p->track, DRM_MEM_DRIVER);
			p->track = NULL;
			return -EINVAL;
		}
		if (r) {
			free(p->track, DRM_MEM_DRIVER);
			p->track = NULL;
			return r;
		}
	} while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
	free(p->track, DRM_MEM_DRIVER);
	p->track = NULL;
	return 0;
}

static void r100_cs_track_texture_print(struct r100_cs_track_texture *t)
{
	DRM_ERROR("pitch                      %d\n", t->pitch);
	DRM_ERROR("use_pitch                  %d\n", t->use_pitch);
	DRM_ERROR("width                      %d\n", t->width);
	DRM_ERROR("width_11                   %d\n", t->width_11);
	DRM_ERROR("height                     %d\n", t->height);
	DRM_ERROR("height_11                  %d\n", t->height_11);
	DRM_ERROR("num levels                 %d\n", t->num_levels);
	DRM_ERROR("depth                      %d\n", t->txdepth);
	DRM_ERROR("bpp                        %d\n", t->cpp);
	DRM_ERROR("coordinate type            %d\n", t->tex_coord_type);
	DRM_ERROR("width round to power of 2  %d\n", t->roundup_w);
	DRM_ERROR("height round to power of 2 %d\n", t->roundup_h);
	DRM_ERROR("compress format            %d\n", t->compress_format);
}

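/* Byte size of one block-compressed mip level. DXT1 packs a 4x4 texel
 * block into 8 bytes, DXT3/DXT5 into 16 bytes, and a minimum row width
 * in blocks is enforced. For example, a 64x64 DXT1 level needs
 * 16 * 16 blocks * 8 bytes = 2048 bytes.
 */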
static int r100_track_compress_size(int compress_format, int w, int h)
{
	int block_width, block_height, block_bytes;
	int wblocks, hblocks;
	int min_wblocks;
	int sz;

	block_width = 4;
	block_height = 4;

	switch (compress_format) {
	case R100_TRACK_COMP_DXT1:
		block_bytes = 8;
		min_wblocks = 4;
		break;
	default:
	case R100_TRACK_COMP_DXT35:
		block_bytes = 16;
		min_wblocks = 2;
		break;
	}

	hblocks = (h + block_height - 1) / block_height;
	wblocks = (w + block_width - 1) / block_width;
	if (wblocks < min_wblocks)
		wblocks = min_wblocks;
	sz = wblocks * hblocks * block_bytes;
	return sz;
}

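/* Check that each of the five per-face cube map apertures, including its
 * offset, fits inside its backing BO; the remaining face is presumably
 * covered by the regular texture object itself.
 */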
static int r100_cs_track_cube(struct radeon_device *rdev,
			      struct r100_cs_track *track, unsigned idx)
{
	unsigned face, w, h;
	struct radeon_bo *cube_robj;
	unsigned long size;
	unsigned compress_format = track->textures[idx].compress_format;

	for (face = 0; face < 5; face++) {
		cube_robj = track->textures[idx].cube_info[face].robj;
		w = track->textures[idx].cube_info[face].width;
		h = track->textures[idx].cube_info[face].height;

		if (compress_format) {
			size = r100_track_compress_size(compress_format, w, h);
		} else
			size = w * h;
		size *= track->textures[idx].cpp;

		size += track->textures[idx].cube_info[face].offset;

		if (size > radeon_bo_size(cube_robj)) {
			DRM_ERROR("Cube texture offset greater than object size %lu %lu\n",
				  size, radeon_bo_size(cube_robj));
			r100_cs_track_texture_print(&track->textures[idx]);
			return -1;
		}
	}
	return 0;
}

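/* Walk every enabled texture unit and accumulate the size of the whole
 * mip chain (width and height halve at each level, with optional rounding
 * up to a power of two), then make sure the bound BO is large enough.
 */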
static int r100_cs_track_texture_check(struct radeon_device *rdev,
				       struct r100_cs_track *track)
{
	struct radeon_bo *robj;
	unsigned long size;
	unsigned u, i, w, h, d;
	int ret;

	for (u = 0; u < track->num_texture; u++) {
		if (!track->textures[u].enabled)
			continue;
		if (track->textures[u].lookup_disable)
			continue;
		robj = track->textures[u].robj;
		if (robj == NULL) {
			DRM_ERROR("No texture bound to unit %u\n", u);
			return -EINVAL;
		}
		size = 0;
		for (i = 0; i <= track->textures[u].num_levels; i++) {
			if (track->textures[u].use_pitch) {
				if (rdev->family < CHIP_R300)
					w = (track->textures[u].pitch / track->textures[u].cpp) / (1 << i);
				else
					w = track->textures[u].pitch / (1 << i);
			} else {
				w = track->textures[u].width;
				if (rdev->family >= CHIP_RV515)
					w |= track->textures[u].width_11;
				w = w / (1 << i);
				if (track->textures[u].roundup_w)
					w = roundup_pow_of_two(w);
			}
			h = track->textures[u].height;
			if (rdev->family >= CHIP_RV515)
				h |= track->textures[u].height_11;
			h = h / (1 << i);
			if (track->textures[u].roundup_h)
				h = roundup_pow_of_two(h);
			if (track->textures[u].tex_coord_type == 1) {
				d = (1 << track->textures[u].txdepth) / (1 << i);
				if (!d)
					d = 1;
			} else {
				d = 1;
			}
			if (track->textures[u].compress_format) {
				/* compressed textures are block based */
				size += r100_track_compress_size(track->textures[u].compress_format, w, h) * d;
			} else
				size += w * h * d;
		}
		size *= track->textures[u].cpp;

		switch (track->textures[u].tex_coord_type) {
		case 0:
		case 1:
			break;
		case 2:
			if (track->separate_cube) {
				ret = r100_cs_track_cube(rdev, track, u);
				if (ret)
					return ret;
			} else
				size *= 6;
			break;
		default:
			DRM_ERROR("Invalid texture coordinate type %u for unit "
				  "%u\n", track->textures[u].tex_coord_type, u);
			return -EINVAL;
		}
		if (size > radeon_bo_size(robj)) {
			DRM_ERROR("Texture of unit %u needs %lu bytes but is "
				  "%lu\n", u, size, radeon_bo_size(robj));
			r100_cs_track_texture_print(&track->textures[u]);
			return -EINVAL;
		}
	}
	return 0;
}

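/* Validate the accumulated draw state before a draw goes through: color
 * and depth buffers must hold at least pitch * cpp * maxy bytes, and the
 * vertex data is checked according to PRIM_WALK (1 = indexed arrays,
 * 2 = sequential arrays, 3 = vertex data embedded in the packet).
 */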
int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track)
{
	unsigned i;
	unsigned long size;
	unsigned prim_walk;
	unsigned nverts;
	unsigned num_cb = track->cb_dirty ? track->num_cb : 0;

	if (num_cb && !track->zb_cb_clear && !track->color_channel_mask &&
	    !track->blend_read_enable)
		num_cb = 0;

	for (i = 0; i < num_cb; i++) {
		if (track->cb[i].robj == NULL) {
			DRM_ERROR("[drm] No buffer for color buffer %d !\n", i);
			return -EINVAL;
		}
		size = track->cb[i].pitch * track->cb[i].cpp * track->maxy;
		size += track->cb[i].offset;
		if (size > radeon_bo_size(track->cb[i].robj)) {
			DRM_ERROR("[drm] Buffer too small for color buffer %d "
				  "(need %lu have %lu) !\n", i, size,
				  radeon_bo_size(track->cb[i].robj));
			DRM_ERROR("[drm] color buffer %d (%u %u %u %u)\n",
				  i, track->cb[i].pitch, track->cb[i].cpp,
				  track->cb[i].offset, track->maxy);
			return -EINVAL;
		}
	}
	track->cb_dirty = false;

	if (track->zb_dirty && track->z_enabled) {
		if (track->zb.robj == NULL) {
			DRM_ERROR("[drm] No buffer for z buffer !\n");
			return -EINVAL;
		}
		size = track->zb.pitch * track->zb.cpp * track->maxy;
		size += track->zb.offset;
		if (size > radeon_bo_size(track->zb.robj)) {
			DRM_ERROR("[drm] Buffer too small for z buffer "
				  "(need %lu have %lu) !\n", size,
				  radeon_bo_size(track->zb.robj));
			DRM_ERROR("[drm] zbuffer (%u %u %u %u)\n",
				  track->zb.pitch, track->zb.cpp,
				  track->zb.offset, track->maxy);
			return -EINVAL;
		}
	}
	track->zb_dirty = false;

	if (track->aa_dirty && track->aaresolve) {
		if (track->aa.robj == NULL) {
			DRM_ERROR("[drm] No buffer for AA resolve buffer %d !\n", i);
			return -EINVAL;
		}
		/* I believe the format comes from colorbuffer0. */
		size = track->aa.pitch * track->cb[0].cpp * track->maxy;
		size += track->aa.offset;
		if (size > radeon_bo_size(track->aa.robj)) {
			DRM_ERROR("[drm] Buffer too small for AA resolve buffer %d "
				  "(need %lu have %lu) !\n", i, size,
				  radeon_bo_size(track->aa.robj));
			DRM_ERROR("[drm] AA resolve buffer %d (%u %u %u %u)\n",
				  i, track->aa.pitch, track->cb[0].cpp,
				  track->aa.offset, track->maxy);
			return -EINVAL;
		}
	}
	track->aa_dirty = false;

	prim_walk = (track->vap_vf_cntl >> 4) & 0x3;
	if (track->vap_vf_cntl & (1 << 14)) {
		nverts = track->vap_alt_nverts;
	} else {
		nverts = (track->vap_vf_cntl >> 16) & 0xFFFF;
	}
	switch (prim_walk) {
	case 1:
		for (i = 0; i < track->num_arrays; i++) {
			size = track->arrays[i].esize * track->max_indx * 4;
			if (track->arrays[i].robj == NULL) {
				DRM_ERROR("(PW %u) Vertex array %u no buffer "
					  "bound\n", prim_walk, i);
				return -EINVAL;
			}
			if (size > radeon_bo_size(track->arrays[i].robj)) {
				dev_err(rdev->dev, "(PW %u) Vertex array %u "
					"need %lu dwords have %lu dwords\n",
					prim_walk, i, size >> 2,
					radeon_bo_size(track->arrays[i].robj)
					>> 2);
				DRM_ERROR("Max indices %u\n", track->max_indx);
				return -EINVAL;
			}
		}
		break;
	case 2:
		for (i = 0; i < track->num_arrays; i++) {
			size = track->arrays[i].esize * (nverts - 1) * 4;
			if (track->arrays[i].robj == NULL) {
				DRM_ERROR("(PW %u) Vertex array %u no buffer "
					  "bound\n", prim_walk, i);
				return -EINVAL;
			}
			if (size > radeon_bo_size(track->arrays[i].robj)) {
				dev_err(rdev->dev, "(PW %u) Vertex array %u "
					"need %lu dwords have %lu dwords\n",
					prim_walk, i, size >> 2,
					radeon_bo_size(track->arrays[i].robj)
					>> 2);
				return -EINVAL;
			}
		}
		break;
	case 3:
		size = track->vtx_size * nverts;
		if (size != track->immd_dwords) {
			DRM_ERROR("IMMD draw %u dwords but needs %lu dwords\n",
				  track->immd_dwords, size);
			DRM_ERROR("VAP_VF_CNTL.NUM_VERTICES %u, VTX_SIZE %u\n",
				  nverts, track->vtx_size);
			return -EINVAL;
		}
		break;
	default:
		DRM_ERROR("[drm] Invalid primitive walk %d for VAP_VF_CNTL\n",
			  prim_walk);
		return -EINVAL;
	}

	if (track->tex_dirty) {
		track->tex_dirty = false;
		return r100_cs_track_texture_check(rdev, track);
	}
	return 0;
}

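/* Reset the tracker to conservative worst-case defaults (maximum pitch,
 * cpp, vertex size, ...) so that any state the command stream fails to
 * program still trips the size checks above instead of slipping through.
 */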
void r100_cs_track_clear(struct radeon_device *rdev, struct r100_cs_track *track)
{
	unsigned i, face;

	track->cb_dirty = true;
	track->zb_dirty = true;
	track->tex_dirty = true;
	track->aa_dirty = true;

	if (rdev->family < CHIP_R300) {
		track->num_cb = 1;
		if (rdev->family <= CHIP_RS200)
			track->num_texture = 3;
		else
			track->num_texture = 6;
		track->maxy = 2048;
		track->separate_cube = 1;
	} else {
		track->num_cb = 4;
		track->num_texture = 16;
		track->maxy = 4096;
		track->separate_cube = 0;
		track->aaresolve = false;
		track->aa.robj = NULL;
	}

	for (i = 0; i < track->num_cb; i++) {
		track->cb[i].robj = NULL;
		track->cb[i].pitch = 8192;
		track->cb[i].cpp = 16;
		track->cb[i].offset = 0;
	}
	track->z_enabled = true;
	track->zb.robj = NULL;
	track->zb.pitch = 8192;
	track->zb.cpp = 4;
	track->zb.offset = 0;
	track->vtx_size = 0x7F;
	track->immd_dwords = 0xFFFFFFFFUL;
	track->num_arrays = 11;
	track->max_indx = 0x00FFFFFFUL;
	for (i = 0; i < track->num_arrays; i++) {
		track->arrays[i].robj = NULL;
		track->arrays[i].esize = 0x7F;
	}
	for (i = 0; i < track->num_texture; i++) {
		track->textures[i].compress_format = R100_TRACK_COMP_NONE;
		track->textures[i].pitch = 16536;
		track->textures[i].width = 16536;
		track->textures[i].height = 16536;
		track->textures[i].width_11 = 1 << 11;
		track->textures[i].height_11 = 1 << 11;
		track->textures[i].num_levels = 12;
		if (rdev->family <= CHIP_RS200) {
			track->textures[i].tex_coord_type = 0;
			track->textures[i].txdepth = 0;
		} else {
			track->textures[i].txdepth = 16;
			track->textures[i].tex_coord_type = 1;
		}
		track->textures[i].cpp = 64;
		track->textures[i].robj = NULL;
		/* CS IB emission code makes sure texture units are disabled */
		track->textures[i].enabled = false;
		track->textures[i].lookup_disable = false;
		track->textures[i].roundup_w = true;
		track->textures[i].roundup_h = true;
		if (track->separate_cube)
			for (face = 0; face < 5; face++) {
				track->textures[i].cube_info[face].robj = NULL;
				track->textures[i].cube_info[face].width = 16536;
				track->textures[i].cube_info[face].height = 16536;
				track->textures[i].cube_info[face].offset = 0;
			}
	}
}

/*
 * Global GPU functions
 */
static void r100_errata(struct radeon_device *rdev)
{
	rdev->pll_errata = 0;

	if (rdev->family == CHIP_RV200 || rdev->family == CHIP_RS200) {
		rdev->pll_errata |= CHIP_ERRATA_PLL_DUMMYREADS;
	}

	if (rdev->family == CHIP_RV100 ||
	    rdev->family == CHIP_RS100 ||
	    rdev->family == CHIP_RS200) {
		rdev->pll_errata |= CHIP_ERRATA_PLL_DELAY;
	}
}

static int r100_rbbm_fifo_wait_for_entry(struct radeon_device *rdev, unsigned n)
{
	unsigned i;
	uint32_t tmp;

	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK;
		if (tmp >= n) {
			return 0;
		}
		DRM_UDELAY(1);
	}
	return -1;
}

int r100_gui_wait_for_idle(struct radeon_device *rdev)
{
	unsigned i;
	uint32_t tmp;

	if (r100_rbbm_fifo_wait_for_entry(rdev, 64)) {
		DRM_ERROR("radeon: wait for empty RBBM fifo failed !"
		       " Bad things might happen.\n");
	}
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(RADEON_RBBM_STATUS);
		if (!(tmp & RADEON_RBBM_ACTIVE)) {
			return 0;
		}
		DRM_UDELAY(1);
	}
	return -1;
}

int r100_mc_wait_for_idle(struct radeon_device *rdev)
{
	unsigned i;
	uint32_t tmp;

	for (i = 0; i < rdev->usec_timeout; i++) {
		/* read MC_STATUS */
		tmp = RREG32(RADEON_MC_STATUS);
		if (tmp & RADEON_MC_IDLE) {
			return 0;
		}
		DRM_UDELAY(1);
	}
	return -1;
}

bool r100_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
{
	u32 rbbm_status;

	rbbm_status = RREG32(R_000E40_RBBM_STATUS);
	if (!G_000E40_GUI_ACTIVE(rbbm_status)) {
		radeon_ring_lockup_update(ring);
		return false;
	}
	/* force CP activities */
	radeon_ring_force_activity(rdev, ring);
	return radeon_ring_test_lockup(rdev, ring);
}

/* required on r1xx, r2xx, r300, r(v)350, r420/r481, rs400/rs480 */
void r100_enable_bm(struct radeon_device *rdev)
{
	uint32_t tmp;
	/* Enable bus mastering */
	tmp = RREG32(RADEON_BUS_CNTL) & ~RADEON_BUS_MASTER_DIS;
	WREG32(RADEON_BUS_CNTL, tmp);
}

void r100_bm_disable(struct radeon_device *rdev)
{
	u32 tmp;

	/* disable bus mastering */
	tmp = RREG32(R_000030_BUS_CNTL);
	WREG32(R_000030_BUS_CNTL, (tmp & 0xFFFFFFFF) | 0x00000044);
	mdelay(1);
	WREG32(R_000030_BUS_CNTL, (tmp & 0xFFFFFFFF) | 0x00000042);
	mdelay(1);
	WREG32(R_000030_BUS_CNTL, (tmp & 0xFFFFFFFF) | 0x00000040);
	tmp = RREG32(RADEON_BUS_CNTL);
	mdelay(1);
	pci_disable_busmaster(rdev->dev);
	mdelay(1);
}

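/* Full soft reset: stop the MC and CP, disable bus mastering, pulse the
 * RBBM soft-reset bits for the GFX blocks and then for the CP, and
 * finally restore PCI state, bus mastering and the MC configuration.
 */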
int r100_asic_reset(struct radeon_device *rdev)
{
	struct r100_mc_save save;
	u32 status, tmp;
	int ret = 0;

	status = RREG32(R_000E40_RBBM_STATUS);
	if (!G_000E40_GUI_ACTIVE(status)) {
		return 0;
	}
	r100_mc_stop(rdev, &save);
	status = RREG32(R_000E40_RBBM_STATUS);
	dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status);
	/* stop CP */
	WREG32(RADEON_CP_CSQ_CNTL, 0);
	tmp = RREG32(RADEON_CP_RB_CNTL);
	WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_RPTR_WR_ENA);
	WREG32(RADEON_CP_RB_RPTR_WR, 0);
	WREG32(RADEON_CP_RB_WPTR, 0);
	WREG32(RADEON_CP_RB_CNTL, tmp);
	/* save PCI state */
	pci_save_state(device_get_parent(rdev->dev));
	/* disable bus mastering */
	r100_bm_disable(rdev);
	WREG32(R_0000F0_RBBM_SOFT_RESET, S_0000F0_SOFT_RESET_SE(1) |
					S_0000F0_SOFT_RESET_RE(1) |
					S_0000F0_SOFT_RESET_PP(1) |
					S_0000F0_SOFT_RESET_RB(1));
	RREG32(R_0000F0_RBBM_SOFT_RESET);
	mdelay(500);
	WREG32(R_0000F0_RBBM_SOFT_RESET, 0);
	mdelay(1);
	status = RREG32(R_000E40_RBBM_STATUS);
	dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status);
	/* reset CP */
	WREG32(R_0000F0_RBBM_SOFT_RESET, S_0000F0_SOFT_RESET_CP(1));
	RREG32(R_0000F0_RBBM_SOFT_RESET);
	mdelay(500);
	WREG32(R_0000F0_RBBM_SOFT_RESET, 0);
	mdelay(1);
	status = RREG32(R_000E40_RBBM_STATUS);
	dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status);
	/* restore PCI & busmastering */
	pci_restore_state(device_get_parent(rdev->dev));
	r100_enable_bm(rdev);
	/* Check if GPU is idle */
	if (G_000E40_SE_BUSY(status) || G_000E40_RE_BUSY(status) ||
		G_000E40_TAM_BUSY(status) || G_000E40_PB_BUSY(status)) {
		dev_err(rdev->dev, "failed to reset GPU\n");
		ret = -1;
	} else
		dev_info(rdev->dev, "GPU reset succeeded\n");
	r100_mc_resume(rdev, &save);
	return ret;
}

void r100_set_common_regs(struct radeon_device *rdev)
{
	struct drm_device *dev = rdev->ddev;
	bool force_dac2 = false;
	u32 tmp;

	/* set these so they don't interfere with anything */
	WREG32(RADEON_OV0_SCALE_CNTL, 0);
	WREG32(RADEON_SUBPIC_CNTL, 0);
	WREG32(RADEON_VIPH_CONTROL, 0);
	WREG32(RADEON_I2C_CNTL_1, 0);
	WREG32(RADEON_DVI_I2C_CNTL_1, 0);
	WREG32(RADEON_CAP0_TRIG_CNTL, 0);
	WREG32(RADEON_CAP1_TRIG_CNTL, 0);

	/* always set up dac2 on rn50 and some rv100 as lots
	 * of servers seem to wire it up to a VGA port but
	 * don't report it in the bios connector
	 * table.
	 */
	switch (dev->pci_device) {
		/* RN50 */
	case 0x515e:
	case 0x5969:
		force_dac2 = true;
		break;
		/* RV100 */
	case 0x5159:
	case 0x515a:
		/* DELL triple head servers */
		if ((dev->pci_subvendor == 0x1028 /* DELL */) &&
		    ((dev->pci_subdevice == 0x016c) ||
		     (dev->pci_subdevice == 0x016d) ||
		     (dev->pci_subdevice == 0x016e) ||
		     (dev->pci_subdevice == 0x016f) ||
		     (dev->pci_subdevice == 0x0170) ||
		     (dev->pci_subdevice == 0x017d) ||
		     (dev->pci_subdevice == 0x017e) ||
		     (dev->pci_subdevice == 0x0183) ||
		     (dev->pci_subdevice == 0x018a) ||
		     (dev->pci_subdevice == 0x019a)))
			force_dac2 = true;
		break;
	}

	if (force_dac2) {
		u32 disp_hw_debug = RREG32(RADEON_DISP_HW_DEBUG);
		u32 tv_dac_cntl = RREG32(RADEON_TV_DAC_CNTL);
		u32 dac2_cntl = RREG32(RADEON_DAC_CNTL2);

		/* For CRT on DAC2, don't turn it on if BIOS didn't
		   enable it, even if it's detected.
		*/

		/* force it to crtc0 */
		dac2_cntl &= ~RADEON_DAC2_DAC_CLK_SEL;
		dac2_cntl |= RADEON_DAC2_DAC2_CLK_SEL;
		disp_hw_debug |= RADEON_CRT2_DISP1_SEL;

		/* set up the TV DAC */
		tv_dac_cntl &= ~(RADEON_TV_DAC_PEDESTAL |
				 RADEON_TV_DAC_STD_MASK |
				 RADEON_TV_DAC_RDACPD |
				 RADEON_TV_DAC_GDACPD |
				 RADEON_TV_DAC_BDACPD |
				 RADEON_TV_DAC_BGADJ_MASK |
				 RADEON_TV_DAC_DACADJ_MASK);
		tv_dac_cntl |= (RADEON_TV_DAC_NBLANK |
				RADEON_TV_DAC_NHOLD |
				RADEON_TV_DAC_STD_PS2 |
				(0x58 << 16));

		WREG32(RADEON_TV_DAC_CNTL, tv_dac_cntl);
		WREG32(RADEON_DISP_HW_DEBUG, disp_hw_debug);
		WREG32(RADEON_DAC_CNTL2, dac2_cntl);
	}

	/* switch PM block to ACPI mode */
	tmp = RREG32_PLL(RADEON_PLL_PWRMGT_CNTL);
	tmp &= ~RADEON_PM_MODE_SEL;
	WREG32_PLL(RADEON_PLL_PWRMGT_CNTL, tmp);

}

/*
 * VRAM info
 */
static void r100_vram_get_type(struct radeon_device *rdev)
{
	uint32_t tmp;

	rdev->mc.vram_is_ddr = false;
	if (rdev->flags & RADEON_IS_IGP)
		rdev->mc.vram_is_ddr = true;
	else if (RREG32(RADEON_MEM_SDRAM_MODE_REG) & RADEON_MEM_CFG_TYPE_DDR)
		rdev->mc.vram_is_ddr = true;
	if ((rdev->family == CHIP_RV100) ||
	    (rdev->family == CHIP_RS100) ||
	    (rdev->family == CHIP_RS200)) {
		tmp = RREG32(RADEON_MEM_CNTL);
		if (tmp & RV100_HALF_MODE) {
			rdev->mc.vram_width = 32;
		} else {
			rdev->mc.vram_width = 64;
		}
		if (rdev->flags & RADEON_SINGLE_CRTC) {
			rdev->mc.vram_width /= 4;
			rdev->mc.vram_is_ddr = true;
		}
	} else if (rdev->family <= CHIP_RV280) {
		tmp = RREG32(RADEON_MEM_CNTL);
		if (tmp & RADEON_MEM_NUM_CHANNELS_MASK) {
			rdev->mc.vram_width = 128;
		} else {
			rdev->mc.vram_width = 64;
		}
	} else {
		/* newer IGPs */
		rdev->mc.vram_width = 128;
	}
}

static u32 r100_get_accessible_vram(struct radeon_device *rdev)
{
	u32 aper_size;
	u8 byte;

	aper_size = RREG32(RADEON_CONFIG_APER_SIZE);

	/* Set HDP_APER_CNTL only on cards that are known not to be broken,
	 * that is, those that have the 2nd generation multifunction PCI interface
	 */
	if (rdev->family == CHIP_RV280 ||
	    rdev->family >= CHIP_RV350) {
		WREG32_P(RADEON_HOST_PATH_CNTL, RADEON_HDP_APER_CNTL,
		       ~RADEON_HDP_APER_CNTL);
		DRM_INFO("Generation 2 PCI interface, using max accessible memory\n");
		return aper_size * 2;
	}

	/* Older cards have all sorts of funny issues to deal with. First
	 * check if it's a multifunction card by reading the PCI config
	 * header type... Limit those to one aperture size
	 */
	byte = pci_read_config(rdev->dev, 0xe, 1);
	if (byte & 0x80) {
		DRM_INFO("Generation 1 PCI interface in multifunction mode\n");
		DRM_INFO("Limiting VRAM to one aperture\n");
		return aper_size;
	}

	/* Single-function older card. We read HDP_APER_CNTL to see how the BIOS
	 * has set it up. We don't write it as that's broken on some ASICs, but
	 * we expect the BIOS to have done the right thing (might be too optimistic...)
	 */
	if (RREG32(RADEON_HOST_PATH_CNTL) & RADEON_HDP_APER_CNTL)
		return aper_size * 2;
	return aper_size;
}

void r100_vram_init_sizes(struct radeon_device *rdev)
{
	u64 config_aper_size;

	/* work out accessible VRAM */
	rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
	rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);
	rdev->mc.visible_vram_size = r100_get_accessible_vram(rdev);
	/* FIXME we don't use the second aperture yet when we could use it */
	if (rdev->mc.visible_vram_size > rdev->mc.aper_size)
		rdev->mc.visible_vram_size = rdev->mc.aper_size;
	config_aper_size = RREG32(RADEON_CONFIG_APER_SIZE);
	if (rdev->flags & RADEON_IS_IGP) {
		uint32_t tom;
		/* read NB_TOM to get the amount of ram stolen for the GPU */
		tom = RREG32(RADEON_NB_TOM);
		rdev->mc.real_vram_size = (((tom >> 16) - (tom & 0xffff) + 1) << 16);
		WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.real_vram_size);
		rdev->mc.mc_vram_size = rdev->mc.real_vram_size;
	} else {
		rdev->mc.real_vram_size = RREG32(RADEON_CONFIG_MEMSIZE);
		/* Some production M6 boards will report 0
		 * if 8 MB is installed
		 */
		if (rdev->mc.real_vram_size == 0) {
			rdev->mc.real_vram_size = 8192 * 1024;
			WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.real_vram_size);
		}
		/* Fix for RN50, M6, M7 with 8/16/32(??) MB of VRAM -
		 * Novell bug 204882, along with lots of Ubuntu ones
		 */
		if (rdev->mc.aper_size > config_aper_size)
			config_aper_size = rdev->mc.aper_size;

		if (config_aper_size > rdev->mc.real_vram_size)
			rdev->mc.mc_vram_size = config_aper_size;
		else
			rdev->mc.mc_vram_size = rdev->mc.real_vram_size;
	}
}

void r100_vga_set_state(struct radeon_device *rdev, bool state)
{
	uint32_t temp;

	temp = RREG32(RADEON_CONFIG_CNTL);
	if (state == false) {
		temp &= ~RADEON_CFG_VGA_RAM_EN;
		temp |= RADEON_CFG_VGA_IO_DIS;
	} else {
		temp &= ~RADEON_CFG_VGA_IO_DIS;
	}
	WREG32(RADEON_CONFIG_CNTL, temp);
}

static void r100_mc_init(struct radeon_device *rdev)
{
	u64 base;

	r100_vram_get_type(rdev);
	r100_vram_init_sizes(rdev);
	base = rdev->mc.aper_base;
	if (rdev->flags & RADEON_IS_IGP)
		base = (RREG32(RADEON_NB_TOM) & 0xffff) << 16;
	radeon_vram_location(rdev, &rdev->mc, base);
	rdev->mc.gtt_base_align = 0;
	if (!(rdev->flags & RADEON_IS_AGP))
		radeon_gtt_location(rdev, &rdev->mc);
	radeon_update_bandwidth_info(rdev);
}


/*
 * Indirect register accessors
 */
void r100_pll_errata_after_index(struct radeon_device *rdev)
{
	if (rdev->pll_errata & CHIP_ERRATA_PLL_DUMMYREADS) {
		(void)RREG32(RADEON_CLOCK_CNTL_DATA);
		(void)RREG32(RADEON_CRTC_GEN_CNTL);
	}
}

static void r100_pll_errata_after_data(struct radeon_device *rdev)
{
	/* This workaround is necessary on RV100, RS100 and RS200 chips,
	 * or the chip could hang on a subsequent access
	 */
	if (rdev->pll_errata & CHIP_ERRATA_PLL_DELAY) {
		mdelay(5);
	}

	/* This function is required to work around a hardware bug in some (all?)
	 * revisions of the R300.  This workaround should be called after every
	 * CLOCK_CNTL_INDEX register access.  If not, register reads afterward
	 * may not be correct.
	 */
	if (rdev->pll_errata & CHIP_ERRATA_R300_CG) {
		uint32_t save, tmp;

		save = RREG32(RADEON_CLOCK_CNTL_INDEX);
		tmp = save & ~(0x3f | RADEON_PLL_WR_EN);
		WREG32(RADEON_CLOCK_CNTL_INDEX, tmp);
		tmp = RREG32(RADEON_CLOCK_CNTL_DATA);
		WREG32(RADEON_CLOCK_CNTL_INDEX, save);
	}
}

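/* PLL registers are reached indirectly: write the register index (plus
 * RADEON_PLL_WR_EN for writes) to CLOCK_CNTL_INDEX, then transfer the
 * value through CLOCK_CNTL_DATA, applying the errata workarounds after
 * each step.
 */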
uint32_t r100_pll_rreg(struct radeon_device *rdev, uint32_t reg)
{
	uint32_t data;

	WREG8(RADEON_CLOCK_CNTL_INDEX, reg & 0x3f);
	r100_pll_errata_after_index(rdev);
	data = RREG32(RADEON_CLOCK_CNTL_DATA);
	r100_pll_errata_after_data(rdev);
	return data;
}

void r100_pll_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
{
	WREG8(RADEON_CLOCK_CNTL_INDEX, ((reg & 0x3f) | RADEON_PLL_WR_EN));
	r100_pll_errata_after_index(rdev);
	WREG32(RADEON_CLOCK_CNTL_DATA, v);
	r100_pll_errata_after_data(rdev);
}

static void r100_set_safe_registers(struct radeon_device *rdev)
{
	if (ASIC_IS_RN50(rdev)) {
		rdev->config.r100.reg_safe_bm = rn50_reg_safe_bm;
		rdev->config.r100.reg_safe_bm_size = ARRAY_SIZE(rn50_reg_safe_bm);
	} else if (rdev->family < CHIP_R200) {
		rdev->config.r100.reg_safe_bm = r100_reg_safe_bm;
		rdev->config.r100.reg_safe_bm_size = ARRAY_SIZE(r100_reg_safe_bm);
	} else {
		r200_set_safe_registers(rdev);
	}
}

/*
 * Debugfs info
 */
#if defined(CONFIG_DEBUG_FS)
static int r100_debugfs_rbbm_info(struct seq_file *m, void *data)
{
	struct drm_info_node *node = (struct drm_info_node *) m->private;
	struct drm_device *dev = node->minor->dev;
	struct radeon_device *rdev = dev->dev_private;
	uint32_t reg, value;
	unsigned i;

	seq_printf(m, "RBBM_STATUS 0x%08x\n", RREG32(RADEON_RBBM_STATUS));
	seq_printf(m, "RBBM_CMDFIFO_STAT 0x%08x\n", RREG32(0xE7C));
	seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT));
	for (i = 0; i < 64; i++) {
		WREG32(RADEON_RBBM_CMDFIFO_ADDR, i | 0x100);
		reg = (RREG32(RADEON_RBBM_CMDFIFO_DATA) - 1) >> 2;
		WREG32(RADEON_RBBM_CMDFIFO_ADDR, i);
		value = RREG32(RADEON_RBBM_CMDFIFO_DATA);
		seq_printf(m, "[0x%03X] 0x%04X=0x%08X\n", i, reg, value);
	}
	return 0;
}

static int r100_debugfs_cp_ring_info(struct seq_file *m, void *data)
{
	struct drm_info_node *node = (struct drm_info_node *) m->private;
	struct drm_device *dev = node->minor->dev;
	struct radeon_device *rdev = dev->dev_private;
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	uint32_t rdp, wdp;
	unsigned count, i, j;

	radeon_ring_free_size(rdev, ring);
	rdp = RREG32(RADEON_CP_RB_RPTR);
	wdp = RREG32(RADEON_CP_RB_WPTR);
	count = (rdp + ring->ring_size - wdp) & ring->ptr_mask;
	seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT));
	seq_printf(m, "CP_RB_WPTR 0x%08x\n", wdp);
	seq_printf(m, "CP_RB_RPTR 0x%08x\n", rdp);
	seq_printf(m, "%u free dwords in ring\n", ring->ring_free_dw);
	seq_printf(m, "%u dwords in ring\n", count);
	for (j = 0; j <= count; j++) {
		i = (rdp + j) & ring->ptr_mask;
		seq_printf(m, "r[%04d]=0x%08x\n", i, ring->ring[i]);
	}
	return 0;
}


static int r100_debugfs_cp_csq_fifo(struct seq_file *m, void *data)
{
	struct drm_info_node *node = (struct drm_info_node *) m->private;
	struct drm_device *dev = node->minor->dev;
	struct radeon_device *rdev = dev->dev_private;
	uint32_t csq_stat, csq2_stat, tmp;
	unsigned r_rptr, r_wptr, ib1_rptr, ib1_wptr, ib2_rptr, ib2_wptr;
	unsigned i;

	seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT));
	seq_printf(m, "CP_CSQ_MODE 0x%08x\n", RREG32(RADEON_CP_CSQ_MODE));
	csq_stat = RREG32(RADEON_CP_CSQ_STAT);
	csq2_stat = RREG32(RADEON_CP_CSQ2_STAT);
	r_rptr = (csq_stat >> 0) & 0x3ff;
	r_wptr = (csq_stat >> 10) & 0x3ff;
	ib1_rptr = (csq_stat >> 20) & 0x3ff;
	ib1_wptr = (csq2_stat >> 0) & 0x3ff;
	ib2_rptr = (csq2_stat >> 10) & 0x3ff;
	ib2_wptr = (csq2_stat >> 20) & 0x3ff;
	seq_printf(m, "CP_CSQ_STAT 0x%08x\n", csq_stat);
	seq_printf(m, "CP_CSQ2_STAT 0x%08x\n", csq2_stat);
	seq_printf(m, "Ring rptr %u\n", r_rptr);
	seq_printf(m, "Ring wptr %u\n", r_wptr);
	seq_printf(m, "Indirect1 rptr %u\n", ib1_rptr);
	seq_printf(m, "Indirect1 wptr %u\n", ib1_wptr);
	seq_printf(m, "Indirect2 rptr %u\n", ib2_rptr);
	seq_printf(m, "Indirect2 wptr %u\n", ib2_wptr);
	/* FIXME: 0, 128, 640 depend on the fifo setup; see cp_init_kms.
	 * 128 = indirect1_start * 8 and 640 = indirect2_start * 8 */
	seq_printf(m, "Ring fifo:\n");
	for (i = 0; i < 256; i++) {
		WREG32(RADEON_CP_CSQ_ADDR, i << 2);
		tmp = RREG32(RADEON_CP_CSQ_DATA);
		seq_printf(m, "rfifo[%04d]=0x%08X\n", i, tmp);
	}
	seq_printf(m, "Indirect1 fifo:\n");
	for (i = 256; i <= 512; i++) {
		WREG32(RADEON_CP_CSQ_ADDR, i << 2);
		tmp = RREG32(RADEON_CP_CSQ_DATA);
		seq_printf(m, "ib1fifo[%04d]=0x%08X\n", i, tmp);
	}
	seq_printf(m, "Indirect2 fifo:\n");
	for (i = 640; i < ib1_wptr; i++) {
		WREG32(RADEON_CP_CSQ_ADDR, i << 2);
		tmp = RREG32(RADEON_CP_CSQ_DATA);
		seq_printf(m, "ib2fifo[%04d]=0x%08X\n", i, tmp);
	}
	return 0;
}

static int r100_debugfs_mc_info(struct seq_file *m, void *data)
{
	struct drm_info_node *node = (struct drm_info_node *) m->private;
	struct drm_device *dev = node->minor->dev;
	struct radeon_device *rdev = dev->dev_private;
	uint32_t tmp;

	tmp = RREG32(RADEON_CONFIG_MEMSIZE);
	seq_printf(m, "CONFIG_MEMSIZE 0x%08x\n", tmp);
	tmp = RREG32(RADEON_MC_FB_LOCATION);
	seq_printf(m, "MC_FB_LOCATION 0x%08x\n", tmp);
	tmp = RREG32(RADEON_BUS_CNTL);
	seq_printf(m, "BUS_CNTL 0x%08x\n", tmp);
	tmp = RREG32(RADEON_MC_AGP_LOCATION);
	seq_printf(m, "MC_AGP_LOCATION 0x%08x\n", tmp);
	tmp = RREG32(RADEON_AGP_BASE);
	seq_printf(m, "AGP_BASE 0x%08x\n", tmp);
	tmp = RREG32(RADEON_HOST_PATH_CNTL);
	seq_printf(m, "HOST_PATH_CNTL 0x%08x\n", tmp);
	tmp = RREG32(0x01D0);
	seq_printf(m, "AIC_CTRL 0x%08x\n", tmp);
	tmp = RREG32(RADEON_AIC_LO_ADDR);
	seq_printf(m, "AIC_LO_ADDR 0x%08x\n", tmp);
	tmp = RREG32(RADEON_AIC_HI_ADDR);
	seq_printf(m, "AIC_HI_ADDR 0x%08x\n", tmp);
	tmp = RREG32(0x01E4);
	seq_printf(m, "AIC_TLB_ADDR 0x%08x\n", tmp);
	return 0;
}

static struct drm_info_list r100_debugfs_rbbm_list[] = {
	{"r100_rbbm_info", r100_debugfs_rbbm_info, 0, NULL},
};

static struct drm_info_list r100_debugfs_cp_list[] = {
	{"r100_cp_ring_info", r100_debugfs_cp_ring_info, 0, NULL},
	{"r100_cp_csq_fifo", r100_debugfs_cp_csq_fifo, 0, NULL},
};

static struct drm_info_list r100_debugfs_mc_info_list[] = {
	{"r100_mc_info", r100_debugfs_mc_info, 0, NULL},
};
#endif

int r100_debugfs_rbbm_init(struct radeon_device *rdev)
{
#if defined(CONFIG_DEBUG_FS)
	return radeon_debugfs_add_files(rdev, r100_debugfs_rbbm_list, 1);
#else
	return 0;
#endif
}

int r100_debugfs_cp_init(struct radeon_device *rdev)
{
#if defined(CONFIG_DEBUG_FS)
	return radeon_debugfs_add_files(rdev, r100_debugfs_cp_list, 2);
#else
	return 0;
#endif
}

int r100_debugfs_mc_info_init(struct radeon_device *rdev)
{
#if defined(CONFIG_DEBUG_FS)
	return radeon_debugfs_add_files(rdev, r100_debugfs_mc_info_list, 1);
#else
	return 0;
#endif
}

int r100_set_surface_reg(struct radeon_device *rdev, int reg,
			 uint32_t tiling_flags, uint32_t pitch,
			 uint32_t offset, uint32_t obj_size)
{
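	/* the per-surface registers (INFO and the two address bounds) are
	 * spaced 16 bytes apart, hence the reg * 16 index below */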
	int surf_index = reg * 16;
	int flags = 0;

	if (rdev->family <= CHIP_RS200) {
		if ((tiling_flags & (RADEON_TILING_MACRO|RADEON_TILING_MICRO))
				 == (RADEON_TILING_MACRO|RADEON_TILING_MICRO))
			flags |= RADEON_SURF_TILE_COLOR_BOTH;
		if (tiling_flags & RADEON_TILING_MACRO)
			flags |= RADEON_SURF_TILE_COLOR_MACRO;
	} else if (rdev->family <= CHIP_RV280) {
		if (tiling_flags & (RADEON_TILING_MACRO))
			flags |= R200_SURF_TILE_COLOR_MACRO;
		if (tiling_flags & RADEON_TILING_MICRO)
			flags |= R200_SURF_TILE_COLOR_MICRO;
	} else {
		if (tiling_flags & RADEON_TILING_MACRO)
			flags |= R300_SURF_TILE_MACRO;
		if (tiling_flags & RADEON_TILING_MICRO)
			flags |= R300_SURF_TILE_MICRO;
	}

	if (tiling_flags & RADEON_TILING_SWAP_16BIT)
		flags |= RADEON_SURF_AP0_SWP_16BPP | RADEON_SURF_AP1_SWP_16BPP;
	if (tiling_flags & RADEON_TILING_SWAP_32BIT)
		flags |= RADEON_SURF_AP0_SWP_32BPP | RADEON_SURF_AP1_SWP_32BPP;

	/* when we aren't tiling, the pitch seems to need to be further
	 * divided down - tested on power5 + rn50 server */
	if (tiling_flags & (RADEON_TILING_SWAP_16BIT | RADEON_TILING_SWAP_32BIT)) {
		if (!(tiling_flags & (RADEON_TILING_MACRO | RADEON_TILING_MICRO)))
			if (ASIC_IS_RN50(rdev))
				pitch /= 16;
	}

	/* r100/r200 divide by 16 */
	if (rdev->family < CHIP_R300)
		flags |= pitch / 16;
	else
		flags |= pitch / 8;


	DRM_DEBUG_KMS("writing surface %d %d %x %x\n", reg, flags, offset, offset+obj_size-1);
	WREG32(RADEON_SURFACE0_INFO + surf_index, flags);
	WREG32(RADEON_SURFACE0_LOWER_BOUND + surf_index, offset);
	WREG32(RADEON_SURFACE0_UPPER_BOUND + surf_index, offset + obj_size - 1);
	return 0;
}

void r100_clear_surface_reg(struct radeon_device *rdev, int reg)
{
	int surf_index = reg * 16;
	WREG32(RADEON_SURFACE0_INFO + surf_index, 0);
}

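/* Program the display watermarks. Everything below works in 20.12 fixed
 * point: compute the peak display bandwidth of the active CRTCs, derive
 * the memory controller and engine latencies from the DRAM timing
 * fields, and turn the resulting drain rate into GRPH_BUFFER_CNTL
 * critical points so the display FIFOs do not underflow.
 */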
void r100_bandwidth_update(struct radeon_device *rdev)
{
	fixed20_12 trcd_ff, trp_ff, tras_ff, trbs_ff, tcas_ff;
	fixed20_12 sclk_ff, mclk_ff, sclk_eff_ff, sclk_delay_ff;
	fixed20_12 peak_disp_bw, mem_bw, pix_clk, pix_clk2, temp_ff, crit_point_ff;
	uint32_t temp, data, mem_trcd, mem_trp, mem_tras;
	fixed20_12 memtcas_ff[8] = {
		dfixed_init(1),
		dfixed_init(2),
		dfixed_init(3),
		dfixed_init(0),
		dfixed_init_half(1),
		dfixed_init_half(2),
		dfixed_init(0),
	};
	fixed20_12 memtcas_rs480_ff[8] = {
		dfixed_init(0),
		dfixed_init(1),
		dfixed_init(2),
		dfixed_init(3),
		dfixed_init(0),
		dfixed_init_half(1),
		dfixed_init_half(2),
		dfixed_init_half(3),
	};
	fixed20_12 memtcas2_ff[8] = {
		dfixed_init(0),
		dfixed_init(1),
		dfixed_init(2),
		dfixed_init(3),
		dfixed_init(4),
		dfixed_init(5),
		dfixed_init(6),
		dfixed_init(7),
	};
	fixed20_12 memtrbs[8] = {
		dfixed_init(1),
		dfixed_init_half(1),
		dfixed_init(2),
		dfixed_init_half(2),
		dfixed_init(3),
		dfixed_init_half(3),
		dfixed_init(4),
		dfixed_init_half(4)
	};
	fixed20_12 memtrbs_r4xx[8] = {
		dfixed_init(4),
		dfixed_init(5),
		dfixed_init(6),
		dfixed_init(7),
		dfixed_init(8),
		dfixed_init(9),
		dfixed_init(10),
		dfixed_init(11)
	};
	fixed20_12 min_mem_eff;
	fixed20_12 mc_latency_sclk, mc_latency_mclk, k1;
	fixed20_12 cur_latency_mclk, cur_latency_sclk;
	fixed20_12 disp_latency, disp_latency_overhead, disp_drain_rate,
		disp_drain_rate2, read_return_rate;
	fixed20_12 time_disp1_drop_priority;
	int c;
	int cur_size = 16;       /* in octawords */
	int critical_point = 0, critical_point2;
/* 	uint32_t read_return_rate, time_disp1_drop_priority; */
	int stop_req, max_stop_req;
	struct drm_display_mode *mode1 = NULL;
	struct drm_display_mode *mode2 = NULL;
	uint32_t pixel_bytes1 = 0;
	uint32_t pixel_bytes2 = 0;

	radeon_update_display_priority(rdev);

	if (rdev->mode_info.crtcs[0]->base.enabled) {
		mode1 = &rdev->mode_info.crtcs[0]->base.mode;
		pixel_bytes1 = rdev->mode_info.crtcs[0]->base.fb->bits_per_pixel / 8;
	}
	if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
		if (rdev->mode_info.crtcs[1]->base.enabled) {
			mode2 = &rdev->mode_info.crtcs[1]->base.mode;
			pixel_bytes2 = rdev->mode_info.crtcs[1]->base.fb->bits_per_pixel / 8;
		}
	}

	min_mem_eff.full = dfixed_const_8(0);
	/* get modes */
	if ((rdev->disp_priority == 2) && ASIC_IS_R300(rdev)) {
		uint32_t mc_init_misc_lat_timer = RREG32(R300_MC_INIT_MISC_LAT_TIMER);
		mc_init_misc_lat_timer &= ~(R300_MC_DISP1R_INIT_LAT_MASK << R300_MC_DISP1R_INIT_LAT_SHIFT);
		mc_init_misc_lat_timer &= ~(R300_MC_DISP0R_INIT_LAT_MASK << R300_MC_DISP0R_INIT_LAT_SHIFT);
		/* check crtc enables */
		if (mode2)
			mc_init_misc_lat_timer |= (1 << R300_MC_DISP1R_INIT_LAT_SHIFT);
		if (mode1)
			mc_init_misc_lat_timer |= (1 << R300_MC_DISP0R_INIT_LAT_SHIFT);
		WREG32(R300_MC_INIT_MISC_LAT_TIMER, mc_init_misc_lat_timer);
	}

	/*
	 * determine if there is enough bandwidth for the current mode
	 */
	sclk_ff = rdev->pm.sclk;
	mclk_ff = rdev->pm.mclk;

	temp = (rdev->mc.vram_width / 8) * (rdev->mc.vram_is_ddr ? 2 : 1);
	temp_ff.full = dfixed_const(temp);
	mem_bw.full = dfixed_mul(mclk_ff, temp_ff);

	pix_clk.full = 0;
	pix_clk2.full = 0;
	peak_disp_bw.full = 0;
	if (mode1) {
		temp_ff.full = dfixed_const(1000);
		pix_clk.full = dfixed_const(mode1->clock); /* convert to fixed point */
		pix_clk.full = dfixed_div(pix_clk, temp_ff);
		temp_ff.full = dfixed_const(pixel_bytes1);
		peak_disp_bw.full += dfixed_mul(pix_clk, temp_ff);
	}
	if (mode2) {
		temp_ff.full = dfixed_const(1000);
		pix_clk2.full = dfixed_const(mode2->clock); /* convert to fixed point */
		pix_clk2.full = dfixed_div(pix_clk2, temp_ff);
		temp_ff.full = dfixed_const(pixel_bytes2);
		peak_disp_bw.full += dfixed_mul(pix_clk2, temp_ff);
	}

	mem_bw.full = dfixed_mul(mem_bw, min_mem_eff);
	if (peak_disp_bw.full >= mem_bw.full) {
		DRM_ERROR("You may not have enough display bandwidth for the current mode\n"
			  "If you have a flickering problem, try to lower the resolution, refresh rate, or color depth\n");
	}

	/*  Get values from the EXT_MEM_CNTL register...converting its contents. */
	temp = RREG32(RADEON_MEM_TIMING_CNTL);
	if ((rdev->family == CHIP_RV100) || (rdev->flags & RADEON_IS_IGP)) { /* RV100, M6, IGPs */
		mem_trcd = ((temp >> 2) & 0x3) + 1;
		mem_trp  = ((temp & 0x3)) + 1;
		mem_tras = ((temp & 0x70) >> 4) + 1;
	} else if (rdev->family == CHIP_R300 ||
		   rdev->family == CHIP_R350) { /* r300, r350 */
		mem_trcd = (temp & 0x7) + 1;
		mem_trp = ((temp >> 8) & 0x7) + 1;
		mem_tras = ((temp >> 11) & 0xf) + 4;
	} else if (rdev->family == CHIP_RV350 ||
		   rdev->family <= CHIP_RV380) {
		/* rv3x0 */
		mem_trcd = (temp & 0x7) + 3;
		mem_trp = ((temp >> 8) & 0x7) + 3;
		mem_tras = ((temp >> 11) & 0xf) + 6;
	} else if (rdev->family == CHIP_R420 ||
		   rdev->family == CHIP_R423 ||
		   rdev->family == CHIP_RV410) {
		/* r4xx */
		mem_trcd = (temp & 0xf) + 3;
		if (mem_trcd > 15)
			mem_trcd = 15;
		mem_trp = ((temp >> 8) & 0xf) + 3;
		if (mem_trp > 15)
			mem_trp = 15;
		mem_tras = ((temp >> 12) & 0x1f) + 6;
		if (mem_tras > 31)
			mem_tras = 31;
	} else { /* RV200, R200 */
		mem_trcd = (temp & 0x7) + 1;
		mem_trp = ((temp >> 8) & 0x7) + 1;
		mem_tras = ((temp >> 12) & 0xf) + 4;
	}
	/* convert to FF */
	trcd_ff.full = dfixed_const(mem_trcd);
	trp_ff.full = dfixed_const(mem_trp);
	tras_ff.full = dfixed_const(mem_tras);

	/* Get values from the MEM_SDRAM_MODE_REG register...converting its contents. */
3426	temp = RREG32(RADEON_MEM_SDRAM_MODE_REG);
3427	data = (temp & (7 << 20)) >> 20;
3428	if ((rdev->family == CHIP_RV100) || rdev->flags & RADEON_IS_IGP) {
3429		if (rdev->family == CHIP_RS480) /* don't think rs400 */
3430			tcas_ff = memtcas_rs480_ff[data];
3431		else
3432			tcas_ff = memtcas_ff[data];
3433	} else
3434		tcas_ff = memtcas2_ff[data];
3435
3436	if (rdev->family == CHIP_RS400 ||
3437	    rdev->family == CHIP_RS480) {
3438		/* extra cas latency stored in bits 23-25 0-4 clocks */
3439		data = (temp >> 23) & 0x7;
3440		if (data < 5)
3441			tcas_ff.full += dfixed_const(data);
3442	}
3443
3444	if (ASIC_IS_R300(rdev) && !(rdev->flags & RADEON_IS_IGP)) {
3445		/* on the R300, Tcas is included in Trbs.
3446		 */
3447		temp = RREG32(RADEON_MEM_CNTL);
3448		data = (R300_MEM_NUM_CHANNELS_MASK & temp);
3449		if (data == 1) {
3450			if (R300_MEM_USE_CD_CH_ONLY & temp) {
3451				temp = RREG32(R300_MC_IND_INDEX);
3452				temp &= ~R300_MC_IND_ADDR_MASK;
3453				temp |= R300_MC_READ_CNTL_CD_mcind;
3454				WREG32(R300_MC_IND_INDEX, temp);
3455				temp = RREG32(R300_MC_IND_DATA);
3456				data = (R300_MEM_RBS_POSITION_C_MASK & temp);
3457			} else {
3458				temp = RREG32(R300_MC_READ_CNTL_AB);
3459				data = (R300_MEM_RBS_POSITION_A_MASK & temp);
3460			}
3461		} else {
3462			temp = RREG32(R300_MC_READ_CNTL_AB);
3463			data = (R300_MEM_RBS_POSITION_A_MASK & temp);
3464		}
3465		if (rdev->family == CHIP_RV410 ||
3466		    rdev->family == CHIP_R420 ||
3467		    rdev->family == CHIP_R423)
3468			trbs_ff = memtrbs_r4xx[data];
3469		else
3470			trbs_ff = memtrbs[data];
3471		tcas_ff.full += trbs_ff.full;
3472	}
3473
	sclk_eff_ff.full = sclk_ff.full;

	if (rdev->flags & RADEON_IS_AGP) {
		fixed20_12 agpmode_ff;
		agpmode_ff.full = dfixed_const(radeon_agpmode);
		temp_ff.full = dfixed_const_666(16);
		sclk_eff_ff.full -= dfixed_mul(agpmode_ff, temp_ff);
	}
	/* TODO PCIE lanes may affect this - agpmode == 16?? */

	if (ASIC_IS_R300(rdev)) {
		sclk_delay_ff.full = dfixed_const(250);
	} else {
		if ((rdev->family == CHIP_RV100) ||
		    rdev->flags & RADEON_IS_IGP) {
			if (rdev->mc.vram_is_ddr)
				sclk_delay_ff.full = dfixed_const(41);
			else
				sclk_delay_ff.full = dfixed_const(33);
		} else {
			if (rdev->mc.vram_width == 128)
				sclk_delay_ff.full = dfixed_const(57);
			else
				sclk_delay_ff.full = dfixed_const(41);
		}
	}

	mc_latency_sclk.full = dfixed_div(sclk_delay_ff, sclk_eff_ff);

	if (rdev->mc.vram_is_ddr) {
		if (rdev->mc.vram_width == 32) {
			k1.full = dfixed_const(40);
			c  = 3;
		} else {
			k1.full = dfixed_const(20);
			c  = 1;
		}
	} else {
		k1.full = dfixed_const(40);
		c  = 3;
	}

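	/*
	  Worst case memory latency, derived from the DRAM timings read
	  above:
	    mc_latency_mclk = (2*Trcd + c*Tcas + 4*(Tras + Trp) + k1) / Mclk
	                      + 4 / Sclk_eff
	  where k1 and c were chosen above from the memory type and width.
	*/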
	temp_ff.full = dfixed_const(2);
	mc_latency_mclk.full = dfixed_mul(trcd_ff, temp_ff);
	temp_ff.full = dfixed_const(c);
	mc_latency_mclk.full += dfixed_mul(tcas_ff, temp_ff);
	temp_ff.full = dfixed_const(4);
	mc_latency_mclk.full += dfixed_mul(tras_ff, temp_ff);
	mc_latency_mclk.full += dfixed_mul(trp_ff, temp_ff);
	mc_latency_mclk.full += k1.full;

	mc_latency_mclk.full = dfixed_div(mc_latency_mclk, mclk_ff);
	mc_latency_mclk.full += dfixed_div(temp_ff, sclk_eff_ff);

	/*
	  HW cursor time assuming worst case of full size colour cursor.
	*/
	temp_ff.full = dfixed_const((2 * (cur_size - (rdev->mc.vram_is_ddr + 1))));
	temp_ff.full += trcd_ff.full;
	if (temp_ff.full < tras_ff.full)
		temp_ff.full = tras_ff.full;
	cur_latency_mclk.full = dfixed_div(temp_ff, mclk_ff);

	temp_ff.full = dfixed_const(cur_size);
	cur_latency_sclk.full = dfixed_div(temp_ff, sclk_eff_ff);
	/*
	  Find the total latency for the display data.
	*/
	disp_latency_overhead.full = dfixed_const(8);
	disp_latency_overhead.full = dfixed_div(disp_latency_overhead, sclk_ff);
	mc_latency_mclk.full += disp_latency_overhead.full + cur_latency_mclk.full;
	mc_latency_sclk.full += disp_latency_overhead.full + cur_latency_sclk.full;

	if (mc_latency_mclk.full > mc_latency_sclk.full)
		disp_latency.full = mc_latency_mclk.full;
	else
		disp_latency.full = mc_latency_sclk.full;

	/* setup Max GRPH_STOP_REQ default value */
	if (ASIC_IS_RV100(rdev))
		max_stop_req = 0x5c;
	else
		max_stop_req = 0x7c;

	if (mode1) {
		/*  CRTC1
		    Set GRPH_BUFFER_CNTL register using h/w defined optimal values.
		    GRPH_STOP_REQ <= MIN[ 0x7C, (CRTC_H_DISP + 1) * (bit depth) / 0x10 ]
		*/
		stop_req = mode1->hdisplay * pixel_bytes1 / 16;

		if (stop_req > max_stop_req)
			stop_req = max_stop_req;

		/*
		  Find the drain rate of the display buffer.
		*/
		temp_ff.full = dfixed_const((16/pixel_bytes1));
		disp_drain_rate.full = dfixed_div(pix_clk, temp_ff);

		/*
		  Find the critical point of the display buffer.
		*/
		crit_point_ff.full = dfixed_mul(disp_drain_rate, disp_latency);
		crit_point_ff.full += dfixed_const_half(0);

		critical_point = dfixed_trunc(crit_point_ff);

		if (rdev->disp_priority == 2) {
			critical_point = 0;
		}

		/*
		  The critical point should never be above max_stop_req-4.  Setting
		  GRPH_CRITICAL_CNTL = 0 will thus force high priority all the time.
		*/
		if (max_stop_req - critical_point < 4)
			critical_point = 0;

		if (critical_point == 0 && mode2 && rdev->family == CHIP_R300) {
			/* some R300 cards have a problem with this set to 0 when CRTC2 is enabled. */
			critical_point = 0x10;
		}

		temp = RREG32(RADEON_GRPH_BUFFER_CNTL);
		temp &= ~(RADEON_GRPH_STOP_REQ_MASK);
		temp |= (stop_req << RADEON_GRPH_STOP_REQ_SHIFT);
		temp &= ~(RADEON_GRPH_START_REQ_MASK);
		if ((rdev->family == CHIP_R350) &&
		    (stop_req > 0x15)) {
			stop_req -= 0x10;
		}
		temp |= (stop_req << RADEON_GRPH_START_REQ_SHIFT);
		temp |= RADEON_GRPH_BUFFER_SIZE;
		temp &= ~(RADEON_GRPH_CRITICAL_CNTL   |
			  RADEON_GRPH_CRITICAL_AT_SOF |
			  RADEON_GRPH_STOP_CNTL);
		/*
		  Write the result into the register.
		*/
		WREG32(RADEON_GRPH_BUFFER_CNTL, ((temp & ~RADEON_GRPH_CRITICAL_POINT_MASK) |
						       (critical_point << RADEON_GRPH_CRITICAL_POINT_SHIFT)));

#if 0
		if ((rdev->family == CHIP_RS400) ||
		    (rdev->family == CHIP_RS480)) {
			/* attempt to program RS400 disp regs correctly ??? */
			temp = RREG32(RS400_DISP1_REG_CNTL);
			temp &= ~(RS400_DISP1_START_REQ_LEVEL_MASK |
				  RS400_DISP1_STOP_REQ_LEVEL_MASK);
			WREG32(RS400_DISP1_REQ_CNTL1, (temp |
						       (critical_point << RS400_DISP1_START_REQ_LEVEL_SHIFT) |
						       (critical_point << RS400_DISP1_STOP_REQ_LEVEL_SHIFT)));
			temp = RREG32(RS400_DMIF_MEM_CNTL1);
			temp &= ~(RS400_DISP1_CRITICAL_POINT_START_MASK |
				  RS400_DISP1_CRITICAL_POINT_STOP_MASK);
			WREG32(RS400_DMIF_MEM_CNTL1, (temp |
						      (critical_point << RS400_DISP1_CRITICAL_POINT_START_SHIFT) |
						      (critical_point << RS400_DISP1_CRITICAL_POINT_STOP_SHIFT)));
		}
#endif

		DRM_DEBUG_KMS("GRPH_BUFFER_CNTL now 0x%x\n",
			  (unsigned int)RREG32(RADEON_GRPH_BUFFER_CNTL));
	}

	if (mode2) {
		u32 grph2_cntl;
		stop_req = mode2->hdisplay * pixel_bytes2 / 16;

		if (stop_req > max_stop_req)
			stop_req = max_stop_req;

		/*
		  Find the drain rate of the display buffer.
		*/
		temp_ff.full = dfixed_const((16/pixel_bytes2));
		disp_drain_rate2.full = dfixed_div(pix_clk2, temp_ff);

		grph2_cntl = RREG32(RADEON_GRPH2_BUFFER_CNTL);
		grph2_cntl &= ~(RADEON_GRPH_STOP_REQ_MASK);
		grph2_cntl |= (stop_req << RADEON_GRPH_STOP_REQ_SHIFT);
		grph2_cntl &= ~(RADEON_GRPH_START_REQ_MASK);
		if ((rdev->family == CHIP_R350) &&
		    (stop_req > 0x15)) {
			stop_req -= 0x10;
		}
		grph2_cntl |= (stop_req << RADEON_GRPH_START_REQ_SHIFT);
		grph2_cntl |= RADEON_GRPH_BUFFER_SIZE;
		grph2_cntl &= ~(RADEON_GRPH_CRITICAL_CNTL   |
			  RADEON_GRPH_CRITICAL_AT_SOF |
			  RADEON_GRPH_STOP_CNTL);

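		/*
		  Estimate CRTC2's critical point from the worst case memory
		  read return rate, min(Mclk * vram_width * (DDR ? 2 : 1) / 128,
		  Sclk), and the time CRTC1 can hold high priority before
		  CRTC2 must start draining its own FIFO.
		*/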
		if ((rdev->family == CHIP_RS100) ||
		    (rdev->family == CHIP_RS200))
			critical_point2 = 0;
		else {
			temp = (rdev->mc.vram_width * (rdev->mc.vram_is_ddr + 1)) / 128;
			temp_ff.full = dfixed_const(temp);
			temp_ff.full = dfixed_mul(mclk_ff, temp_ff);
			if (sclk_ff.full < temp_ff.full)
				temp_ff.full = sclk_ff.full;

			read_return_rate.full = temp_ff.full;

			if (mode1) {
				temp_ff.full = read_return_rate.full - disp_drain_rate.full;
				time_disp1_drop_priority.full = dfixed_div(crit_point_ff, temp_ff);
			} else {
				time_disp1_drop_priority.full = 0;
			}
			crit_point_ff.full = disp_latency.full + time_disp1_drop_priority.full + disp_latency.full;
			crit_point_ff.full = dfixed_mul(crit_point_ff, disp_drain_rate2);
			crit_point_ff.full += dfixed_const_half(0);

			critical_point2 = dfixed_trunc(crit_point_ff);

			if (rdev->disp_priority == 2) {
				critical_point2 = 0;
			}

			if (max_stop_req - critical_point2 < 4)
				critical_point2 = 0;

		}

		if (critical_point2 == 0 && rdev->family == CHIP_R300) {
			/* some R300 cards have a problem with this set to 0 */
			critical_point2 = 0x10;
		}

		WREG32(RADEON_GRPH2_BUFFER_CNTL, ((grph2_cntl & ~RADEON_GRPH_CRITICAL_POINT_MASK) |
						  (critical_point2 << RADEON_GRPH_CRITICAL_POINT_SHIFT)));

		if ((rdev->family == CHIP_RS400) ||
		    (rdev->family == CHIP_RS480)) {
#if 0
			/* attempt to program RS400 disp2 regs correctly ??? */
			temp = RREG32(RS400_DISP2_REQ_CNTL1);
			temp &= ~(RS400_DISP2_START_REQ_LEVEL_MASK |
				  RS400_DISP2_STOP_REQ_LEVEL_MASK);
			WREG32(RS400_DISP2_REQ_CNTL1, (temp |
						       (critical_point2 << RS400_DISP1_START_REQ_LEVEL_SHIFT) |
						       (critical_point2 << RS400_DISP1_STOP_REQ_LEVEL_SHIFT)));
			temp = RREG32(RS400_DISP2_REQ_CNTL2);
			temp &= ~(RS400_DISP2_CRITICAL_POINT_START_MASK |
				  RS400_DISP2_CRITICAL_POINT_STOP_MASK);
			WREG32(RS400_DISP2_REQ_CNTL2, (temp |
						       (critical_point2 << RS400_DISP2_CRITICAL_POINT_START_SHIFT) |
						       (critical_point2 << RS400_DISP2_CRITICAL_POINT_STOP_SHIFT)));
#endif
			WREG32(RS400_DISP2_REQ_CNTL1, 0x105DC1CC);
			WREG32(RS400_DISP2_REQ_CNTL2, 0x2749D000);
			WREG32(RS400_DMIF_MEM_CNTL1,  0x29CA71DC);
			WREG32(RS400_DISP1_REQ_CNTL1, 0x28FBC3AC);
		}

		DRM_DEBUG_KMS("GRPH2_BUFFER_CNTL now 0x%x\n",
			  (unsigned int)RREG32(RADEON_GRPH2_BUFFER_CNTL));
	}
}

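/**
 * r100_ring_test - basic CP sanity test via a scratch register write.
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Emit a write of 0xDEADBEEF to a scratch register through the ring,
 * then poll the register until the value lands or the usec timeout
 * expires.  Returns 0 on success, -EINVAL on timeout.
 */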
int r100_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = radeon_scratch_get(rdev, &scratch);
	if (r) {
		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	r = radeon_ring_lock(rdev, ring, 2);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		radeon_scratch_free(rdev, scratch);
		return r;
	}
	radeon_ring_write(ring, PACKET0(scratch, 0));
	radeon_ring_write(ring, 0xDEADBEEF);
	radeon_ring_unlock_commit(rdev, ring);
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF) {
			break;
		}
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ring test succeeded in %u usecs\n", i);
	} else {
		DRM_ERROR("radeon: ring test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
	radeon_scratch_free(rdev, scratch);
	return r;
}

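/**
 * r100_ring_ib_execute - emit an indirect buffer on the gfx ring.
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to schedule
 *
 * Emit the IB base address and length on the gfx ring.  When a rptr
 * save register is in use, the expected read pointer (current wptr
 * plus the 2 dwords of the save packet and 3 dwords of the IB packet)
 * is written first.
 */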
void r100_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];

	if (ring->rptr_save_reg) {
		u32 next_rptr = ring->wptr + 2 + 3;
		radeon_ring_write(ring, PACKET0(ring->rptr_save_reg, 0));
		radeon_ring_write(ring, next_rptr);
	}

	radeon_ring_write(ring, PACKET0(RADEON_CP_IB_BASE, 1));
	radeon_ring_write(ring, ib->gpu_addr);
	radeon_ring_write(ring, ib->length_dw);
}

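/**
 * r100_ib_test - IB sanity test - write a scratch register from an IB.
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Schedule a small IB that writes 0xDEADBEEF to a scratch register,
 * wait on its fence, then poll for the value.  Returns 0 on success,
 * -EINVAL on timeout, or the error from IB allocation/scheduling.
 */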
int r100_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	struct radeon_ib ib;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = radeon_scratch_get(rdev, &scratch);
	if (r) {
		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	r = radeon_ib_get(rdev, RADEON_RING_TYPE_GFX_INDEX, &ib, NULL, 256);
	if (r) {
		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
		goto free_scratch;
	}
	ib.ptr[0] = PACKET0(scratch, 0);
	ib.ptr[1] = 0xDEADBEEF;
	ib.ptr[2] = PACKET2(0);
	ib.ptr[3] = PACKET2(0);
	ib.ptr[4] = PACKET2(0);
	ib.ptr[5] = PACKET2(0);
	ib.ptr[6] = PACKET2(0);
	ib.ptr[7] = PACKET2(0);
	ib.length_dw = 8;
	r = radeon_ib_schedule(rdev, &ib, NULL);
	if (r) {
		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
		goto free_ib;
	}
	r = radeon_fence_wait(ib.fence, false);
	if (r) {
		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
		goto free_ib;
	}
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF) {
			break;
		}
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ib test succeeded in %u usecs\n", i);
	} else {
		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
free_ib:
	radeon_ib_free(rdev, &ib);
free_scratch:
	radeon_scratch_free(rdev, scratch);
	return r;
}

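/**
 * r100_mc_stop - stop all memory controller clients.
 *
 * @rdev: radeon_device pointer
 * @save: pointer to the register state to save into
 *
 * Shut down the CP and disable VGA, cursor, overlay and CRTC display
 * requests so the memory controller can be reprogrammed safely.  The
 * touched CRTC registers are saved in @save for r100_mc_resume().
 */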
void r100_mc_stop(struct radeon_device *rdev, struct r100_mc_save *save)
{
	/* Shut down the CP. We shouldn't need to do this, but better
	 * safe than sorry.
	 */
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
	WREG32(R_000740_CP_CSQ_CNTL, 0);

	/* Save a few CRTC registers */
	save->GENMO_WT = RREG8(R_0003C2_GENMO_WT);
	save->CRTC_EXT_CNTL = RREG32(R_000054_CRTC_EXT_CNTL);
	save->CRTC_GEN_CNTL = RREG32(R_000050_CRTC_GEN_CNTL);
	save->CUR_OFFSET = RREG32(R_000260_CUR_OFFSET);
	if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
		save->CRTC2_GEN_CNTL = RREG32(R_0003F8_CRTC2_GEN_CNTL);
		save->CUR2_OFFSET = RREG32(R_000360_CUR2_OFFSET);
	}

	/* Disable VGA aperture access */
	WREG8(R_0003C2_GENMO_WT, C_0003C2_VGA_RAM_EN & save->GENMO_WT);
	/* Disable cursor, overlay, crtc */
	WREG32(R_000260_CUR_OFFSET, save->CUR_OFFSET | S_000260_CUR_LOCK(1));
	WREG32(R_000054_CRTC_EXT_CNTL, save->CRTC_EXT_CNTL |
					S_000054_CRTC_DISPLAY_DIS(1));
	WREG32(R_000050_CRTC_GEN_CNTL,
			(C_000050_CRTC_CUR_EN & save->CRTC_GEN_CNTL) |
			S_000050_CRTC_DISP_REQ_EN_B(1));
	WREG32(R_000420_OV0_SCALE_CNTL,
		C_000420_OV0_OVERLAY_EN & RREG32(R_000420_OV0_SCALE_CNTL));
	WREG32(R_000260_CUR_OFFSET, C_000260_CUR_LOCK & save->CUR_OFFSET);
	if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
		WREG32(R_000360_CUR2_OFFSET, save->CUR2_OFFSET |
						S_000360_CUR2_LOCK(1));
		WREG32(R_0003F8_CRTC2_GEN_CNTL,
			(C_0003F8_CRTC2_CUR_EN & save->CRTC2_GEN_CNTL) |
			S_0003F8_CRTC2_DISPLAY_DIS(1) |
			S_0003F8_CRTC2_DISP_REQ_EN_B(1));
		WREG32(R_000360_CUR2_OFFSET,
			C_000360_CUR2_LOCK & save->CUR2_OFFSET);
	}
}

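/**
 * r100_mc_resume - restore MC clients after reprogramming.
 *
 * @rdev: radeon_device pointer
 * @save: register state saved by r100_mc_stop()
 *
 * Re-point both CRTCs at the new VRAM base and restore the CRTC
 * registers saved in r100_mc_stop().
 */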
void r100_mc_resume(struct radeon_device *rdev, struct r100_mc_save *save)
{
	/* Update base address for crtc */
	WREG32(R_00023C_DISPLAY_BASE_ADDR, rdev->mc.vram_start);
	if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
		WREG32(R_00033C_CRTC2_DISPLAY_BASE_ADDR, rdev->mc.vram_start);
	}
	/* Restore CRTC registers */
	WREG8(R_0003C2_GENMO_WT, save->GENMO_WT);
	WREG32(R_000054_CRTC_EXT_CNTL, save->CRTC_EXT_CNTL);
	WREG32(R_000050_CRTC_GEN_CNTL, save->CRTC_GEN_CNTL);
	if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
		WREG32(R_0003F8_CRTC2_GEN_CNTL, save->CRTC2_GEN_CNTL);
	}
}

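/**
 * r100_vga_render_disable - disable legacy VGA rendering.
 *
 * @rdev: radeon_device pointer
 *
 * Clear the VGA RAM enable bit in GENMO_WT so legacy VGA accesses no
 * longer reach the framebuffer.
 */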
void r100_vga_render_disable(struct radeon_device *rdev)
{
	u32 tmp;

	tmp = RREG8(R_0003C2_GENMO_WT);
	WREG8(R_0003C2_GENMO_WT, C_0003C2_VGA_RAM_EN & tmp);
}

static void r100_debugfs(struct radeon_device *rdev)
{
	int r;

	r = r100_debugfs_mc_info_init(rdev);
	if (r)
		dev_warn(rdev->dev, "Failed to create r100_mc debugfs file.\n");
}

static void r100_mc_program(struct radeon_device *rdev)
{
	struct r100_mc_save save;

	/* Stop all MC clients */
	r100_mc_stop(rdev, &save);
	if (rdev->flags & RADEON_IS_AGP) {
		WREG32(R_00014C_MC_AGP_LOCATION,
			S_00014C_MC_AGP_START(rdev->mc.gtt_start >> 16) |
			S_00014C_MC_AGP_TOP(rdev->mc.gtt_end >> 16));
		WREG32(R_000170_AGP_BASE, lower_32_bits(rdev->mc.agp_base));
		if (rdev->family > CHIP_RV200)
			WREG32(R_00015C_AGP_BASE_2,
				upper_32_bits(rdev->mc.agp_base) & 0xff);
	} else {
		WREG32(R_00014C_MC_AGP_LOCATION, 0x0FFFFFFF);
		WREG32(R_000170_AGP_BASE, 0);
		if (rdev->family > CHIP_RV200)
			WREG32(R_00015C_AGP_BASE_2, 0);
	}
	/* Wait for MC idle */
	if (r100_mc_wait_for_idle(rdev))
		dev_warn(rdev->dev, "Wait for MC idle timeout.\n");
	/* Program the MC; this should be a 32-bit limited address space */
	WREG32(R_000148_MC_FB_LOCATION,
		S_000148_MC_FB_START(rdev->mc.vram_start >> 16) |
		S_000148_MC_FB_TOP(rdev->mc.vram_end >> 16));
	r100_mc_resume(rdev, &save);
}

static void r100_clock_startup(struct radeon_device *rdev)
{
	u32 tmp;

	if (radeon_dynclks != -1 && radeon_dynclks)
		radeon_legacy_set_clock_gating(rdev, 1);
	/* We need to force on some of the blocks */
	tmp = RREG32_PLL(R_00000D_SCLK_CNTL);
	tmp |= S_00000D_FORCE_CP(1) | S_00000D_FORCE_VIP(1);
	if ((rdev->family == CHIP_RV250) || (rdev->family == CHIP_RV280))
		tmp |= S_00000D_FORCE_DISP1(1) | S_00000D_FORCE_DISP2(1);
	WREG32_PLL(R_00000D_SCLK_CNTL, tmp);
}

static int r100_startup(struct radeon_device *rdev)
{
	int r;

	/* set common regs */
	r100_set_common_regs(rdev);
	/* program mc */
	r100_mc_program(rdev);
	/* Resume clock */
	r100_clock_startup(rdev);
	/* Initialize the GART (initialized after TTM so we can allocate
	 * memory through TTM, and finalized after TTM as well) */
	r100_enable_bm(rdev);
	if (rdev->flags & RADEON_IS_PCI) {
		r = r100_pci_gart_enable(rdev);
		if (r)
			return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	/* Enable IRQ */
	r100_irq_set(rdev);
	rdev->config.r100.hdp_cntl = RREG32(RADEON_HOST_PATH_CNTL);
	/* 1M ring buffer */
	r = r100_cp_init(rdev, 1024 * 1024);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP (%d).\n", r);
		return r;
	}

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	return 0;
}

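/**
 * r100_resume - resume from suspend.
 *
 * @rdev: radeon_device pointer
 *
 * Disable the GART, reset and re-post the GPU through the combios
 * tables, then restart the engines via r100_startup().  Returns 0 on
 * success, error code otherwise.
 */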
int r100_resume(struct radeon_device *rdev)
{
	int r;

	/* Make sure the GART is not working */
	if (rdev->flags & RADEON_IS_PCI)
		r100_pci_gart_disable(rdev);
	/* Resume clock before doing reset */
	r100_clock_startup(rdev);
	/* Reset gpu before posting otherwise ATOM will enter infinite loop */
	if (radeon_asic_reset(rdev)) {
		dev_warn(rdev->dev, "GPU reset failed! (0xE40=0x%08X, 0x7C0=0x%08X)\n",
			RREG32(R_000E40_RBBM_STATUS),
			RREG32(R_0007C0_CP_STAT));
	}
	/* post */
	radeon_combios_asic_init(rdev->ddev);
	/* Resume clock after posting */
	r100_clock_startup(rdev);
	/* Initialize surface registers */
	radeon_surface_init(rdev);

	rdev->accel_working = true;
	r = r100_startup(rdev);
	if (r) {
		rdev->accel_working = false;
	}
	return r;
}

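/**
 * r100_suspend - prepare for suspend.
 *
 * @rdev: radeon_device pointer
 *
 * Tear the engines down in the reverse order of r100_startup(): CP,
 * writeback, interrupts, then the PCI GART.  Always returns 0.
 */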
int r100_suspend(struct radeon_device *rdev)
{
	r100_cp_disable(rdev);
	radeon_wb_disable(rdev);
	r100_irq_disable(rdev);
	if (rdev->flags & RADEON_IS_PCI)
		r100_pci_gart_disable(rdev);
	return 0;
}

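/**
 * r100_fini - tear down the driver for an r1xx/r2xx asic.
 *
 * @rdev: radeon_device pointer
 *
 * Free every subsystem brought up by r100_init(), then release the CP
 * microcode and the BIOS copy.
 */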
void r100_fini(struct radeon_device *rdev)
{
	r100_cp_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_gem_fini(rdev);
	if (rdev->flags & RADEON_IS_PCI)
		r100_pci_gart_fini(rdev);
	radeon_agp_fini(rdev);
	radeon_irq_kms_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	r100_cp_fini_microcode(rdev);
	free(rdev->bios, DRM_MEM_DRIVER);
	rdev->bios = NULL;
}

/*
 * Due to how kexec works, it can leave the hw fully initialised when it
 * boots the new kernel. However, doing our init sequence with the CP and
 * WB stuff set up causes GPU hangs on the RN50 at least. So at startup
 * do some quick sanity checks and restore sane values to avoid this
 * problem.
 */
void r100_restore_sanity(struct radeon_device *rdev)
{
	u32 tmp;

	tmp = RREG32(RADEON_CP_CSQ_CNTL);
	if (tmp) {
		WREG32(RADEON_CP_CSQ_CNTL, 0);
	}
	tmp = RREG32(RADEON_CP_RB_CNTL);
	if (tmp) {
		WREG32(RADEON_CP_RB_CNTL, 0);
	}
	tmp = RREG32(RADEON_SCRATCH_UMSK);
	if (tmp) {
		WREG32(RADEON_SCRATCH_UMSK, 0);
	}
}

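/**
 * r100_init - asic specific driver and hw init.
 *
 * @rdev: radeon_device pointer
 *
 * Set up the asic: BIOS, reset/post, clocks, AGP, VRAM, fences, IRQs,
 * memory manager and GART, then start acceleration.  A failure in
 * r100_startup() only disables acceleration; modesetting keeps working.
 * Returns 0 on success, error code on fatal failures.
 */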
int r100_init(struct radeon_device *rdev)
{
	int r;

	/* Register debugfs file specific to this group of asics */
	r100_debugfs(rdev);
	/* Disable VGA */
	r100_vga_render_disable(rdev);
	/* Initialize scratch registers */
	radeon_scratch_init(rdev);
	/* Initialize surface registers */
	radeon_surface_init(rdev);
	/* sanity check some registers to avoid hangs like after kexec */
	r100_restore_sanity(rdev);
	/* TODO: disabling VGA properly needs to use VGA requests */
	/* BIOS */
	if (!radeon_get_bios(rdev)) {
		if (ASIC_IS_AVIVO(rdev))
			return -EINVAL;
	}
	if (rdev->is_atom_bios) {
		dev_err(rdev->dev, "Expecting combios for R100-family GPU\n");
		return -EINVAL;
	} else {
		r = radeon_combios_init(rdev);
		if (r)
			return r;
	}
	/* Reset gpu before posting otherwise ATOM will enter infinite loop */
	if (radeon_asic_reset(rdev)) {
		dev_warn(rdev->dev,
			"GPU reset failed! (0xE40=0x%08X, 0x7C0=0x%08X)\n",
			RREG32(R_000E40_RBBM_STATUS),
			RREG32(R_0007C0_CP_STAT));
	}
	/* check if the card is posted or not */
	if (radeon_boot_test_post_card(rdev) == false)
		return -EINVAL;
	/* Set asic errata */
	r100_errata(rdev);
	/* Initialize clocks */
	radeon_get_clock_info(rdev->ddev);
	/* initialize AGP */
	if (rdev->flags & RADEON_IS_AGP) {
		r = radeon_agp_init(rdev);
		if (r) {
			radeon_agp_disable(rdev);
		}
	}
	/* initialize VRAM */
	r100_mc_init(rdev);
	/* Fence driver */
	r = radeon_fence_driver_init(rdev);
	if (r)
		return r;
	r = radeon_irq_kms_init(rdev);
	if (r)
		return r;
	/* Memory manager */
	r = radeon_bo_init(rdev);
	if (r)
		return r;
	if (rdev->flags & RADEON_IS_PCI) {
		r = r100_pci_gart_init(rdev);
		if (r)
			return r;
	}
	r100_set_safe_registers(rdev);

	rdev->accel_working = true;
	r = r100_startup(rdev);
	if (r) {
		/* Something went wrong with the accel init, so stop accel */
		dev_err(rdev->dev, "Disabling GPU acceleration\n");
		r100_cp_fini(rdev);
		radeon_wb_fini(rdev);
		radeon_ib_pool_fini(rdev);
		radeon_irq_kms_fini(rdev);
		if (rdev->flags & RADEON_IS_PCI)
			r100_pci_gart_fini(rdev);
		rdev->accel_working = false;
	}
	return 0;
}

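/**
 * r100_mm_rreg - read an MMIO register.
 *
 * @rdev: radeon_device pointer
 * @reg: register offset
 * @always_indirect: force the MM_INDEX/MM_DATA path
 *
 * Registers inside the mapped aperture are read directly; anything
 * else goes through the indirect index/data pair under the mmio_idx
 * spinlock.
 */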
uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg,
		      bool always_indirect)
{
	if (reg < rdev->rmmio_size && !always_indirect)
		return bus_read_4(rdev->rmmio, reg);
	else {
		unsigned long flags;
		uint32_t ret;

		DRM_SPINLOCK_IRQSAVE(&rdev->mmio_idx_lock, flags);
		bus_write_4(rdev->rmmio, RADEON_MM_INDEX, reg);
		ret = bus_read_4(rdev->rmmio, RADEON_MM_DATA);
		DRM_SPINUNLOCK_IRQRESTORE(&rdev->mmio_idx_lock, flags);

		return ret;
	}
}

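/**
 * r100_mm_wreg - write an MMIO register.
 *
 * @rdev: radeon_device pointer
 * @reg: register offset
 * @v: value to write
 * @always_indirect: force the MM_INDEX/MM_DATA path
 *
 * Registers inside the mapped aperture are written directly; anything
 * else goes through the indirect index/data pair under the mmio_idx
 * spinlock.
 */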
void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v,
		  bool always_indirect)
{
	if (reg < rdev->rmmio_size && !always_indirect)
		bus_write_4(rdev->rmmio, reg, v);
	else {
		unsigned long flags;

		DRM_SPINLOCK_IRQSAVE(&rdev->mmio_idx_lock, flags);
		bus_write_4(rdev->rmmio, RADEON_MM_INDEX, reg);
		bus_write_4(rdev->rmmio, RADEON_MM_DATA, v);
		DRM_SPINUNLOCK_IRQRESTORE(&rdev->mmio_idx_lock, flags);
	}
}

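/**
 * r100_io_rreg - read a register through the PCI I/O BAR.
 *
 * @rdev: radeon_device pointer
 * @reg: register offset
 *
 * Direct for offsets inside the I/O aperture, indirect through
 * MM_INDEX/MM_DATA otherwise; note the indirect path is unlocked.
 */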
u32 r100_io_rreg(struct radeon_device *rdev, u32 reg)
{
	if (reg < rdev->rio_mem_size)
		return bus_read_4(rdev->rio_mem, reg);
	else {
		/* XXX No locking? -- dumbbell@ */
		bus_write_4(rdev->rio_mem, RADEON_MM_INDEX, reg);
		return bus_read_4(rdev->rio_mem, RADEON_MM_DATA);
	}
}

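/**
 * r100_io_wreg - write a register through the PCI I/O BAR.
 *
 * @rdev: radeon_device pointer
 * @reg: register offset
 * @v: value to write
 *
 * Direct for offsets inside the I/O aperture, indirect through
 * MM_INDEX/MM_DATA otherwise; note the indirect path is unlocked.
 */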
void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	if (reg < rdev->rio_mem_size)
		bus_write_4(rdev->rio_mem, reg, v);
	else {
		/* XXX No locking? -- dumbbell@ */
		bus_write_4(rdev->rio_mem, RADEON_MM_INDEX, reg);
		bus_write_4(rdev->rio_mem, RADEON_MM_DATA, v);
	}
}
