/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <dev/drm2/drmP.h>
#include <dev/drm2/radeon/radeon_drm.h>
#include "radeon_reg.h"
#include "radeon.h"
#include "radeon_asic.h"
#include "r100d.h"
#include "rs100d.h"
#include "rv200d.h"
#include "rv250d.h"
#include "atom.h"

#include "r100_reg_safe.h"
#include "rn50_reg_safe.h"

/* Firmware Names */
#define FIRMWARE_R100 "radeonkmsfw_R100_cp"
#define FIRMWARE_R200 "radeonkmsfw_R200_cp"
#define FIRMWARE_R300 "radeonkmsfw_R300_cp"
#define FIRMWARE_R420 "radeonkmsfw_R420_cp"
#define FIRMWARE_RS690 "radeonkmsfw_RS690_cp"
#define FIRMWARE_RS600 "radeonkmsfw_RS600_cp"
#define FIRMWARE_R520 "radeonkmsfw_R520_cp"

#ifdef __linux__
MODULE_FIRMWARE(FIRMWARE_R100);
MODULE_FIRMWARE(FIRMWARE_R200);
MODULE_FIRMWARE(FIRMWARE_R300);
MODULE_FIRMWARE(FIRMWARE_R420);
MODULE_FIRMWARE(FIRMWARE_RS690);
MODULE_FIRMWARE(FIRMWARE_RS600);
MODULE_FIRMWARE(FIRMWARE_R520);
#endif

#include "r100_track.h"

/* This file gathers functions specific to:
 * r100,rv100,rs100,rv200,rs200,r200,rv250,rs300,rv280
 * and others in some cases.
 */

static bool r100_is_in_vblank(struct radeon_device *rdev, int crtc)
{
	if (crtc == 0) {
		if (RREG32(RADEON_CRTC_STATUS) & RADEON_CRTC_VBLANK_CUR)
			return true;
		else
			return false;
	} else {
		if (RREG32(RADEON_CRTC2_STATUS) & RADEON_CRTC2_VBLANK_CUR)
			return true;
		else
			return false;
	}
}

static bool r100_is_counter_moving(struct radeon_device *rdev, int crtc)
{
	u32 vline1, vline2;

	if (crtc == 0) {
		vline1 = (RREG32(RADEON_CRTC_VLINE_CRNT_VLINE) >> 16) & RADEON_CRTC_V_TOTAL;
		vline2 = (RREG32(RADEON_CRTC_VLINE_CRNT_VLINE) >> 16) & RADEON_CRTC_V_TOTAL;
	} else {
		vline1 = (RREG32(RADEON_CRTC2_VLINE_CRNT_VLINE) >> 16) & RADEON_CRTC_V_TOTAL;
		vline2 = (RREG32(RADEON_CRTC2_VLINE_CRNT_VLINE) >> 16) & RADEON_CRTC_V_TOTAL;
	}
	if (vline1 != vline2)
		return true;
	else
		return false;
}

/**
 * r100_wait_for_vblank - vblank wait asic callback.
 *
 * @rdev: radeon_device pointer
 * @crtc: crtc to wait for vblank on
 *
 * Wait for vblank on the requested crtc (r1xx-r4xx).
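 * The wait is a simple busy loop: if we are already inside a vblank
 * period we first spin until it ends, then spin until the next one
 * begins.  Every 100 iterations the vline counter is sampled twice via
 * r100_is_counter_moving(); if it is not advancing the CRTC is
 * presumably not scanning, so we bail out rather than spin forever.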
 */
void r100_wait_for_vblank(struct radeon_device *rdev, int crtc)
{
	unsigned i = 0;

	if (crtc >= rdev->num_crtc)
		return;

	if (crtc == 0) {
		if (!(RREG32(RADEON_CRTC_GEN_CNTL) & RADEON_CRTC_EN))
			return;
	} else {
		if (!(RREG32(RADEON_CRTC2_GEN_CNTL) & RADEON_CRTC2_EN))
			return;
	}

	/* depending on when we hit vblank, we may be close to active; if so,
	 * wait for another frame.
	 */
	while (r100_is_in_vblank(rdev, crtc)) {
		if (i++ % 100 == 0) {
			if (!r100_is_counter_moving(rdev, crtc))
				break;
		}
	}

	while (!r100_is_in_vblank(rdev, crtc)) {
		if (i++ % 100 == 0) {
			if (!r100_is_counter_moving(rdev, crtc))
				break;
		}
	}
}

/**
 * r100_pre_page_flip - pre-pageflip callback.
 *
 * @rdev: radeon_device pointer
 * @crtc: crtc to prepare for pageflip on
 *
 * Pre-pageflip callback (r1xx-r4xx).
 * Enables the pageflip irq (vblank irq).
 */
void r100_pre_page_flip(struct radeon_device *rdev, int crtc)
{
	/* enable the pflip int */
	radeon_irq_kms_pflip_irq_get(rdev, crtc);
}

/**
 * r100_post_page_flip - post-pageflip callback.
 *
 * @rdev: radeon_device pointer
 * @crtc: crtc to cleanup pageflip on
 *
 * Post-pageflip callback (r1xx-r4xx).
 * Disables the pageflip irq (vblank irq).
 */
void r100_post_page_flip(struct radeon_device *rdev, int crtc)
{
	/* disable the pflip int */
	radeon_irq_kms_pflip_irq_put(rdev, crtc);
}

/**
 * r100_page_flip - pageflip callback.
 *
 * @rdev: radeon_device pointer
 * @crtc_id: crtc to cleanup pageflip on
 * @crtc_base: new address of the crtc (GPU MC address)
 *
 * Does the actual pageflip (r1xx-r4xx).
 * During vblank we take the crtc lock and wait for the update_pending
 * bit to go high; when it does, we release the lock and allow the
 * double buffered update to take place.
 * Returns the current update pending status.
 */
u32 r100_page_flip(struct radeon_device *rdev, int crtc_id, u64 crtc_base)
{
	struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc_id];
	u32 tmp = ((u32)crtc_base) | RADEON_CRTC_OFFSET__OFFSET_LOCK;
	int i;

	/* Lock the graphics update lock */
	/* update the scanout addresses */
	WREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset, tmp);

	/* Wait for update_pending to go high. */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset) & RADEON_CRTC_OFFSET__GUI_TRIG_OFFSET)
			break;
		udelay(1);
	}
	DRM_DEBUG("Update pending now high. Unlocking vupdate_lock.\n");

	/* Unlock the lock, so double-buffering can take place inside vblank */
	tmp &= ~RADEON_CRTC_OFFSET__OFFSET_LOCK;
	WREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset, tmp);

	/* Return current update_pending status: */
	return RREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset) & RADEON_CRTC_OFFSET__GUI_TRIG_OFFSET;
}

/**
 * r100_pm_get_dynpm_state - look up dynpm power state callback.
 *
 * @rdev: radeon_device pointer
 *
 * Look up the optimal power state based on the
 * current state of the GPU (r1xx-r5xx).
 * Used for dynpm only.
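 * The requested index is derived from dynpm_planned_action: MINIMUM
 * picks state 0, DOWNCLOCK/UPCLOCK step one state down or up (skipping
 * SINGLE_DISPLAY_ONLY states while more than one crtc is active, and,
 * when downclocking, NO_DISPLAY states while any crtc is active), and
 * DEFAULT restores default_power_state_index.  These asics expose only
 * one clock mode per power state, so the clock mode index is always 0.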
222 */ 223void r100_pm_get_dynpm_state(struct radeon_device *rdev) 224{ 225 int i; 226 rdev->pm.dynpm_can_upclock = true; 227 rdev->pm.dynpm_can_downclock = true; 228 229 switch (rdev->pm.dynpm_planned_action) { 230 case DYNPM_ACTION_MINIMUM: 231 rdev->pm.requested_power_state_index = 0; 232 rdev->pm.dynpm_can_downclock = false; 233 break; 234 case DYNPM_ACTION_DOWNCLOCK: 235 if (rdev->pm.current_power_state_index == 0) { 236 rdev->pm.requested_power_state_index = rdev->pm.current_power_state_index; 237 rdev->pm.dynpm_can_downclock = false; 238 } else { 239 if (rdev->pm.active_crtc_count > 1) { 240 for (i = 0; i < rdev->pm.num_power_states; i++) { 241 if (rdev->pm.power_state[i].flags & RADEON_PM_STATE_SINGLE_DISPLAY_ONLY) 242 continue; 243 else if (i >= rdev->pm.current_power_state_index) { 244 rdev->pm.requested_power_state_index = rdev->pm.current_power_state_index; 245 break; 246 } else { 247 rdev->pm.requested_power_state_index = i; 248 break; 249 } 250 } 251 } else 252 rdev->pm.requested_power_state_index = 253 rdev->pm.current_power_state_index - 1; 254 } 255 /* don't use the power state if crtcs are active and no display flag is set */ 256 if ((rdev->pm.active_crtc_count > 0) && 257 (rdev->pm.power_state[rdev->pm.requested_power_state_index].clock_info[0].flags & 258 RADEON_PM_MODE_NO_DISPLAY)) { 259 rdev->pm.requested_power_state_index++; 260 } 261 break; 262 case DYNPM_ACTION_UPCLOCK: 263 if (rdev->pm.current_power_state_index == (rdev->pm.num_power_states - 1)) { 264 rdev->pm.requested_power_state_index = rdev->pm.current_power_state_index; 265 rdev->pm.dynpm_can_upclock = false; 266 } else { 267 if (rdev->pm.active_crtc_count > 1) { 268 for (i = (rdev->pm.num_power_states - 1); i >= 0; i--) { 269 if (rdev->pm.power_state[i].flags & RADEON_PM_STATE_SINGLE_DISPLAY_ONLY) 270 continue; 271 else if (i <= rdev->pm.current_power_state_index) { 272 rdev->pm.requested_power_state_index = rdev->pm.current_power_state_index; 273 break; 274 } else { 275 rdev->pm.requested_power_state_index = i; 276 break; 277 } 278 } 279 } else 280 rdev->pm.requested_power_state_index = 281 rdev->pm.current_power_state_index + 1; 282 } 283 break; 284 case DYNPM_ACTION_DEFAULT: 285 rdev->pm.requested_power_state_index = rdev->pm.default_power_state_index; 286 rdev->pm.dynpm_can_upclock = false; 287 break; 288 case DYNPM_ACTION_NONE: 289 default: 290 DRM_ERROR("Requested mode for not defined action\n"); 291 return; 292 } 293 /* only one clock mode per power state */ 294 rdev->pm.requested_clock_mode_index = 0; 295 296 DRM_DEBUG_DRIVER("Requested: e: %d m: %d p: %d\n", 297 rdev->pm.power_state[rdev->pm.requested_power_state_index]. 298 clock_info[rdev->pm.requested_clock_mode_index].sclk, 299 rdev->pm.power_state[rdev->pm.requested_power_state_index]. 300 clock_info[rdev->pm.requested_clock_mode_index].mclk, 301 rdev->pm.power_state[rdev->pm.requested_power_state_index]. 302 pcie_lanes); 303} 304 305/** 306 * r100_pm_init_profile - Initialize power profiles callback. 307 * 308 * @rdev: radeon_device pointer 309 * 310 * Initialize the power states used in profile mode 311 * (r1xx-r3xx). 312 * Used for profile mode only. 
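 * All profiles use clock mode 0.  The default profile keeps the
 * default power state for dpms on and off; the low and mid single-head
 * profiles pin power state 0, while the high single-head and all
 * multi-head profiles use state 0 with displays off and the default
 * state with displays on.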
313 */ 314void r100_pm_init_profile(struct radeon_device *rdev) 315{ 316 /* default */ 317 rdev->pm.profiles[PM_PROFILE_DEFAULT_IDX].dpms_off_ps_idx = rdev->pm.default_power_state_index; 318 rdev->pm.profiles[PM_PROFILE_DEFAULT_IDX].dpms_on_ps_idx = rdev->pm.default_power_state_index; 319 rdev->pm.profiles[PM_PROFILE_DEFAULT_IDX].dpms_off_cm_idx = 0; 320 rdev->pm.profiles[PM_PROFILE_DEFAULT_IDX].dpms_on_cm_idx = 0; 321 /* low sh */ 322 rdev->pm.profiles[PM_PROFILE_LOW_SH_IDX].dpms_off_ps_idx = 0; 323 rdev->pm.profiles[PM_PROFILE_LOW_SH_IDX].dpms_on_ps_idx = 0; 324 rdev->pm.profiles[PM_PROFILE_LOW_SH_IDX].dpms_off_cm_idx = 0; 325 rdev->pm.profiles[PM_PROFILE_LOW_SH_IDX].dpms_on_cm_idx = 0; 326 /* mid sh */ 327 rdev->pm.profiles[PM_PROFILE_MID_SH_IDX].dpms_off_ps_idx = 0; 328 rdev->pm.profiles[PM_PROFILE_MID_SH_IDX].dpms_on_ps_idx = 0; 329 rdev->pm.profiles[PM_PROFILE_MID_SH_IDX].dpms_off_cm_idx = 0; 330 rdev->pm.profiles[PM_PROFILE_MID_SH_IDX].dpms_on_cm_idx = 0; 331 /* high sh */ 332 rdev->pm.profiles[PM_PROFILE_HIGH_SH_IDX].dpms_off_ps_idx = 0; 333 rdev->pm.profiles[PM_PROFILE_HIGH_SH_IDX].dpms_on_ps_idx = rdev->pm.default_power_state_index; 334 rdev->pm.profiles[PM_PROFILE_HIGH_SH_IDX].dpms_off_cm_idx = 0; 335 rdev->pm.profiles[PM_PROFILE_HIGH_SH_IDX].dpms_on_cm_idx = 0; 336 /* low mh */ 337 rdev->pm.profiles[PM_PROFILE_LOW_MH_IDX].dpms_off_ps_idx = 0; 338 rdev->pm.profiles[PM_PROFILE_LOW_MH_IDX].dpms_on_ps_idx = rdev->pm.default_power_state_index; 339 rdev->pm.profiles[PM_PROFILE_LOW_MH_IDX].dpms_off_cm_idx = 0; 340 rdev->pm.profiles[PM_PROFILE_LOW_MH_IDX].dpms_on_cm_idx = 0; 341 /* mid mh */ 342 rdev->pm.profiles[PM_PROFILE_MID_MH_IDX].dpms_off_ps_idx = 0; 343 rdev->pm.profiles[PM_PROFILE_MID_MH_IDX].dpms_on_ps_idx = rdev->pm.default_power_state_index; 344 rdev->pm.profiles[PM_PROFILE_MID_MH_IDX].dpms_off_cm_idx = 0; 345 rdev->pm.profiles[PM_PROFILE_MID_MH_IDX].dpms_on_cm_idx = 0; 346 /* high mh */ 347 rdev->pm.profiles[PM_PROFILE_HIGH_MH_IDX].dpms_off_ps_idx = 0; 348 rdev->pm.profiles[PM_PROFILE_HIGH_MH_IDX].dpms_on_ps_idx = rdev->pm.default_power_state_index; 349 rdev->pm.profiles[PM_PROFILE_HIGH_MH_IDX].dpms_off_cm_idx = 0; 350 rdev->pm.profiles[PM_PROFILE_HIGH_MH_IDX].dpms_on_cm_idx = 0; 351} 352 353/** 354 * r100_pm_misc - set additional pm hw parameters callback. 355 * 356 * @rdev: radeon_device pointer 357 * 358 * Set non-clock parameters associated with a power state 359 * (voltage, pcie lanes, etc.) (r1xx-r4xx). 
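 * Concretely: GPIO based voltage switching, the reduced speed sclk and
 * dynamic voltage drop bits in SCLK_CNTL/SCLK_CNTL2/SCLK_MORE_CNTL,
 * setting or clearing FORCE_HDP depending on whether the state allows
 * dynamic HDP clock gating, and, on discrete PCIE asics, reprogramming
 * the PCIe lane count when it differs from the current state.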
360 */ 361void r100_pm_misc(struct radeon_device *rdev) 362{ 363 int requested_index = rdev->pm.requested_power_state_index; 364 struct radeon_power_state *ps = &rdev->pm.power_state[requested_index]; 365 struct radeon_voltage *voltage = &ps->clock_info[0].voltage; 366 u32 tmp, sclk_cntl, sclk_cntl2, sclk_more_cntl; 367 368 if ((voltage->type == VOLTAGE_GPIO) && (voltage->gpio.valid)) { 369 if (ps->misc & ATOM_PM_MISCINFO_VOLTAGE_DROP_SUPPORT) { 370 tmp = RREG32(voltage->gpio.reg); 371 if (voltage->active_high) 372 tmp |= voltage->gpio.mask; 373 else 374 tmp &= ~(voltage->gpio.mask); 375 WREG32(voltage->gpio.reg, tmp); 376 if (voltage->delay) 377 udelay(voltage->delay); 378 } else { 379 tmp = RREG32(voltage->gpio.reg); 380 if (voltage->active_high) 381 tmp &= ~voltage->gpio.mask; 382 else 383 tmp |= voltage->gpio.mask; 384 WREG32(voltage->gpio.reg, tmp); 385 if (voltage->delay) 386 udelay(voltage->delay); 387 } 388 } 389 390 sclk_cntl = RREG32_PLL(SCLK_CNTL); 391 sclk_cntl2 = RREG32_PLL(SCLK_CNTL2); 392 sclk_cntl2 &= ~REDUCED_SPEED_SCLK_SEL(3); 393 sclk_more_cntl = RREG32_PLL(SCLK_MORE_CNTL); 394 sclk_more_cntl &= ~VOLTAGE_DELAY_SEL(3); 395 if (ps->misc & ATOM_PM_MISCINFO_ASIC_REDUCED_SPEED_SCLK_EN) { 396 sclk_more_cntl |= REDUCED_SPEED_SCLK_EN; 397 if (ps->misc & ATOM_PM_MISCINFO_DYN_CLK_3D_IDLE) 398 sclk_cntl2 |= REDUCED_SPEED_SCLK_MODE; 399 else 400 sclk_cntl2 &= ~REDUCED_SPEED_SCLK_MODE; 401 if (ps->misc & ATOM_PM_MISCINFO_DYNAMIC_CLOCK_DIVIDER_BY_2) 402 sclk_cntl2 |= REDUCED_SPEED_SCLK_SEL(0); 403 else if (ps->misc & ATOM_PM_MISCINFO_DYNAMIC_CLOCK_DIVIDER_BY_4) 404 sclk_cntl2 |= REDUCED_SPEED_SCLK_SEL(2); 405 } else 406 sclk_more_cntl &= ~REDUCED_SPEED_SCLK_EN; 407 408 if (ps->misc & ATOM_PM_MISCINFO_ASIC_DYNAMIC_VOLTAGE_EN) { 409 sclk_more_cntl |= IO_CG_VOLTAGE_DROP; 410 if (voltage->delay) { 411 sclk_more_cntl |= VOLTAGE_DROP_SYNC; 412 switch (voltage->delay) { 413 case 33: 414 sclk_more_cntl |= VOLTAGE_DELAY_SEL(0); 415 break; 416 case 66: 417 sclk_more_cntl |= VOLTAGE_DELAY_SEL(1); 418 break; 419 case 99: 420 sclk_more_cntl |= VOLTAGE_DELAY_SEL(2); 421 break; 422 case 132: 423 sclk_more_cntl |= VOLTAGE_DELAY_SEL(3); 424 break; 425 } 426 } else 427 sclk_more_cntl &= ~VOLTAGE_DROP_SYNC; 428 } else 429 sclk_more_cntl &= ~IO_CG_VOLTAGE_DROP; 430 431 if (ps->misc & ATOM_PM_MISCINFO_DYNAMIC_HDP_BLOCK_EN) 432 sclk_cntl &= ~FORCE_HDP; 433 else 434 sclk_cntl |= FORCE_HDP; 435 436 WREG32_PLL(SCLK_CNTL, sclk_cntl); 437 WREG32_PLL(SCLK_CNTL2, sclk_cntl2); 438 WREG32_PLL(SCLK_MORE_CNTL, sclk_more_cntl); 439 440 /* set pcie lanes */ 441 if ((rdev->flags & RADEON_IS_PCIE) && 442 !(rdev->flags & RADEON_IS_IGP) && 443 rdev->asic->pm.set_pcie_lanes && 444 (ps->pcie_lanes != 445 rdev->pm.power_state[rdev->pm.current_power_state_index].pcie_lanes)) { 446 radeon_set_pcie_lanes(rdev, 447 ps->pcie_lanes); 448 DRM_DEBUG_DRIVER("Setting: p: %d\n", ps->pcie_lanes); 449 } 450} 451 452/** 453 * r100_pm_prepare - pre-power state change callback. 454 * 455 * @rdev: radeon_device pointer 456 * 457 * Prepare for a power state change (r1xx-r4xx). 
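 * Concretely this sets RADEON_CRTC_DISP_REQ_EN_B (or the CRTC2
 * equivalent) on every enabled crtc, which should keep the crtc from
 * issuing display memory requests while the clocks are reprogrammed;
 * r100_pm_finish() clears the bit again afterwards.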
458 */ 459void r100_pm_prepare(struct radeon_device *rdev) 460{ 461 struct drm_device *ddev = rdev->ddev; 462 struct drm_crtc *crtc; 463 struct radeon_crtc *radeon_crtc; 464 u32 tmp; 465 466 /* disable any active CRTCs */ 467 list_for_each_entry(crtc, &ddev->mode_config.crtc_list, head) { 468 radeon_crtc = to_radeon_crtc(crtc); 469 if (radeon_crtc->enabled) { 470 if (radeon_crtc->crtc_id) { 471 tmp = RREG32(RADEON_CRTC2_GEN_CNTL); 472 tmp |= RADEON_CRTC2_DISP_REQ_EN_B; 473 WREG32(RADEON_CRTC2_GEN_CNTL, tmp); 474 } else { 475 tmp = RREG32(RADEON_CRTC_GEN_CNTL); 476 tmp |= RADEON_CRTC_DISP_REQ_EN_B; 477 WREG32(RADEON_CRTC_GEN_CNTL, tmp); 478 } 479 } 480 } 481} 482 483/** 484 * r100_pm_finish - post-power state change callback. 485 * 486 * @rdev: radeon_device pointer 487 * 488 * Clean up after a power state change (r1xx-r4xx). 489 */ 490void r100_pm_finish(struct radeon_device *rdev) 491{ 492 struct drm_device *ddev = rdev->ddev; 493 struct drm_crtc *crtc; 494 struct radeon_crtc *radeon_crtc; 495 u32 tmp; 496 497 /* enable any active CRTCs */ 498 list_for_each_entry(crtc, &ddev->mode_config.crtc_list, head) { 499 radeon_crtc = to_radeon_crtc(crtc); 500 if (radeon_crtc->enabled) { 501 if (radeon_crtc->crtc_id) { 502 tmp = RREG32(RADEON_CRTC2_GEN_CNTL); 503 tmp &= ~RADEON_CRTC2_DISP_REQ_EN_B; 504 WREG32(RADEON_CRTC2_GEN_CNTL, tmp); 505 } else { 506 tmp = RREG32(RADEON_CRTC_GEN_CNTL); 507 tmp &= ~RADEON_CRTC_DISP_REQ_EN_B; 508 WREG32(RADEON_CRTC_GEN_CNTL, tmp); 509 } 510 } 511 } 512} 513 514/** 515 * r100_gui_idle - gui idle callback. 516 * 517 * @rdev: radeon_device pointer 518 * 519 * Check of the GUI (2D/3D engines) are idle (r1xx-r5xx). 520 * Returns true if idle, false if not. 521 */ 522bool r100_gui_idle(struct radeon_device *rdev) 523{ 524 if (RREG32(RADEON_RBBM_STATUS) & RADEON_RBBM_ACTIVE) 525 return false; 526 else 527 return true; 528} 529 530/* hpd for digital panel detect/disconnect */ 531/** 532 * r100_hpd_sense - hpd sense callback. 533 * 534 * @rdev: radeon_device pointer 535 * @hpd: hpd (hotplug detect) pin 536 * 537 * Checks if a digital monitor is connected (r1xx-r4xx). 538 * Returns true if connected, false if not connected. 539 */ 540bool r100_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd) 541{ 542 bool connected = false; 543 544 switch (hpd) { 545 case RADEON_HPD_1: 546 if (RREG32(RADEON_FP_GEN_CNTL) & RADEON_FP_DETECT_SENSE) 547 connected = true; 548 break; 549 case RADEON_HPD_2: 550 if (RREG32(RADEON_FP2_GEN_CNTL) & RADEON_FP2_DETECT_SENSE) 551 connected = true; 552 break; 553 default: 554 break; 555 } 556 return connected; 557} 558 559/** 560 * r100_hpd_set_polarity - hpd set polarity callback. 561 * 562 * @rdev: radeon_device pointer 563 * @hpd: hpd (hotplug detect) pin 564 * 565 * Set the polarity of the hpd pin (r1xx-r4xx). 566 */ 567void r100_hpd_set_polarity(struct radeon_device *rdev, 568 enum radeon_hpd_id hpd) 569{ 570 u32 tmp; 571 bool connected = r100_hpd_sense(rdev, hpd); 572 573 switch (hpd) { 574 case RADEON_HPD_1: 575 tmp = RREG32(RADEON_FP_GEN_CNTL); 576 if (connected) 577 tmp &= ~RADEON_FP_DETECT_INT_POL; 578 else 579 tmp |= RADEON_FP_DETECT_INT_POL; 580 WREG32(RADEON_FP_GEN_CNTL, tmp); 581 break; 582 case RADEON_HPD_2: 583 tmp = RREG32(RADEON_FP2_GEN_CNTL); 584 if (connected) 585 tmp &= ~RADEON_FP2_DETECT_INT_POL; 586 else 587 tmp |= RADEON_FP2_DETECT_INT_POL; 588 WREG32(RADEON_FP2_GEN_CNTL, tmp); 589 break; 590 default: 591 break; 592 } 593} 594 595/** 596 * r100_hpd_init - hpd setup callback. 
 *
 * @rdev: radeon_device pointer
 *
 * Setup the hpd pins used by the card (r1xx-r4xx).
 * Set the polarity, and enable the hpd interrupts.
 */
void r100_hpd_init(struct radeon_device *rdev)
{
	struct drm_device *dev = rdev->ddev;
	struct drm_connector *connector;
	unsigned enable = 0;

	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
		enable |= 1 << radeon_connector->hpd.hpd;
		radeon_hpd_set_polarity(rdev, radeon_connector->hpd.hpd);
	}
	radeon_irq_kms_enable_hpd(rdev, enable);
}

/**
 * r100_hpd_fini - hpd tear down callback.
 *
 * @rdev: radeon_device pointer
 *
 * Tear down the hpd pins used by the card (r1xx-r4xx).
 * Disable the hpd interrupts.
 */
void r100_hpd_fini(struct radeon_device *rdev)
{
	struct drm_device *dev = rdev->ddev;
	struct drm_connector *connector;
	unsigned disable = 0;

	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
		disable |= 1 << radeon_connector->hpd.hpd;
	}
	radeon_irq_kms_disable_hpd(rdev, disable);
}

/*
 * PCI GART
 */
void r100_pci_gart_tlb_flush(struct radeon_device *rdev)
{
	/* TODO: can we do something here ? */
	/* It seems the hw only caches one entry, so we should discard this
	 * entry; otherwise, if the first GPU GART read hits this entry, it
	 * could end up at the wrong address. */
}

int r100_pci_gart_init(struct radeon_device *rdev)
{
	int r;

	if (rdev->gart.ptr) {
		DRM_ERROR("R100 PCI GART already initialized\n");
		return 0;
	}
	/* Initialize common gart structure */
	r = radeon_gart_init(rdev);
	if (r)
		return r;
	rdev->gart.table_size = rdev->gart.num_gpu_pages * 4;
	rdev->asic->gart.tlb_flush = &r100_pci_gart_tlb_flush;
	rdev->asic->gart.set_page = &r100_pci_gart_set_page;
	return radeon_gart_table_ram_alloc(rdev);
}

int r100_pci_gart_enable(struct radeon_device *rdev)
{
	uint32_t tmp;

	radeon_gart_restore(rdev);
	/* discard memory request outside of configured range */
	tmp = RREG32(RADEON_AIC_CNTL) | RADEON_DIS_OUT_OF_PCI_GART_ACCESS;
	WREG32(RADEON_AIC_CNTL, tmp);
	/* set address range for PCI address translate */
	WREG32(RADEON_AIC_LO_ADDR, rdev->mc.gtt_start);
	WREG32(RADEON_AIC_HI_ADDR, rdev->mc.gtt_end);
	/* set PCI GART page-table base address */
	WREG32(RADEON_AIC_PT_BASE, rdev->gart.table_addr);
	tmp = RREG32(RADEON_AIC_CNTL) | RADEON_PCIGART_TRANSLATE_EN;
	WREG32(RADEON_AIC_CNTL, tmp);
	r100_pci_gart_tlb_flush(rdev);
	DRM_INFO("PCI GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}

void r100_pci_gart_disable(struct radeon_device *rdev)
{
	uint32_t tmp;

	/* discard memory request outside of configured range */
	tmp = RREG32(RADEON_AIC_CNTL) | RADEON_DIS_OUT_OF_PCI_GART_ACCESS;
	WREG32(RADEON_AIC_CNTL, tmp & ~RADEON_PCIGART_TRANSLATE_EN);
	WREG32(RADEON_AIC_LO_ADDR, 0);
	WREG32(RADEON_AIC_HI_ADDR, 0);
}

int r100_pci_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr)
{
	u32 *gtt = rdev->gart.ptr;

	if (i < 0 || i > rdev->gart.num_gpu_pages) {
		return -EINVAL;
	}
	gtt[i] =
cpu_to_le32(lower_32_bits(addr)); 709 return 0; 710} 711 712void r100_pci_gart_fini(struct radeon_device *rdev) 713{ 714 radeon_gart_fini(rdev); 715 r100_pci_gart_disable(rdev); 716 radeon_gart_table_ram_free(rdev); 717} 718 719int r100_irq_set(struct radeon_device *rdev) 720{ 721 uint32_t tmp = 0; 722 723 if (!rdev->irq.installed) { 724 DRM_ERROR("Can't enable IRQ/MSI because no handler is installed\n"); 725 WREG32(R_000040_GEN_INT_CNTL, 0); 726 return -EINVAL; 727 } 728 if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) { 729 tmp |= RADEON_SW_INT_ENABLE; 730 } 731 if (rdev->irq.crtc_vblank_int[0] || 732 atomic_read(&rdev->irq.pflip[0])) { 733 tmp |= RADEON_CRTC_VBLANK_MASK; 734 } 735 if (rdev->irq.crtc_vblank_int[1] || 736 atomic_read(&rdev->irq.pflip[1])) { 737 tmp |= RADEON_CRTC2_VBLANK_MASK; 738 } 739 if (rdev->irq.hpd[0]) { 740 tmp |= RADEON_FP_DETECT_MASK; 741 } 742 if (rdev->irq.hpd[1]) { 743 tmp |= RADEON_FP2_DETECT_MASK; 744 } 745 WREG32(RADEON_GEN_INT_CNTL, tmp); 746 return 0; 747} 748 749void r100_irq_disable(struct radeon_device *rdev) 750{ 751 u32 tmp; 752 753 WREG32(R_000040_GEN_INT_CNTL, 0); 754 /* Wait and acknowledge irq */ 755 mdelay(1); 756 tmp = RREG32(R_000044_GEN_INT_STATUS); 757 WREG32(R_000044_GEN_INT_STATUS, tmp); 758} 759 760static uint32_t r100_irq_ack(struct radeon_device *rdev) 761{ 762 uint32_t irqs = RREG32(RADEON_GEN_INT_STATUS); 763 uint32_t irq_mask = RADEON_SW_INT_TEST | 764 RADEON_CRTC_VBLANK_STAT | RADEON_CRTC2_VBLANK_STAT | 765 RADEON_FP_DETECT_STAT | RADEON_FP2_DETECT_STAT; 766 767 if (irqs) { 768 WREG32(RADEON_GEN_INT_STATUS, irqs); 769 } 770 return irqs & irq_mask; 771} 772 773irqreturn_t r100_irq_process(struct radeon_device *rdev) 774{ 775 uint32_t status, msi_rearm; 776 bool queue_hotplug = false; 777 778 status = r100_irq_ack(rdev); 779 if (!status) { 780 return IRQ_NONE; 781 } 782 if (rdev->shutdown) { 783 return IRQ_NONE; 784 } 785 while (status) { 786 /* SW interrupt */ 787 if (status & RADEON_SW_INT_TEST) { 788 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX); 789 } 790 /* Vertical blank interrupts */ 791 if (status & RADEON_CRTC_VBLANK_STAT) { 792 if (rdev->irq.crtc_vblank_int[0]) { 793 drm_handle_vblank(rdev->ddev, 0); 794 rdev->pm.vblank_sync = true; 795 DRM_WAKEUP(&rdev->irq.vblank_queue); 796 } 797 if (atomic_read(&rdev->irq.pflip[0])) 798 radeon_crtc_handle_flip(rdev, 0); 799 } 800 if (status & RADEON_CRTC2_VBLANK_STAT) { 801 if (rdev->irq.crtc_vblank_int[1]) { 802 drm_handle_vblank(rdev->ddev, 1); 803 rdev->pm.vblank_sync = true; 804 DRM_WAKEUP(&rdev->irq.vblank_queue); 805 } 806 if (atomic_read(&rdev->irq.pflip[1])) 807 radeon_crtc_handle_flip(rdev, 1); 808 } 809 if (status & RADEON_FP_DETECT_STAT) { 810 queue_hotplug = true; 811 DRM_DEBUG("HPD1\n"); 812 } 813 if (status & RADEON_FP2_DETECT_STAT) { 814 queue_hotplug = true; 815 DRM_DEBUG("HPD2\n"); 816 } 817 status = r100_irq_ack(rdev); 818 } 819 if (queue_hotplug) 820 taskqueue_enqueue(rdev->tq, &rdev->hotplug_work); 821 if (rdev->msi_enabled) { 822 switch (rdev->family) { 823 case CHIP_RS400: 824 case CHIP_RS480: 825 msi_rearm = RREG32(RADEON_AIC_CNTL) & ~RS400_MSI_REARM; 826 WREG32(RADEON_AIC_CNTL, msi_rearm); 827 WREG32(RADEON_AIC_CNTL, msi_rearm | RS400_MSI_REARM); 828 break; 829 default: 830 WREG32(RADEON_MSI_REARM_EN, RV370_MSI_REARM_EN); 831 break; 832 } 833 } 834 return IRQ_HANDLED; 835} 836 837u32 r100_get_vblank_counter(struct radeon_device *rdev, int crtc) 838{ 839 if (crtc == 0) 840 return RREG32(RADEON_CRTC_CRNT_FRAME); 841 else 842 return 
RREG32(RADEON_CRTC2_CRNT_FRAME); 843} 844 845/* Who ever call radeon_fence_emit should call ring_lock and ask 846 * for enough space (today caller are ib schedule and buffer move) */ 847void r100_fence_ring_emit(struct radeon_device *rdev, 848 struct radeon_fence *fence) 849{ 850 struct radeon_ring *ring = &rdev->ring[fence->ring]; 851 852 /* We have to make sure that caches are flushed before 853 * CPU might read something from VRAM. */ 854 radeon_ring_write(ring, PACKET0(RADEON_RB3D_DSTCACHE_CTLSTAT, 0)); 855 radeon_ring_write(ring, RADEON_RB3D_DC_FLUSH_ALL); 856 radeon_ring_write(ring, PACKET0(RADEON_RB3D_ZCACHE_CTLSTAT, 0)); 857 radeon_ring_write(ring, RADEON_RB3D_ZC_FLUSH_ALL); 858 /* Wait until IDLE & CLEAN */ 859 radeon_ring_write(ring, PACKET0(RADEON_WAIT_UNTIL, 0)); 860 radeon_ring_write(ring, RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_3D_IDLECLEAN); 861 radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0)); 862 radeon_ring_write(ring, rdev->config.r100.hdp_cntl | 863 RADEON_HDP_READ_BUFFER_INVALIDATE); 864 radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0)); 865 radeon_ring_write(ring, rdev->config.r100.hdp_cntl); 866 /* Emit fence sequence & fire IRQ */ 867 radeon_ring_write(ring, PACKET0(rdev->fence_drv[fence->ring].scratch_reg, 0)); 868 radeon_ring_write(ring, fence->seq); 869 radeon_ring_write(ring, PACKET0(RADEON_GEN_INT_STATUS, 0)); 870 radeon_ring_write(ring, RADEON_SW_INT_FIRE); 871} 872 873void r100_semaphore_ring_emit(struct radeon_device *rdev, 874 struct radeon_ring *ring, 875 struct radeon_semaphore *semaphore, 876 bool emit_wait) 877{ 878 /* Unused on older asics, since we don't have semaphores or multiple rings */ 879 panic("%s: Unused on older asics", __func__); 880} 881 882int r100_copy_blit(struct radeon_device *rdev, 883 uint64_t src_offset, 884 uint64_t dst_offset, 885 unsigned num_gpu_pages, 886 struct radeon_fence **fence) 887{ 888 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; 889 uint32_t cur_pages; 890 uint32_t stride_bytes = RADEON_GPU_PAGE_SIZE; 891 uint32_t pitch; 892 uint32_t stride_pixels; 893 unsigned ndw; 894 int num_loops; 895 int r = 0; 896 897 /* radeon limited to 16k stride */ 898 stride_bytes &= 0x3fff; 899 /* radeon pitch is /64 */ 900 pitch = stride_bytes / 64; 901 stride_pixels = stride_bytes / 4; 902 num_loops = DIV_ROUND_UP(num_gpu_pages, 8191); 903 904 /* Ask for enough room for blit + flush + fence */ 905 ndw = 64 + (10 * num_loops); 906 r = radeon_ring_lock(rdev, ring, ndw); 907 if (r) { 908 DRM_ERROR("radeon: moving bo (%d) asking for %u dw.\n", r, ndw); 909 return -EINVAL; 910 } 911 while (num_gpu_pages > 0) { 912 cur_pages = num_gpu_pages; 913 if (cur_pages > 8191) { 914 cur_pages = 8191; 915 } 916 num_gpu_pages -= cur_pages; 917 918 /* pages are in Y direction - height 919 page width in X direction - width */ 920 radeon_ring_write(ring, PACKET3(PACKET3_BITBLT_MULTI, 8)); 921 radeon_ring_write(ring, 922 RADEON_GMC_SRC_PITCH_OFFSET_CNTL | 923 RADEON_GMC_DST_PITCH_OFFSET_CNTL | 924 RADEON_GMC_SRC_CLIPPING | 925 RADEON_GMC_DST_CLIPPING | 926 RADEON_GMC_BRUSH_NONE | 927 (RADEON_COLOR_FORMAT_ARGB8888 << 8) | 928 RADEON_GMC_SRC_DATATYPE_COLOR | 929 RADEON_ROP3_S | 930 RADEON_DP_SRC_SOURCE_MEMORY | 931 RADEON_GMC_CLR_CMP_CNTL_DIS | 932 RADEON_GMC_WR_MSK_DIS); 933 radeon_ring_write(ring, (pitch << 22) | (src_offset >> 10)); 934 radeon_ring_write(ring, (pitch << 22) | (dst_offset >> 10)); 935 radeon_ring_write(ring, (0x1fff) | (0x1fff << 16)); 936 radeon_ring_write(ring, 0); 937 radeon_ring_write(ring, (0x1fff) | 
(0x1fff << 16)); 938 radeon_ring_write(ring, num_gpu_pages); 939 radeon_ring_write(ring, num_gpu_pages); 940 radeon_ring_write(ring, cur_pages | (stride_pixels << 16)); 941 } 942 radeon_ring_write(ring, PACKET0(RADEON_DSTCACHE_CTLSTAT, 0)); 943 radeon_ring_write(ring, RADEON_RB2D_DC_FLUSH_ALL); 944 radeon_ring_write(ring, PACKET0(RADEON_WAIT_UNTIL, 0)); 945 radeon_ring_write(ring, 946 RADEON_WAIT_2D_IDLECLEAN | 947 RADEON_WAIT_HOST_IDLECLEAN | 948 RADEON_WAIT_DMA_GUI_IDLE); 949 if (fence) { 950 r = radeon_fence_emit(rdev, fence, RADEON_RING_TYPE_GFX_INDEX); 951 } 952 radeon_ring_unlock_commit(rdev, ring); 953 return r; 954} 955 956static int r100_cp_wait_for_idle(struct radeon_device *rdev) 957{ 958 unsigned i; 959 u32 tmp; 960 961 for (i = 0; i < rdev->usec_timeout; i++) { 962 tmp = RREG32(R_000E40_RBBM_STATUS); 963 if (!G_000E40_CP_CMDSTRM_BUSY(tmp)) { 964 return 0; 965 } 966 udelay(1); 967 } 968 return -1; 969} 970 971void r100_ring_start(struct radeon_device *rdev, struct radeon_ring *ring) 972{ 973 int r; 974 975 r = radeon_ring_lock(rdev, ring, 2); 976 if (r) { 977 return; 978 } 979 radeon_ring_write(ring, PACKET0(RADEON_ISYNC_CNTL, 0)); 980 radeon_ring_write(ring, 981 RADEON_ISYNC_ANY2D_IDLE3D | 982 RADEON_ISYNC_ANY3D_IDLE2D | 983 RADEON_ISYNC_WAIT_IDLEGUI | 984 RADEON_ISYNC_CPSCRATCH_IDLEGUI); 985 radeon_ring_unlock_commit(rdev, ring); 986} 987 988 989/* Load the microcode for the CP */ 990static int r100_cp_init_microcode(struct radeon_device *rdev) 991{ 992 const char *fw_name = NULL; 993 int err; 994 995 DRM_DEBUG_KMS("\n"); 996 997 if ((rdev->family == CHIP_R100) || (rdev->family == CHIP_RV100) || 998 (rdev->family == CHIP_RV200) || (rdev->family == CHIP_RS100) || 999 (rdev->family == CHIP_RS200)) { 1000 DRM_INFO("Loading R100 Microcode\n"); 1001 fw_name = FIRMWARE_R100; 1002 } else if ((rdev->family == CHIP_R200) || 1003 (rdev->family == CHIP_RV250) || 1004 (rdev->family == CHIP_RV280) || 1005 (rdev->family == CHIP_RS300)) { 1006 DRM_INFO("Loading R200 Microcode\n"); 1007 fw_name = FIRMWARE_R200; 1008 } else if ((rdev->family == CHIP_R300) || 1009 (rdev->family == CHIP_R350) || 1010 (rdev->family == CHIP_RV350) || 1011 (rdev->family == CHIP_RV380) || 1012 (rdev->family == CHIP_RS400) || 1013 (rdev->family == CHIP_RS480)) { 1014 DRM_INFO("Loading R300 Microcode\n"); 1015 fw_name = FIRMWARE_R300; 1016 } else if ((rdev->family == CHIP_R420) || 1017 (rdev->family == CHIP_R423) || 1018 (rdev->family == CHIP_RV410)) { 1019 DRM_INFO("Loading R400 Microcode\n"); 1020 fw_name = FIRMWARE_R420; 1021 } else if ((rdev->family == CHIP_RS690) || 1022 (rdev->family == CHIP_RS740)) { 1023 DRM_INFO("Loading RS690/RS740 Microcode\n"); 1024 fw_name = FIRMWARE_RS690; 1025 } else if (rdev->family == CHIP_RS600) { 1026 DRM_INFO("Loading RS600 Microcode\n"); 1027 fw_name = FIRMWARE_RS600; 1028 } else if ((rdev->family == CHIP_RV515) || 1029 (rdev->family == CHIP_R520) || 1030 (rdev->family == CHIP_RV530) || 1031 (rdev->family == CHIP_R580) || 1032 (rdev->family == CHIP_RV560) || 1033 (rdev->family == CHIP_RV570)) { 1034 DRM_INFO("Loading R500 Microcode\n"); 1035 fw_name = FIRMWARE_R520; 1036 } 1037 1038 err = 0; 1039 rdev->me_fw = firmware_get(fw_name); 1040 if (rdev->me_fw == NULL) { 1041 DRM_ERROR("radeon_cp: Failed to load firmware \"%s\"\n", 1042 fw_name); 1043 err = -ENOENT; 1044 } else if (rdev->me_fw->datasize % 8) { 1045 DRM_ERROR( 1046 "radeon_cp: Bogus length %zu in firmware \"%s\"\n", 1047 rdev->me_fw->datasize, fw_name); 1048 err = -EINVAL; 1049 firmware_put(rdev->me_fw, FIRMWARE_UNLOAD); 
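		/* The blob is rejected because r100_cp_load_microcode()
		 * feeds it to CP ME RAM as DATAH/DATAL word pairs, so a
		 * valid image must be a multiple of 8 bytes; the reference
		 * is dropped and (below) the pointer cleared so a later
		 * r100_cp_init() can retry the firmware load. */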
1050 rdev->me_fw = NULL; 1051 } 1052 return err; 1053} 1054 1055/** 1056 * r100_cp_fini_microcode - drop the firmware image reference 1057 * 1058 * @rdev: radeon_device pointer 1059 * 1060 * Drop the me firmware image reference. 1061 * Called at driver shutdown. 1062 */ 1063static void r100_cp_fini_microcode (struct radeon_device *rdev) 1064{ 1065 1066 if (rdev->me_fw != NULL) { 1067 firmware_put(rdev->me_fw, FIRMWARE_UNLOAD); 1068 rdev->me_fw = NULL; 1069 } 1070} 1071 1072static void r100_cp_load_microcode(struct radeon_device *rdev) 1073{ 1074 const __be32 *fw_data; 1075 int i, size; 1076 1077 if (r100_gui_wait_for_idle(rdev)) { 1078 DRM_ERROR("Failed to wait GUI idle while " 1079 "programming pipes. Bad things might happen.\n"); 1080 } 1081 1082 if (rdev->me_fw) { 1083 size = rdev->me_fw->datasize / 4; 1084 fw_data = (const __be32 *)rdev->me_fw->data; 1085 WREG32(RADEON_CP_ME_RAM_ADDR, 0); 1086 for (i = 0; i < size; i += 2) { 1087 WREG32(RADEON_CP_ME_RAM_DATAH, 1088 be32_to_cpup(&fw_data[i])); 1089 WREG32(RADEON_CP_ME_RAM_DATAL, 1090 be32_to_cpup(&fw_data[i + 1])); 1091 } 1092 } 1093} 1094 1095int r100_cp_init(struct radeon_device *rdev, unsigned ring_size) 1096{ 1097 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; 1098 unsigned rb_bufsz; 1099 unsigned rb_blksz; 1100 unsigned max_fetch; 1101 unsigned pre_write_timer; 1102 unsigned pre_write_limit; 1103 unsigned indirect2_start; 1104 unsigned indirect1_start; 1105 uint32_t tmp; 1106 int r; 1107 1108 if (r100_debugfs_cp_init(rdev)) { 1109 DRM_ERROR("Failed to register debugfs file for CP !\n"); 1110 } 1111 if (!rdev->me_fw) { 1112 r = r100_cp_init_microcode(rdev); 1113 if (r) { 1114 DRM_ERROR("Failed to load firmware!\n"); 1115 return r; 1116 } 1117 } 1118 1119 /* Align ring size */ 1120 rb_bufsz = drm_order(ring_size / 8); 1121 ring_size = (1 << (rb_bufsz + 1)) * 4; 1122 r100_cp_load_microcode(rdev); 1123 r = radeon_ring_init(rdev, ring, ring_size, RADEON_WB_CP_RPTR_OFFSET, 1124 RADEON_CP_RB_RPTR, RADEON_CP_RB_WPTR, 1125 0, 0x7fffff, RADEON_CP_PACKET2); 1126 if (r) { 1127 return r; 1128 } 1129 /* Each time the cp read 1024 bytes (16 dword/quadword) update 1130 * the rptr copy in system ram */ 1131 rb_blksz = 9; 1132 /* cp will read 128bytes at a time (4 dwords) */ 1133 max_fetch = 1; 1134 ring->align_mask = 16 - 1; 1135 /* Write to CP_RB_WPTR will be delayed for pre_write_timer clocks */ 1136 pre_write_timer = 64; 1137 /* Force CP_RB_WPTR write if written more than one time before the 1138 * delay expire 1139 */ 1140 pre_write_limit = 0; 1141 /* Setup the cp cache like this (cache size is 96 dwords) : 1142 * RING 0 to 15 1143 * INDIRECT1 16 to 79 1144 * INDIRECT2 80 to 95 1145 * So ring cache size is 16dwords (> (2 * max_fetch = 2 * 4dwords)) 1146 * indirect1 cache size is 64dwords (> (2 * max_fetch = 2 * 4dwords)) 1147 * indirect2 cache size is 16dwords (> (2 * max_fetch = 2 * 4dwords)) 1148 * Idea being that most of the gpu cmd will be through indirect1 buffer 1149 * so it gets the bigger cache. 
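	 * The indirect1_start/indirect2_start values programmed into
	 * RADEON_CP_CSQ_MODE just below (16 and 80) encode exactly these
	 * boundaries.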
1150 */ 1151 indirect2_start = 80; 1152 indirect1_start = 16; 1153 /* cp setup */ 1154 WREG32(0x718, pre_write_timer | (pre_write_limit << 28)); 1155 tmp = (REG_SET(RADEON_RB_BUFSZ, rb_bufsz) | 1156 REG_SET(RADEON_RB_BLKSZ, rb_blksz) | 1157 REG_SET(RADEON_MAX_FETCH, max_fetch)); 1158#ifdef __BIG_ENDIAN 1159 tmp |= RADEON_BUF_SWAP_32BIT; 1160#endif 1161 WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_NO_UPDATE); 1162 1163 /* Set ring address */ 1164 DRM_INFO("radeon: ring at 0x%016lX\n", (unsigned long)ring->gpu_addr); 1165 WREG32(RADEON_CP_RB_BASE, ring->gpu_addr); 1166 /* Force read & write ptr to 0 */ 1167 WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_RPTR_WR_ENA | RADEON_RB_NO_UPDATE); 1168 WREG32(RADEON_CP_RB_RPTR_WR, 0); 1169 ring->wptr = 0; 1170 WREG32(RADEON_CP_RB_WPTR, ring->wptr); 1171 1172 /* set the wb address whether it's enabled or not */ 1173 WREG32(R_00070C_CP_RB_RPTR_ADDR, 1174 S_00070C_RB_RPTR_ADDR((rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) >> 2)); 1175 WREG32(R_000774_SCRATCH_ADDR, rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET); 1176 1177 if (rdev->wb.enabled) 1178 WREG32(R_000770_SCRATCH_UMSK, 0xff); 1179 else { 1180 tmp |= RADEON_RB_NO_UPDATE; 1181 WREG32(R_000770_SCRATCH_UMSK, 0); 1182 } 1183 1184 WREG32(RADEON_CP_RB_CNTL, tmp); 1185 udelay(10); 1186 ring->rptr = RREG32(RADEON_CP_RB_RPTR); 1187 /* Set cp mode to bus mastering & enable cp*/ 1188 WREG32(RADEON_CP_CSQ_MODE, 1189 REG_SET(RADEON_INDIRECT2_START, indirect2_start) | 1190 REG_SET(RADEON_INDIRECT1_START, indirect1_start)); 1191 WREG32(RADEON_CP_RB_WPTR_DELAY, 0); 1192 WREG32(RADEON_CP_CSQ_MODE, 0x00004D4D); 1193 WREG32(RADEON_CP_CSQ_CNTL, RADEON_CSQ_PRIBM_INDBM); 1194 1195 /* at this point everything should be setup correctly to enable master */ 1196 pci_enable_busmaster(rdev->dev); 1197 1198 radeon_ring_start(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]); 1199 r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, ring); 1200 if (r) { 1201 DRM_ERROR("radeon: cp isn't working (%d).\n", r); 1202 return r; 1203 } 1204 ring->ready = true; 1205 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size); 1206 1207 if (!ring->rptr_save_reg /* not resuming from suspend */ 1208 && radeon_ring_supports_scratch_reg(rdev, ring)) { 1209 r = radeon_scratch_get(rdev, &ring->rptr_save_reg); 1210 if (r) { 1211 DRM_ERROR("failed to get scratch reg for rptr save (%d).\n", r); 1212 ring->rptr_save_reg = 0; 1213 } 1214 } 1215 return 0; 1216} 1217 1218void r100_cp_fini(struct radeon_device *rdev) 1219{ 1220 if (r100_cp_wait_for_idle(rdev)) { 1221 DRM_ERROR("Wait for CP idle timeout, shutting down CP.\n"); 1222 } 1223 /* Disable ring */ 1224 r100_cp_disable(rdev); 1225 radeon_scratch_free(rdev, rdev->ring[RADEON_RING_TYPE_GFX_INDEX].rptr_save_reg); 1226 radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]); 1227 DRM_INFO("radeon: cp finalized\n"); 1228} 1229 1230void r100_cp_disable(struct radeon_device *rdev) 1231{ 1232 /* Disable ring */ 1233 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size); 1234 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false; 1235 WREG32(RADEON_CP_CSQ_MODE, 0); 1236 WREG32(RADEON_CP_CSQ_CNTL, 0); 1237 WREG32(R_000770_SCRATCH_UMSK, 0); 1238 if (r100_gui_wait_for_idle(rdev)) { 1239 DRM_ERROR("Failed to wait GUI idle while " 1240 "programming pipes. 
Bad things might happen.\n"); 1241 } 1242} 1243 1244/* 1245 * CS functions 1246 */ 1247int r100_reloc_pitch_offset(struct radeon_cs_parser *p, 1248 struct radeon_cs_packet *pkt, 1249 unsigned idx, 1250 unsigned reg) 1251{ 1252 int r; 1253 u32 tile_flags = 0; 1254 u32 tmp; 1255 struct radeon_cs_reloc *reloc; 1256 u32 value; 1257 1258 r = r100_cs_packet_next_reloc(p, &reloc); 1259 if (r) { 1260 DRM_ERROR("No reloc for ib[%d]=0x%04X\n", 1261 idx, reg); 1262 r100_cs_dump_packet(p, pkt); 1263 return r; 1264 } 1265 1266 value = radeon_get_ib_value(p, idx); 1267 tmp = value & 0x003fffff; 1268 tmp += (((u32)reloc->lobj.gpu_offset) >> 10); 1269 1270 if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { 1271 if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) 1272 tile_flags |= RADEON_DST_TILE_MACRO; 1273 if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) { 1274 if (reg == RADEON_SRC_PITCH_OFFSET) { 1275 DRM_ERROR("Cannot src blit from microtiled surface\n"); 1276 r100_cs_dump_packet(p, pkt); 1277 return -EINVAL; 1278 } 1279 tile_flags |= RADEON_DST_TILE_MICRO; 1280 } 1281 1282 tmp |= tile_flags; 1283 p->ib.ptr[idx] = (value & 0x3fc00000) | tmp; 1284 } else 1285 p->ib.ptr[idx] = (value & 0xffc00000) | tmp; 1286 return 0; 1287} 1288 1289int r100_packet3_load_vbpntr(struct radeon_cs_parser *p, 1290 struct radeon_cs_packet *pkt, 1291 int idx) 1292{ 1293 unsigned c, i; 1294 struct radeon_cs_reloc *reloc; 1295 struct r100_cs_track *track; 1296 int r = 0; 1297 volatile uint32_t *ib; 1298 u32 idx_value; 1299 1300 ib = p->ib.ptr; 1301 track = (struct r100_cs_track *)p->track; 1302 c = radeon_get_ib_value(p, idx++) & 0x1F; 1303 if (c > 16) { 1304 DRM_ERROR("Only 16 vertex buffers are allowed %d\n", 1305 pkt->opcode); 1306 r100_cs_dump_packet(p, pkt); 1307 return -EINVAL; 1308 } 1309 track->num_arrays = c; 1310 for (i = 0; i < (c - 1); i+=2, idx+=3) { 1311 r = r100_cs_packet_next_reloc(p, &reloc); 1312 if (r) { 1313 DRM_ERROR("No reloc for packet3 %d\n", 1314 pkt->opcode); 1315 r100_cs_dump_packet(p, pkt); 1316 return r; 1317 } 1318 idx_value = radeon_get_ib_value(p, idx); 1319 ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset); 1320 1321 track->arrays[i + 0].esize = idx_value >> 8; 1322 track->arrays[i + 0].robj = reloc->robj; 1323 track->arrays[i + 0].esize &= 0x7F; 1324 r = r100_cs_packet_next_reloc(p, &reloc); 1325 if (r) { 1326 DRM_ERROR("No reloc for packet3 %d\n", 1327 pkt->opcode); 1328 r100_cs_dump_packet(p, pkt); 1329 return r; 1330 } 1331 ib[idx+2] = radeon_get_ib_value(p, idx + 2) + ((u32)reloc->lobj.gpu_offset); 1332 track->arrays[i + 1].robj = reloc->robj; 1333 track->arrays[i + 1].esize = idx_value >> 24; 1334 track->arrays[i + 1].esize &= 0x7F; 1335 } 1336 if (c & 1) { 1337 r = r100_cs_packet_next_reloc(p, &reloc); 1338 if (r) { 1339 DRM_ERROR("No reloc for packet3 %d\n", 1340 pkt->opcode); 1341 r100_cs_dump_packet(p, pkt); 1342 return r; 1343 } 1344 idx_value = radeon_get_ib_value(p, idx); 1345 ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset); 1346 track->arrays[i + 0].robj = reloc->robj; 1347 track->arrays[i + 0].esize = idx_value >> 8; 1348 track->arrays[i + 0].esize &= 0x7F; 1349 } 1350 return r; 1351} 1352 1353int r100_cs_parse_packet0(struct radeon_cs_parser *p, 1354 struct radeon_cs_packet *pkt, 1355 const unsigned *auth, unsigned n, 1356 radeon_packet0_check_t check) 1357{ 1358 unsigned reg; 1359 unsigned i, j, m; 1360 unsigned idx; 1361 int r; 1362 1363 idx = pkt->idx + 1; 1364 reg = pkt->reg; 1365 /* Check that register fall into 
register range
	 * determined by the number of entries (n) in the
	 * safe register bitmap.
	 */
	if (pkt->one_reg_wr) {
		if ((reg >> 7) > n) {
			return -EINVAL;
		}
	} else {
		if (((reg + (pkt->count << 2)) >> 7) > n) {
			return -EINVAL;
		}
	}
	for (i = 0; i <= pkt->count; i++, idx++) {
		j = (reg >> 7);
		m = 1 << ((reg >> 2) & 31);
		if (auth[j] & m) {
			r = check(p, pkt, idx, reg);
			if (r) {
				return r;
			}
		}
		if (pkt->one_reg_wr) {
			if (!(auth[j] & m)) {
				break;
			}
		} else {
			reg += 4;
		}
	}
	return 0;
}

void r100_cs_dump_packet(struct radeon_cs_parser *p,
			 struct radeon_cs_packet *pkt)
{
	volatile uint32_t *ib;
	unsigned i;
	unsigned idx;

	ib = p->ib.ptr;
	idx = pkt->idx;
	for (i = 0; i <= (pkt->count + 1); i++, idx++) {
		DRM_INFO("ib[%d]=0x%08X\n", idx, ib[idx]);
	}
}

/**
 * r100_cs_packet_parse() - parse cp packet and point ib index to next packet
 * @parser: parser structure holding parsing context.
 * @pkt: where to store packet information
 *
 * Assumes that chunk_ib_index is properly set. Will return -EINVAL
 * if the packet is bigger than the remaining ib size, or if the packet
 * type is unknown.
 **/
int r100_cs_packet_parse(struct radeon_cs_parser *p,
			 struct radeon_cs_packet *pkt,
			 unsigned idx)
{
	struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
	uint32_t header;

	if (idx >= ib_chunk->length_dw) {
		DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
			  idx, ib_chunk->length_dw);
		return -EINVAL;
	}
	header = radeon_get_ib_value(p, idx);
	pkt->idx = idx;
	pkt->type = CP_PACKET_GET_TYPE(header);
	pkt->count = CP_PACKET_GET_COUNT(header);
	switch (pkt->type) {
	case PACKET_TYPE0:
		pkt->reg = CP_PACKET0_GET_REG(header);
		pkt->one_reg_wr = CP_PACKET0_GET_ONE_REG_WR(header);
		break;
	case PACKET_TYPE3:
		pkt->opcode = CP_PACKET3_GET_OPCODE(header);
		break;
	case PACKET_TYPE2:
		pkt->count = -1;
		break;
	default:
		DRM_ERROR("Unknown packet type %d at %d !\n", pkt->type, idx);
		return -EINVAL;
	}
	if ((pkt->count + 1 + pkt->idx) >= ib_chunk->length_dw) {
		DRM_ERROR("Packet (%d:%d:%d) end after CS buffer (%d) !\n",
			  pkt->idx, pkt->type, pkt->count, ib_chunk->length_dw);
		return -EINVAL;
	}
	return 0;
}

/**
 * r100_cs_packet_parse_vline() - parse userspace VLINE packet
 * @parser: parser structure holding parsing context.
 *
 * Userspace sends a special sequence for VLINE waits.
 * PACKET0 - VLINE_START_END + value
 * PACKET0 - WAIT_UNTIL + value
 * RELOC (P3) - crtc_id in reloc.
 *
 * This function parses this and relocates the VLINE START END
 * and WAIT UNTIL packets to the correct crtc.
 * It also detects a switched off crtc and nulls out the
 * wait in that case.
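 * Relative to h_idx (the VLINE_START_END PACKET0 header) the dwords
 * are laid out as: +0 header, +1 start/end value, +2 WAIT_UNTIL
 * header, +3 wait value, +4 relocation NOP header, +5 crtc_id; the
 * fixups below patch exactly those slots.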
 */
int r100_cs_packet_parse_vline(struct radeon_cs_parser *p)
{
	struct drm_mode_object *obj;
	struct drm_crtc *crtc;
	struct radeon_crtc *radeon_crtc;
	struct radeon_cs_packet p3reloc, waitreloc;
	int crtc_id;
	int r;
	uint32_t header, h_idx, reg;
	volatile uint32_t *ib;

	ib = p->ib.ptr;

	/* parse the wait until */
	r = r100_cs_packet_parse(p, &waitreloc, p->idx);
	if (r)
		return r;

	/* check that it's a wait until and only 1 count */
	if (waitreloc.reg != RADEON_WAIT_UNTIL ||
	    waitreloc.count != 0) {
		DRM_ERROR("vline wait had illegal wait until segment\n");
		return -EINVAL;
	}

	if (radeon_get_ib_value(p, waitreloc.idx + 1) != RADEON_WAIT_CRTC_VLINE) {
		DRM_ERROR("vline wait had illegal wait until\n");
		return -EINVAL;
	}

	/* jump over the NOP */
	r = r100_cs_packet_parse(p, &p3reloc, p->idx + waitreloc.count + 2);
	if (r)
		return r;

	h_idx = p->idx - 2;
	p->idx += waitreloc.count + 2;
	p->idx += p3reloc.count + 2;

	header = radeon_get_ib_value(p, h_idx);
	crtc_id = radeon_get_ib_value(p, h_idx + 5);
	reg = CP_PACKET0_GET_REG(header);
	obj = drm_mode_object_find(p->rdev->ddev, crtc_id, DRM_MODE_OBJECT_CRTC);
	if (!obj) {
		DRM_ERROR("cannot find crtc %d\n", crtc_id);
		return -EINVAL;
	}
	crtc = obj_to_crtc(obj);
	radeon_crtc = to_radeon_crtc(crtc);
	crtc_id = radeon_crtc->crtc_id;

	if (!crtc->enabled) {
		/* if the CRTC isn't enabled - we need to nop out the wait until */
		ib[h_idx + 2] = PACKET2(0);
		ib[h_idx + 3] = PACKET2(0);
	} else if (crtc_id == 1) {
		switch (reg) {
		case AVIVO_D1MODE_VLINE_START_END:
			header &= ~R300_CP_PACKET0_REG_MASK;
			header |= AVIVO_D2MODE_VLINE_START_END >> 2;
			break;
		case RADEON_CRTC_GUI_TRIG_VLINE:
			header &= ~R300_CP_PACKET0_REG_MASK;
			header |= RADEON_CRTC2_GUI_TRIG_VLINE >> 2;
			break;
		default:
			DRM_ERROR("unknown crtc reloc\n");
			return -EINVAL;
		}
		ib[h_idx] = header;
		ib[h_idx + 3] |= RADEON_ENG_DISPLAY_SELECT_CRTC1;
	}

	return 0;
}

/**
 * r100_cs_packet_next_reloc() - parse next packet which should be a reloc packet3
 * @parser: parser structure holding parsing context.
 * @data: pointer to relocation data
 * @offset_start: starting offset
 * @offset_mask: offset mask (to align start offset on)
 * @reloc: reloc information
 *
 * Check that the next packet is a relocation packet3, do bo validation and
 * compute the GPU offset using the provided start.
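 * The relocation arrives as a PACKET3 NOP whose payload dword is an
 * index into the relocation chunk; it is divided by four because every
 * reloc entry is assumed to span 4 dwords (see the FIXME in the body).
 * p->idx is advanced past the NOP so parsing resumes at the following
 * packet.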
1559 **/ 1560int r100_cs_packet_next_reloc(struct radeon_cs_parser *p, 1561 struct radeon_cs_reloc **cs_reloc) 1562{ 1563 struct radeon_cs_chunk *relocs_chunk; 1564 struct radeon_cs_packet p3reloc; 1565 unsigned idx; 1566 int r; 1567 1568 if (p->chunk_relocs_idx == -1) { 1569 DRM_ERROR("No relocation chunk !\n"); 1570 return -EINVAL; 1571 } 1572 *cs_reloc = NULL; 1573 relocs_chunk = &p->chunks[p->chunk_relocs_idx]; 1574 r = r100_cs_packet_parse(p, &p3reloc, p->idx); 1575 if (r) { 1576 return r; 1577 } 1578 p->idx += p3reloc.count + 2; 1579 if (p3reloc.type != PACKET_TYPE3 || p3reloc.opcode != PACKET3_NOP) { 1580 DRM_ERROR("No packet3 for relocation for packet at %d.\n", 1581 p3reloc.idx); 1582 r100_cs_dump_packet(p, &p3reloc); 1583 return -EINVAL; 1584 } 1585 idx = radeon_get_ib_value(p, p3reloc.idx + 1); 1586 if (idx >= relocs_chunk->length_dw) { 1587 DRM_ERROR("Relocs at %d after relocations chunk end %d !\n", 1588 idx, relocs_chunk->length_dw); 1589 r100_cs_dump_packet(p, &p3reloc); 1590 return -EINVAL; 1591 } 1592 /* FIXME: we assume reloc size is 4 dwords */ 1593 *cs_reloc = p->relocs_ptr[(idx / 4)]; 1594 return 0; 1595} 1596 1597static int r100_get_vtx_size(uint32_t vtx_fmt) 1598{ 1599 int vtx_size; 1600 vtx_size = 2; 1601 /* ordered according to bits in spec */ 1602 if (vtx_fmt & RADEON_SE_VTX_FMT_W0) 1603 vtx_size++; 1604 if (vtx_fmt & RADEON_SE_VTX_FMT_FPCOLOR) 1605 vtx_size += 3; 1606 if (vtx_fmt & RADEON_SE_VTX_FMT_FPALPHA) 1607 vtx_size++; 1608 if (vtx_fmt & RADEON_SE_VTX_FMT_PKCOLOR) 1609 vtx_size++; 1610 if (vtx_fmt & RADEON_SE_VTX_FMT_FPSPEC) 1611 vtx_size += 3; 1612 if (vtx_fmt & RADEON_SE_VTX_FMT_FPFOG) 1613 vtx_size++; 1614 if (vtx_fmt & RADEON_SE_VTX_FMT_PKSPEC) 1615 vtx_size++; 1616 if (vtx_fmt & RADEON_SE_VTX_FMT_ST0) 1617 vtx_size += 2; 1618 if (vtx_fmt & RADEON_SE_VTX_FMT_ST1) 1619 vtx_size += 2; 1620 if (vtx_fmt & RADEON_SE_VTX_FMT_Q1) 1621 vtx_size++; 1622 if (vtx_fmt & RADEON_SE_VTX_FMT_ST2) 1623 vtx_size += 2; 1624 if (vtx_fmt & RADEON_SE_VTX_FMT_Q2) 1625 vtx_size++; 1626 if (vtx_fmt & RADEON_SE_VTX_FMT_ST3) 1627 vtx_size += 2; 1628 if (vtx_fmt & RADEON_SE_VTX_FMT_Q3) 1629 vtx_size++; 1630 if (vtx_fmt & RADEON_SE_VTX_FMT_Q0) 1631 vtx_size++; 1632 /* blend weight */ 1633 if (vtx_fmt & (0x7 << 15)) 1634 vtx_size += (vtx_fmt >> 15) & 0x7; 1635 if (vtx_fmt & RADEON_SE_VTX_FMT_N0) 1636 vtx_size += 3; 1637 if (vtx_fmt & RADEON_SE_VTX_FMT_XY1) 1638 vtx_size += 2; 1639 if (vtx_fmt & RADEON_SE_VTX_FMT_Z1) 1640 vtx_size++; 1641 if (vtx_fmt & RADEON_SE_VTX_FMT_W1) 1642 vtx_size++; 1643 if (vtx_fmt & RADEON_SE_VTX_FMT_N1) 1644 vtx_size++; 1645 if (vtx_fmt & RADEON_SE_VTX_FMT_Z) 1646 vtx_size++; 1647 return vtx_size; 1648} 1649 1650static int r100_packet0_check(struct radeon_cs_parser *p, 1651 struct radeon_cs_packet *pkt, 1652 unsigned idx, unsigned reg) 1653{ 1654 struct radeon_cs_reloc *reloc; 1655 struct r100_cs_track *track; 1656 volatile uint32_t *ib; 1657 uint32_t tmp; 1658 int r; 1659 int i, face; 1660 u32 tile_flags = 0; 1661 u32 idx_value; 1662 1663 ib = p->ib.ptr; 1664 track = (struct r100_cs_track *)p->track; 1665 1666 idx_value = radeon_get_ib_value(p, idx); 1667 1668 switch (reg) { 1669 case RADEON_CRTC_GUI_TRIG_VLINE: 1670 r = r100_cs_packet_parse_vline(p); 1671 if (r) { 1672 DRM_ERROR("No reloc for ib[%d]=0x%04X\n", 1673 idx, reg); 1674 r100_cs_dump_packet(p, pkt); 1675 return r; 1676 } 1677 break; 1678 /* FIXME: only allow PACKET3 blit? 
easier to check for out of 1679 * range access */ 1680 case RADEON_DST_PITCH_OFFSET: 1681 case RADEON_SRC_PITCH_OFFSET: 1682 r = r100_reloc_pitch_offset(p, pkt, idx, reg); 1683 if (r) 1684 return r; 1685 break; 1686 case RADEON_RB3D_DEPTHOFFSET: 1687 r = r100_cs_packet_next_reloc(p, &reloc); 1688 if (r) { 1689 DRM_ERROR("No reloc for ib[%d]=0x%04X\n", 1690 idx, reg); 1691 r100_cs_dump_packet(p, pkt); 1692 return r; 1693 } 1694 track->zb.robj = reloc->robj; 1695 track->zb.offset = idx_value; 1696 track->zb_dirty = true; 1697 ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); 1698 break; 1699 case RADEON_RB3D_COLOROFFSET: 1700 r = r100_cs_packet_next_reloc(p, &reloc); 1701 if (r) { 1702 DRM_ERROR("No reloc for ib[%d]=0x%04X\n", 1703 idx, reg); 1704 r100_cs_dump_packet(p, pkt); 1705 return r; 1706 } 1707 track->cb[0].robj = reloc->robj; 1708 track->cb[0].offset = idx_value; 1709 track->cb_dirty = true; 1710 ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); 1711 break; 1712 case RADEON_PP_TXOFFSET_0: 1713 case RADEON_PP_TXOFFSET_1: 1714 case RADEON_PP_TXOFFSET_2: 1715 i = (reg - RADEON_PP_TXOFFSET_0) / 24; 1716 r = r100_cs_packet_next_reloc(p, &reloc); 1717 if (r) { 1718 DRM_ERROR("No reloc for ib[%d]=0x%04X\n", 1719 idx, reg); 1720 r100_cs_dump_packet(p, pkt); 1721 return r; 1722 } 1723 if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { 1724 if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) 1725 tile_flags |= RADEON_TXO_MACRO_TILE; 1726 if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) 1727 tile_flags |= RADEON_TXO_MICRO_TILE_X2; 1728 1729 tmp = idx_value & ~(0x7 << 2); 1730 tmp |= tile_flags; 1731 ib[idx] = tmp + ((u32)reloc->lobj.gpu_offset); 1732 } else 1733 ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); 1734 track->textures[i].robj = reloc->robj; 1735 track->tex_dirty = true; 1736 break; 1737 case RADEON_PP_CUBIC_OFFSET_T0_0: 1738 case RADEON_PP_CUBIC_OFFSET_T0_1: 1739 case RADEON_PP_CUBIC_OFFSET_T0_2: 1740 case RADEON_PP_CUBIC_OFFSET_T0_3: 1741 case RADEON_PP_CUBIC_OFFSET_T0_4: 1742 i = (reg - RADEON_PP_CUBIC_OFFSET_T0_0) / 4; 1743 r = r100_cs_packet_next_reloc(p, &reloc); 1744 if (r) { 1745 DRM_ERROR("No reloc for ib[%d]=0x%04X\n", 1746 idx, reg); 1747 r100_cs_dump_packet(p, pkt); 1748 return r; 1749 } 1750 track->textures[0].cube_info[i].offset = idx_value; 1751 ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); 1752 track->textures[0].cube_info[i].robj = reloc->robj; 1753 track->tex_dirty = true; 1754 break; 1755 case RADEON_PP_CUBIC_OFFSET_T1_0: 1756 case RADEON_PP_CUBIC_OFFSET_T1_1: 1757 case RADEON_PP_CUBIC_OFFSET_T1_2: 1758 case RADEON_PP_CUBIC_OFFSET_T1_3: 1759 case RADEON_PP_CUBIC_OFFSET_T1_4: 1760 i = (reg - RADEON_PP_CUBIC_OFFSET_T1_0) / 4; 1761 r = r100_cs_packet_next_reloc(p, &reloc); 1762 if (r) { 1763 DRM_ERROR("No reloc for ib[%d]=0x%04X\n", 1764 idx, reg); 1765 r100_cs_dump_packet(p, pkt); 1766 return r; 1767 } 1768 track->textures[1].cube_info[i].offset = idx_value; 1769 ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); 1770 track->textures[1].cube_info[i].robj = reloc->robj; 1771 track->tex_dirty = true; 1772 break; 1773 case RADEON_PP_CUBIC_OFFSET_T2_0: 1774 case RADEON_PP_CUBIC_OFFSET_T2_1: 1775 case RADEON_PP_CUBIC_OFFSET_T2_2: 1776 case RADEON_PP_CUBIC_OFFSET_T2_3: 1777 case RADEON_PP_CUBIC_OFFSET_T2_4: 1778 i = (reg - RADEON_PP_CUBIC_OFFSET_T2_0) / 4; 1779 r = r100_cs_packet_next_reloc(p, &reloc); 1780 if (r) { 1781 DRM_ERROR("No reloc for ib[%d]=0x%04X\n", 1782 idx, reg); 1783 r100_cs_dump_packet(p, pkt); 1784 return r; 1785 } 1786 
track->textures[2].cube_info[i].offset = idx_value; 1787 ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); 1788 track->textures[2].cube_info[i].robj = reloc->robj; 1789 track->tex_dirty = true; 1790 break; 1791 case RADEON_RE_WIDTH_HEIGHT: 1792 track->maxy = ((idx_value >> 16) & 0x7FF); 1793 track->cb_dirty = true; 1794 track->zb_dirty = true; 1795 break; 1796 case RADEON_RB3D_COLORPITCH: 1797 r = r100_cs_packet_next_reloc(p, &reloc); 1798 if (r) { 1799 DRM_ERROR("No reloc for ib[%d]=0x%04X\n", 1800 idx, reg); 1801 r100_cs_dump_packet(p, pkt); 1802 return r; 1803 } 1804 if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { 1805 if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) 1806 tile_flags |= RADEON_COLOR_TILE_ENABLE; 1807 if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) 1808 tile_flags |= RADEON_COLOR_MICROTILE_ENABLE; 1809 1810 tmp = idx_value & ~(0x7 << 16); 1811 tmp |= tile_flags; 1812 ib[idx] = tmp; 1813 } else 1814 ib[idx] = idx_value; 1815 1816 track->cb[0].pitch = idx_value & RADEON_COLORPITCH_MASK; 1817 track->cb_dirty = true; 1818 break; 1819 case RADEON_RB3D_DEPTHPITCH: 1820 track->zb.pitch = idx_value & RADEON_DEPTHPITCH_MASK; 1821 track->zb_dirty = true; 1822 break; 1823 case RADEON_RB3D_CNTL: 1824 switch ((idx_value >> RADEON_RB3D_COLOR_FORMAT_SHIFT) & 0x1f) { 1825 case 7: 1826 case 8: 1827 case 9: 1828 case 11: 1829 case 12: 1830 track->cb[0].cpp = 1; 1831 break; 1832 case 3: 1833 case 4: 1834 case 15: 1835 track->cb[0].cpp = 2; 1836 break; 1837 case 6: 1838 track->cb[0].cpp = 4; 1839 break; 1840 default: 1841 DRM_ERROR("Invalid color buffer format (%d) !\n", 1842 ((idx_value >> RADEON_RB3D_COLOR_FORMAT_SHIFT) & 0x1f)); 1843 return -EINVAL; 1844 } 1845 track->z_enabled = !!(idx_value & RADEON_Z_ENABLE); 1846 track->cb_dirty = true; 1847 track->zb_dirty = true; 1848 break; 1849 case RADEON_RB3D_ZSTENCILCNTL: 1850 switch (idx_value & 0xf) { 1851 case 0: 1852 track->zb.cpp = 2; 1853 break; 1854 case 2: 1855 case 3: 1856 case 4: 1857 case 5: 1858 case 9: 1859 case 11: 1860 track->zb.cpp = 4; 1861 break; 1862 default: 1863 break; 1864 } 1865 track->zb_dirty = true; 1866 break; 1867 case RADEON_RB3D_ZPASS_ADDR: 1868 r = r100_cs_packet_next_reloc(p, &reloc); 1869 if (r) { 1870 DRM_ERROR("No reloc for ib[%d]=0x%04X\n", 1871 idx, reg); 1872 r100_cs_dump_packet(p, pkt); 1873 return r; 1874 } 1875 ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); 1876 break; 1877 case RADEON_PP_CNTL: 1878 { 1879 uint32_t temp = idx_value >> 4; 1880 for (i = 0; i < track->num_texture; i++) 1881 track->textures[i].enabled = !!(temp & (1 << i)); 1882 track->tex_dirty = true; 1883 } 1884 break; 1885 case RADEON_SE_VF_CNTL: 1886 track->vap_vf_cntl = idx_value; 1887 break; 1888 case RADEON_SE_VTX_FMT: 1889 track->vtx_size = r100_get_vtx_size(idx_value); 1890 break; 1891 case RADEON_PP_TEX_SIZE_0: 1892 case RADEON_PP_TEX_SIZE_1: 1893 case RADEON_PP_TEX_SIZE_2: 1894 i = (reg - RADEON_PP_TEX_SIZE_0) / 8; 1895 track->textures[i].width = (idx_value & RADEON_TEX_USIZE_MASK) + 1; 1896 track->textures[i].height = ((idx_value & RADEON_TEX_VSIZE_MASK) >> RADEON_TEX_VSIZE_SHIFT) + 1; 1897 track->tex_dirty = true; 1898 break; 1899 case RADEON_PP_TEX_PITCH_0: 1900 case RADEON_PP_TEX_PITCH_1: 1901 case RADEON_PP_TEX_PITCH_2: 1902 i = (reg - RADEON_PP_TEX_PITCH_0) / 8; 1903 track->textures[i].pitch = idx_value + 32; 1904 track->tex_dirty = true; 1905 break; 1906 case RADEON_PP_TXFILTER_0: 1907 case RADEON_PP_TXFILTER_1: 1908 case RADEON_PP_TXFILTER_2: 1909 i = (reg - RADEON_PP_TXFILTER_0) / 24; 1910 
track->textures[i].num_levels = ((idx_value & RADEON_MAX_MIP_LEVEL_MASK) 1911 >> RADEON_MAX_MIP_LEVEL_SHIFT); 1912 tmp = (idx_value >> 23) & 0x7; 1913 if (tmp == 2 || tmp == 6) 1914 track->textures[i].roundup_w = false; 1915 tmp = (idx_value >> 27) & 0x7; 1916 if (tmp == 2 || tmp == 6) 1917 track->textures[i].roundup_h = false; 1918 track->tex_dirty = true; 1919 break; 1920 case RADEON_PP_TXFORMAT_0: 1921 case RADEON_PP_TXFORMAT_1: 1922 case RADEON_PP_TXFORMAT_2: 1923 i = (reg - RADEON_PP_TXFORMAT_0) / 24; 1924 if (idx_value & RADEON_TXFORMAT_NON_POWER2) { 1925 track->textures[i].use_pitch = 1; 1926 } else { 1927 track->textures[i].use_pitch = 0; 1928 track->textures[i].width = 1 << ((idx_value >> RADEON_TXFORMAT_WIDTH_SHIFT) & RADEON_TXFORMAT_WIDTH_MASK); 1929 track->textures[i].height = 1 << ((idx_value >> RADEON_TXFORMAT_HEIGHT_SHIFT) & RADEON_TXFORMAT_HEIGHT_MASK); 1930 } 1931 if (idx_value & RADEON_TXFORMAT_CUBIC_MAP_ENABLE) 1932 track->textures[i].tex_coord_type = 2; 1933 switch ((idx_value & RADEON_TXFORMAT_FORMAT_MASK)) { 1934 case RADEON_TXFORMAT_I8: 1935 case RADEON_TXFORMAT_RGB332: 1936 case RADEON_TXFORMAT_Y8: 1937 track->textures[i].cpp = 1; 1938 track->textures[i].compress_format = R100_TRACK_COMP_NONE; 1939 break; 1940 case RADEON_TXFORMAT_AI88: 1941 case RADEON_TXFORMAT_ARGB1555: 1942 case RADEON_TXFORMAT_RGB565: 1943 case RADEON_TXFORMAT_ARGB4444: 1944 case RADEON_TXFORMAT_VYUY422: 1945 case RADEON_TXFORMAT_YVYU422: 1946 case RADEON_TXFORMAT_SHADOW16: 1947 case RADEON_TXFORMAT_LDUDV655: 1948 case RADEON_TXFORMAT_DUDV88: 1949 track->textures[i].cpp = 2; 1950 track->textures[i].compress_format = R100_TRACK_COMP_NONE; 1951 break; 1952 case RADEON_TXFORMAT_ARGB8888: 1953 case RADEON_TXFORMAT_RGBA8888: 1954 case RADEON_TXFORMAT_SHADOW32: 1955 case RADEON_TXFORMAT_LDUDUV8888: 1956 track->textures[i].cpp = 4; 1957 track->textures[i].compress_format = R100_TRACK_COMP_NONE; 1958 break; 1959 case RADEON_TXFORMAT_DXT1: 1960 track->textures[i].cpp = 1; 1961 track->textures[i].compress_format = R100_TRACK_COMP_DXT1; 1962 break; 1963 case RADEON_TXFORMAT_DXT23: 1964 case RADEON_TXFORMAT_DXT45: 1965 track->textures[i].cpp = 1; 1966 track->textures[i].compress_format = R100_TRACK_COMP_DXT35; 1967 break; 1968 } 1969 track->textures[i].cube_info[4].width = 1 << ((idx_value >> 16) & 0xf); 1970 track->textures[i].cube_info[4].height = 1 << ((idx_value >> 20) & 0xf); 1971 track->tex_dirty = true; 1972 break; 1973 case RADEON_PP_CUBIC_FACES_0: 1974 case RADEON_PP_CUBIC_FACES_1: 1975 case RADEON_PP_CUBIC_FACES_2: 1976 tmp = idx_value; 1977 i = (reg - RADEON_PP_CUBIC_FACES_0) / 4; 1978 for (face = 0; face < 4; face++) { 1979 track->textures[i].cube_info[face].width = 1 << ((tmp >> (face * 8)) & 0xf); 1980 track->textures[i].cube_info[face].height = 1 << ((tmp >> ((face * 8) + 4)) & 0xf); 1981 } 1982 track->tex_dirty = true; 1983 break; 1984 default: 1985 DRM_ERROR("Forbidden register 0x%04X in cs at %d\n", 1986 reg, idx); 1987 return -EINVAL; 1988 } 1989 return 0; 1990} 1991 1992int r100_cs_track_check_pkt3_indx_buffer(struct radeon_cs_parser *p, 1993 struct radeon_cs_packet *pkt, 1994 struct radeon_bo *robj) 1995{ 1996 unsigned idx; 1997 u32 value; 1998 idx = pkt->idx + 1; 1999 value = radeon_get_ib_value(p, idx + 2); 2000 if ((value + 1) > radeon_bo_size(robj)) { 2001 DRM_ERROR("[drm] Buffer too small for PACKET3 INDX_BUFFER " 2002 "(need %u have %lu) !\n", 2003 value + 1, 2004 radeon_bo_size(robj)); 2005 return -EINVAL; 2006 } 2007 return 0; 2008} 2009 2010static int r100_packet3_check(struct 
radeon_cs_parser *p, 2011 struct radeon_cs_packet *pkt) 2012{ 2013 struct radeon_cs_reloc *reloc; 2014 struct r100_cs_track *track; 2015 unsigned idx; 2016 volatile uint32_t *ib; 2017 int r; 2018 2019 ib = p->ib.ptr; 2020 idx = pkt->idx + 1; 2021 track = (struct r100_cs_track *)p->track; 2022 switch (pkt->opcode) { 2023 case PACKET3_3D_LOAD_VBPNTR: 2024 r = r100_packet3_load_vbpntr(p, pkt, idx); 2025 if (r) 2026 return r; 2027 break; 2028 case PACKET3_INDX_BUFFER: 2029 r = r100_cs_packet_next_reloc(p, &reloc); 2030 if (r) { 2031 DRM_ERROR("No reloc for packet3 %d\n", pkt->opcode); 2032 r100_cs_dump_packet(p, pkt); 2033 return r; 2034 } 2035 ib[idx+1] = radeon_get_ib_value(p, idx+1) + ((u32)reloc->lobj.gpu_offset); 2036 r = r100_cs_track_check_pkt3_indx_buffer(p, pkt, reloc->robj); 2037 if (r) { 2038 return r; 2039 } 2040 break; 2041 case 0x23: 2042 /* 3D_RNDR_GEN_INDX_PRIM on r100/r200 */ 2043 r = r100_cs_packet_next_reloc(p, &reloc); 2044 if (r) { 2045 DRM_ERROR("No reloc for packet3 %d\n", pkt->opcode); 2046 r100_cs_dump_packet(p, pkt); 2047 return r; 2048 } 2049 ib[idx] = radeon_get_ib_value(p, idx) + ((u32)reloc->lobj.gpu_offset); 2050 track->num_arrays = 1; 2051 track->vtx_size = r100_get_vtx_size(radeon_get_ib_value(p, idx + 2)); 2052 2053 track->arrays[0].robj = reloc->robj; 2054 track->arrays[0].esize = track->vtx_size; 2055 2056 track->max_indx = radeon_get_ib_value(p, idx+1); 2057 2058 track->vap_vf_cntl = radeon_get_ib_value(p, idx+3); 2059 track->immd_dwords = pkt->count - 1; 2060 r = r100_cs_track_check(p->rdev, track); 2061 if (r) 2062 return r; 2063 break; 2064 case PACKET3_3D_DRAW_IMMD: 2065 if (((radeon_get_ib_value(p, idx + 1) >> 4) & 0x3) != 3) { 2066 DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n"); 2067 return -EINVAL; 2068 } 2069 track->vtx_size = r100_get_vtx_size(radeon_get_ib_value(p, idx + 0)); 2070 track->vap_vf_cntl = radeon_get_ib_value(p, idx + 1); 2071 track->immd_dwords = pkt->count - 1; 2072 r = r100_cs_track_check(p->rdev, track); 2073 if (r) 2074 return r; 2075 break; 2076 /* triggers drawing using in-packet vertex data */ 2077 case PACKET3_3D_DRAW_IMMD_2: 2078 if (((radeon_get_ib_value(p, idx) >> 4) & 0x3) != 3) { 2079 DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n"); 2080 return -EINVAL; 2081 } 2082 track->vap_vf_cntl = radeon_get_ib_value(p, idx); 2083 track->immd_dwords = pkt->count; 2084 r = r100_cs_track_check(p->rdev, track); 2085 if (r) 2086 return r; 2087 break; 2088 /* triggers drawing using in-packet vertex data */ 2089 case PACKET3_3D_DRAW_VBUF_2: 2090 track->vap_vf_cntl = radeon_get_ib_value(p, idx); 2091 r = r100_cs_track_check(p->rdev, track); 2092 if (r) 2093 return r; 2094 break; 2095 /* triggers drawing of vertex buffers setup elsewhere */ 2096 case PACKET3_3D_DRAW_INDX_2: 2097 track->vap_vf_cntl = radeon_get_ib_value(p, idx); 2098 r = r100_cs_track_check(p->rdev, track); 2099 if (r) 2100 return r; 2101 break; 2102 /* triggers drawing using indices to vertex buffer */ 2103 case PACKET3_3D_DRAW_VBUF: 2104 track->vap_vf_cntl = radeon_get_ib_value(p, idx + 1); 2105 r = r100_cs_track_check(p->rdev, track); 2106 if (r) 2107 return r; 2108 break; 2109 /* triggers drawing of vertex buffers setup elsewhere */ 2110 case PACKET3_3D_DRAW_INDX: 2111 track->vap_vf_cntl = radeon_get_ib_value(p, idx + 1); 2112 r = r100_cs_track_check(p->rdev, track); 2113 if (r) 2114 return r; 2115 break; 2116 /* triggers drawing using indices to vertex buffer */ 2117 case PACKET3_3D_CLEAR_HIZ: 2118 case PACKET3_3D_CLEAR_ZMASK: 2119 if (p->rdev->hyperz_filp != p->filp) 
2120 return -EINVAL; 2121 break; 2122 case PACKET3_NOP: 2123 break; 2124 default: 2125 DRM_ERROR("Packet3 opcode %x not supported\n", pkt->opcode); 2126 return -EINVAL; 2127 } 2128 return 0; 2129} 2130 2131int r100_cs_parse(struct radeon_cs_parser *p) 2132{ 2133 struct radeon_cs_packet pkt; 2134 struct r100_cs_track *track; 2135 int r; 2136 2137 track = malloc(sizeof(*track), DRM_MEM_DRIVER, M_ZERO | M_WAITOK); 2138 if (!track) 2139 return -ENOMEM; 2140 r100_cs_track_clear(p->rdev, track); 2141 p->track = track; 2142 do { 2143 r = r100_cs_packet_parse(p, &pkt, p->idx); 2144 if (r) { 2145 free(p->track, DRM_MEM_DRIVER); 2146 p->track = NULL; 2147 return r; 2148 } 2149 p->idx += pkt.count + 2; 2150 switch (pkt.type) { 2151 case PACKET_TYPE0: 2152 if (p->rdev->family >= CHIP_R200) 2153 r = r100_cs_parse_packet0(p, &pkt, 2154 p->rdev->config.r100.reg_safe_bm, 2155 p->rdev->config.r100.reg_safe_bm_size, 2156 &r200_packet0_check); 2157 else 2158 r = r100_cs_parse_packet0(p, &pkt, 2159 p->rdev->config.r100.reg_safe_bm, 2160 p->rdev->config.r100.reg_safe_bm_size, 2161 &r100_packet0_check); 2162 break; 2163 case PACKET_TYPE2: 2164 break; 2165 case PACKET_TYPE3: 2166 r = r100_packet3_check(p, &pkt); 2167 break; 2168 default: 2169 DRM_ERROR("Unknown packet type %d !\n", 2170 pkt.type); 2171 free(p->track, DRM_MEM_DRIVER); 2172 p->track = NULL; 2173 return -EINVAL; 2174 } 2175 if (r) { 2176 free(p->track, DRM_MEM_DRIVER); 2177 p->track = NULL; 2178 return r; 2179 } 2180 } while (p->idx < p->chunks[p->chunk_ib_idx].length_dw); 2181 free(p->track, DRM_MEM_DRIVER); 2182 p->track = NULL; 2183 return 0; 2184} 2185 2186static void r100_cs_track_texture_print(struct r100_cs_track_texture *t) 2187{ 2188 DRM_ERROR("pitch %d\n", t->pitch); 2189 DRM_ERROR("use_pitch %d\n", t->use_pitch); 2190 DRM_ERROR("width %d\n", t->width); 2191 DRM_ERROR("width_11 %d\n", t->width_11); 2192 DRM_ERROR("height %d\n", t->height); 2193 DRM_ERROR("height_11 %d\n", t->height_11); 2194 DRM_ERROR("num levels %d\n", t->num_levels); 2195 DRM_ERROR("depth %d\n", t->txdepth); 2196 DRM_ERROR("bpp %d\n", t->cpp); 2197 DRM_ERROR("coordinate type %d\n", t->tex_coord_type); 2198 DRM_ERROR("width round to power of 2 %d\n", t->roundup_w); 2199 DRM_ERROR("height round to power of 2 %d\n", t->roundup_h); 2200 DRM_ERROR("compress format %d\n", t->compress_format); 2201} 2202 2203static int r100_track_compress_size(int compress_format, int w, int h) 2204{ 2205 int block_width, block_height, block_bytes; 2206 int wblocks, hblocks; 2207 int min_wblocks; 2208 int sz; 2209 2210 block_width = 4; 2211 block_height = 4; 2212 2213 switch (compress_format) { 2214 case R100_TRACK_COMP_DXT1: 2215 block_bytes = 8; 2216 min_wblocks = 4; 2217 break; 2218 default: 2219 case R100_TRACK_COMP_DXT35: 2220 block_bytes = 16; 2221 min_wblocks = 2; 2222 break; 2223 } 2224 2225 hblocks = (h + block_height - 1) / block_height; 2226 wblocks = (w + block_width - 1) / block_width; 2227 if (wblocks < min_wblocks) 2228 wblocks = min_wblocks; 2229 sz = wblocks * hblocks * block_bytes; 2230 return sz; 2231} 2232 2233static int r100_cs_track_cube(struct radeon_device *rdev, 2234 struct r100_cs_track *track, unsigned idx) 2235{ 2236 unsigned face, w, h; 2237 struct radeon_bo *cube_robj; 2238 unsigned long size; 2239 unsigned compress_format = track->textures[idx].compress_format; 2240 2241 for (face = 0; face < 5; face++) { 2242 cube_robj = track->textures[idx].cube_info[face].robj; 2243 w = track->textures[idx].cube_info[face].width; 2244 h = 
track->textures[idx].cube_info[face].height; 2245 2246 if (compress_format) { 2247 size = r100_track_compress_size(compress_format, w, h); 2248 } else 2249 size = w * h; 2250 size *= track->textures[idx].cpp; 2251 2252 size += track->textures[idx].cube_info[face].offset; 2253 2254 if (size > radeon_bo_size(cube_robj)) { 2255 DRM_ERROR("Cube texture offset greater than object size %lu %lu\n", 2256 size, radeon_bo_size(cube_robj)); 2257 r100_cs_track_texture_print(&track->textures[idx]); 2258 return -1; 2259 } 2260 } 2261 return 0; 2262} 2263 2264static int r100_cs_track_texture_check(struct radeon_device *rdev, 2265 struct r100_cs_track *track) 2266{ 2267 struct radeon_bo *robj; 2268 unsigned long size; 2269 unsigned u, i, w, h, d; 2270 int ret; 2271 2272 for (u = 0; u < track->num_texture; u++) { 2273 if (!track->textures[u].enabled) 2274 continue; 2275 if (track->textures[u].lookup_disable) 2276 continue; 2277 robj = track->textures[u].robj; 2278 if (robj == NULL) { 2279 DRM_ERROR("No texture bound to unit %u\n", u); 2280 return -EINVAL; 2281 } 2282 size = 0; 2283 for (i = 0; i <= track->textures[u].num_levels; i++) { 2284 if (track->textures[u].use_pitch) { 2285 if (rdev->family < CHIP_R300) 2286 w = (track->textures[u].pitch / track->textures[u].cpp) / (1 << i); 2287 else 2288 w = track->textures[u].pitch / (1 << i); 2289 } else { 2290 w = track->textures[u].width; 2291 if (rdev->family >= CHIP_RV515) 2292 w |= track->textures[u].width_11; 2293 w = w / (1 << i); 2294 if (track->textures[u].roundup_w) 2295 w = roundup_pow_of_two(w); 2296 } 2297 h = track->textures[u].height; 2298 if (rdev->family >= CHIP_RV515) 2299 h |= track->textures[u].height_11; 2300 h = h / (1 << i); 2301 if (track->textures[u].roundup_h) 2302 h = roundup_pow_of_two(h); 2303 if (track->textures[u].tex_coord_type == 1) { 2304 d = (1 << track->textures[u].txdepth) / (1 << i); 2305 if (!d) 2306 d = 1; 2307 } else { 2308 d = 1; 2309 } 2310 if (track->textures[u].compress_format) { 2311 2312 size += r100_track_compress_size(track->textures[u].compress_format, w, h) * d; 2313 /* compressed textures are block based */ 2314 } else 2315 size += w * h * d; 2316 } 2317 size *= track->textures[u].cpp; 2318 2319 switch (track->textures[u].tex_coord_type) { 2320 case 0: 2321 case 1: 2322 break; 2323 case 2: 2324 if (track->separate_cube) { 2325 ret = r100_cs_track_cube(rdev, track, u); 2326 if (ret) 2327 return ret; 2328 } else 2329 size *= 6; 2330 break; 2331 default: 2332 DRM_ERROR("Invalid texture coordinate type %u for unit " 2333 "%u\n", track->textures[u].tex_coord_type, u); 2334 return -EINVAL; 2335 } 2336 if (size > radeon_bo_size(robj)) { 2337 DRM_ERROR("Texture of unit %u needs %lu bytes but is " 2338 "%lu\n", u, size, radeon_bo_size(robj)); 2339 r100_cs_track_texture_print(&track->textures[u]); 2340 return -EINVAL; 2341 } 2342 } 2343 return 0; 2344} 2345 2346int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track) 2347{ 2348 unsigned i; 2349 unsigned long size; 2350 unsigned prim_walk; 2351 unsigned nverts; 2352 unsigned num_cb = track->cb_dirty ? 
track->num_cb : 0; 2353 2354 if (num_cb && !track->zb_cb_clear && !track->color_channel_mask && 2355 !track->blend_read_enable) 2356 num_cb = 0; 2357 2358 for (i = 0; i < num_cb; i++) { 2359 if (track->cb[i].robj == NULL) { 2360 DRM_ERROR("[drm] No buffer for color buffer %d !\n", i); 2361 return -EINVAL; 2362 } 2363 size = track->cb[i].pitch * track->cb[i].cpp * track->maxy; 2364 size += track->cb[i].offset; 2365 if (size > radeon_bo_size(track->cb[i].robj)) { 2366 DRM_ERROR("[drm] Buffer too small for color buffer %d " 2367 "(need %lu have %lu) !\n", i, size, 2368 radeon_bo_size(track->cb[i].robj)); 2369 DRM_ERROR("[drm] color buffer %d (%u %u %u %u)\n", 2370 i, track->cb[i].pitch, track->cb[i].cpp, 2371 track->cb[i].offset, track->maxy); 2372 return -EINVAL; 2373 } 2374 } 2375 track->cb_dirty = false; 2376 2377 if (track->zb_dirty && track->z_enabled) { 2378 if (track->zb.robj == NULL) { 2379 DRM_ERROR("[drm] No buffer for z buffer !\n"); 2380 return -EINVAL; 2381 } 2382 size = track->zb.pitch * track->zb.cpp * track->maxy; 2383 size += track->zb.offset; 2384 if (size > radeon_bo_size(track->zb.robj)) { 2385 DRM_ERROR("[drm] Buffer too small for z buffer " 2386 "(need %lu have %lu) !\n", size, 2387 radeon_bo_size(track->zb.robj)); 2388 DRM_ERROR("[drm] zbuffer (%u %u %u %u)\n", 2389 track->zb.pitch, track->zb.cpp, 2390 track->zb.offset, track->maxy); 2391 return -EINVAL; 2392 } 2393 } 2394 track->zb_dirty = false; 2395 2396 if (track->aa_dirty && track->aaresolve) { 2397 if (track->aa.robj == NULL) { 2398 DRM_ERROR("[drm] No buffer for AA resolve buffer %d !\n", i); 2399 return -EINVAL; 2400 } 2401 /* I believe the format comes from colorbuffer0. */ 2402 size = track->aa.pitch * track->cb[0].cpp * track->maxy; 2403 size += track->aa.offset; 2404 if (size > radeon_bo_size(track->aa.robj)) { 2405 DRM_ERROR("[drm] Buffer too small for AA resolve buffer %d " 2406 "(need %lu have %lu) !\n", i, size, 2407 radeon_bo_size(track->aa.robj)); 2408 DRM_ERROR("[drm] AA resolve buffer %d (%u %u %u %u)\n", 2409 i, track->aa.pitch, track->cb[0].cpp, 2410 track->aa.offset, track->maxy); 2411 return -EINVAL; 2412 } 2413 } 2414 track->aa_dirty = false; 2415 2416 prim_walk = (track->vap_vf_cntl >> 4) & 0x3; 2417 if (track->vap_vf_cntl & (1 << 14)) { 2418 nverts = track->vap_alt_nverts; 2419 } else { 2420 nverts = (track->vap_vf_cntl >> 16) & 0xFFFF; 2421 } 2422 switch (prim_walk) { 2423 case 1: 2424 for (i = 0; i < track->num_arrays; i++) { 2425 size = track->arrays[i].esize * track->max_indx * 4; 2426 if (track->arrays[i].robj == NULL) { 2427 DRM_ERROR("(PW %u) Vertex array %u no buffer " 2428 "bound\n", prim_walk, i); 2429 return -EINVAL; 2430 } 2431 if (size > radeon_bo_size(track->arrays[i].robj)) { 2432 dev_err(rdev->dev, "(PW %u) Vertex array %u " 2433 "need %lu dwords have %lu dwords\n", 2434 prim_walk, i, size >> 2, 2435 radeon_bo_size(track->arrays[i].robj) 2436 >> 2); 2437 DRM_ERROR("Max indices %u\n", track->max_indx); 2438 return -EINVAL; 2439 } 2440 } 2441 break; 2442 case 2: 2443 for (i = 0; i < track->num_arrays; i++) { 2444 size = track->arrays[i].esize * (nverts - 1) * 4; 2445 if (track->arrays[i].robj == NULL) { 2446 DRM_ERROR("(PW %u) Vertex array %u no buffer " 2447 "bound\n", prim_walk, i); 2448 return -EINVAL; 2449 } 2450 if (size > radeon_bo_size(track->arrays[i].robj)) { 2451 dev_err(rdev->dev, "(PW %u) Vertex array %u " 2452 "need %lu dwords have %lu dwords\n", 2453 prim_walk, i, size >> 2, 2454 radeon_bo_size(track->arrays[i].robj) 2455 >> 2); 2456 return -EINVAL; 2457 } 2458 } 
2459 break; 2460 case 3: 2461 size = track->vtx_size * nverts; 2462 if (size != track->immd_dwords) { 2463 DRM_ERROR("IMMD draw %u dwors but needs %lu dwords\n", 2464 track->immd_dwords, size); 2465 DRM_ERROR("VAP_VF_CNTL.NUM_VERTICES %u, VTX_SIZE %u\n", 2466 nverts, track->vtx_size); 2467 return -EINVAL; 2468 } 2469 break; 2470 default: 2471 DRM_ERROR("[drm] Invalid primitive walk %d for VAP_VF_CNTL\n", 2472 prim_walk); 2473 return -EINVAL; 2474 } 2475 2476 if (track->tex_dirty) { 2477 track->tex_dirty = false; 2478 return r100_cs_track_texture_check(rdev, track); 2479 } 2480 return 0; 2481} 2482 2483void r100_cs_track_clear(struct radeon_device *rdev, struct r100_cs_track *track) 2484{ 2485 unsigned i, face; 2486 2487 track->cb_dirty = true; 2488 track->zb_dirty = true; 2489 track->tex_dirty = true; 2490 track->aa_dirty = true; 2491 2492 if (rdev->family < CHIP_R300) { 2493 track->num_cb = 1; 2494 if (rdev->family <= CHIP_RS200) 2495 track->num_texture = 3; 2496 else 2497 track->num_texture = 6; 2498 track->maxy = 2048; 2499 track->separate_cube = 1; 2500 } else { 2501 track->num_cb = 4; 2502 track->num_texture = 16; 2503 track->maxy = 4096; 2504 track->separate_cube = 0; 2505 track->aaresolve = false; 2506 track->aa.robj = NULL; 2507 } 2508 2509 for (i = 0; i < track->num_cb; i++) { 2510 track->cb[i].robj = NULL; 2511 track->cb[i].pitch = 8192; 2512 track->cb[i].cpp = 16; 2513 track->cb[i].offset = 0; 2514 } 2515 track->z_enabled = true; 2516 track->zb.robj = NULL; 2517 track->zb.pitch = 8192; 2518 track->zb.cpp = 4; 2519 track->zb.offset = 0; 2520 track->vtx_size = 0x7F; 2521 track->immd_dwords = 0xFFFFFFFFUL; 2522 track->num_arrays = 11; 2523 track->max_indx = 0x00FFFFFFUL; 2524 for (i = 0; i < track->num_arrays; i++) { 2525 track->arrays[i].robj = NULL; 2526 track->arrays[i].esize = 0x7F; 2527 } 2528 for (i = 0; i < track->num_texture; i++) { 2529 track->textures[i].compress_format = R100_TRACK_COMP_NONE; 2530 track->textures[i].pitch = 16536; 2531 track->textures[i].width = 16536; 2532 track->textures[i].height = 16536; 2533 track->textures[i].width_11 = 1 << 11; 2534 track->textures[i].height_11 = 1 << 11; 2535 track->textures[i].num_levels = 12; 2536 if (rdev->family <= CHIP_RS200) { 2537 track->textures[i].tex_coord_type = 0; 2538 track->textures[i].txdepth = 0; 2539 } else { 2540 track->textures[i].txdepth = 16; 2541 track->textures[i].tex_coord_type = 1; 2542 } 2543 track->textures[i].cpp = 64; 2544 track->textures[i].robj = NULL; 2545 /* CS IB emission code makes sure texture unit are disabled */ 2546 track->textures[i].enabled = false; 2547 track->textures[i].lookup_disable = false; 2548 track->textures[i].roundup_w = true; 2549 track->textures[i].roundup_h = true; 2550 if (track->separate_cube) 2551 for (face = 0; face < 5; face++) { 2552 track->textures[i].cube_info[face].robj = NULL; 2553 track->textures[i].cube_info[face].width = 16536; 2554 track->textures[i].cube_info[face].height = 16536; 2555 track->textures[i].cube_info[face].offset = 0; 2556 } 2557 } 2558} 2559 2560/* 2561 * Global GPU functions 2562 */ 2563static void r100_errata(struct radeon_device *rdev) 2564{ 2565 rdev->pll_errata = 0; 2566 2567 if (rdev->family == CHIP_RV200 || rdev->family == CHIP_RS200) { 2568 rdev->pll_errata |= CHIP_ERRATA_PLL_DUMMYREADS; 2569 } 2570 2571 if (rdev->family == CHIP_RV100 || 2572 rdev->family == CHIP_RS100 || 2573 rdev->family == CHIP_RS200) { 2574 rdev->pll_errata |= CHIP_ERRATA_PLL_DELAY; 2575 } 2576} 2577 2578static int r100_rbbm_fifo_wait_for_entry(struct radeon_device 
*rdev, unsigned n) 2579{ 2580 unsigned i; 2581 uint32_t tmp; 2582 2583 for (i = 0; i < rdev->usec_timeout; i++) { 2584 tmp = RREG32(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK; 2585 if (tmp >= n) { 2586 return 0; 2587 } 2588 DRM_UDELAY(1); 2589 } 2590 return -1; 2591} 2592 2593int r100_gui_wait_for_idle(struct radeon_device *rdev) 2594{ 2595 unsigned i; 2596 uint32_t tmp; 2597 2598 if (r100_rbbm_fifo_wait_for_entry(rdev, 64)) { 2599 DRM_ERROR("radeon: wait for empty RBBM fifo failed !" 2600 " Bad things might happen.\n"); 2601 } 2602 for (i = 0; i < rdev->usec_timeout; i++) { 2603 tmp = RREG32(RADEON_RBBM_STATUS); 2604 if (!(tmp & RADEON_RBBM_ACTIVE)) { 2605 return 0; 2606 } 2607 DRM_UDELAY(1); 2608 } 2609 return -1; 2610} 2611 2612int r100_mc_wait_for_idle(struct radeon_device *rdev) 2613{ 2614 unsigned i; 2615 uint32_t tmp; 2616 2617 for (i = 0; i < rdev->usec_timeout; i++) { 2618 /* read MC_STATUS */ 2619 tmp = RREG32(RADEON_MC_STATUS); 2620 if (tmp & RADEON_MC_IDLE) { 2621 return 0; 2622 } 2623 DRM_UDELAY(1); 2624 } 2625 return -1; 2626} 2627 2628bool r100_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) 2629{ 2630 u32 rbbm_status; 2631 2632 rbbm_status = RREG32(R_000E40_RBBM_STATUS); 2633 if (!G_000E40_GUI_ACTIVE(rbbm_status)) { 2634 radeon_ring_lockup_update(ring); 2635 return false; 2636 } 2637 /* force CP activities */ 2638 radeon_ring_force_activity(rdev, ring); 2639 return radeon_ring_test_lockup(rdev, ring); 2640} 2641 2642/* required on r1xx, r2xx, r300, r(v)350, r420/r481, rs400/rs480 */ 2643void r100_enable_bm(struct radeon_device *rdev) 2644{ 2645 uint32_t tmp; 2646 /* Enable bus mastering */ 2647 tmp = RREG32(RADEON_BUS_CNTL) & ~RADEON_BUS_MASTER_DIS; 2648 WREG32(RADEON_BUS_CNTL, tmp); 2649} 2650 2651void r100_bm_disable(struct radeon_device *rdev) 2652{ 2653 u32 tmp; 2654 2655 /* disable bus mastering */ 2656 tmp = RREG32(R_000030_BUS_CNTL); 2657 WREG32(R_000030_BUS_CNTL, (tmp & 0xFFFFFFFF) | 0x00000044); 2658 mdelay(1); 2659 WREG32(R_000030_BUS_CNTL, (tmp & 0xFFFFFFFF) | 0x00000042); 2660 mdelay(1); 2661 WREG32(R_000030_BUS_CNTL, (tmp & 0xFFFFFFFF) | 0x00000040); 2662 tmp = RREG32(RADEON_BUS_CNTL); 2663 mdelay(1); 2664 pci_disable_busmaster(rdev->dev); 2665 mdelay(1); 2666} 2667 2668int r100_asic_reset(struct radeon_device *rdev) 2669{ 2670 struct r100_mc_save save; 2671 u32 status, tmp; 2672 int ret = 0; 2673 2674 status = RREG32(R_000E40_RBBM_STATUS); 2675 if (!G_000E40_GUI_ACTIVE(status)) { 2676 return 0; 2677 } 2678 r100_mc_stop(rdev, &save); 2679 status = RREG32(R_000E40_RBBM_STATUS); 2680 dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status); 2681 /* stop CP */ 2682 WREG32(RADEON_CP_CSQ_CNTL, 0); 2683 tmp = RREG32(RADEON_CP_RB_CNTL); 2684 WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_RPTR_WR_ENA); 2685 WREG32(RADEON_CP_RB_RPTR_WR, 0); 2686 WREG32(RADEON_CP_RB_WPTR, 0); 2687 WREG32(RADEON_CP_RB_CNTL, tmp); 2688 /* save PCI state */ 2689 pci_save_state(device_get_parent(rdev->dev)); 2690 /* disable bus mastering */ 2691 r100_bm_disable(rdev); 2692 WREG32(R_0000F0_RBBM_SOFT_RESET, S_0000F0_SOFT_RESET_SE(1) | 2693 S_0000F0_SOFT_RESET_RE(1) | 2694 S_0000F0_SOFT_RESET_PP(1) | 2695 S_0000F0_SOFT_RESET_RB(1)); 2696 RREG32(R_0000F0_RBBM_SOFT_RESET); 2697 mdelay(500); 2698 WREG32(R_0000F0_RBBM_SOFT_RESET, 0); 2699 mdelay(1); 2700 status = RREG32(R_000E40_RBBM_STATUS); 2701 dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status); 2702 /* reset CP */ 2703 WREG32(R_0000F0_RBBM_SOFT_RESET, 
S_0000F0_SOFT_RESET_CP(1)); 2704 RREG32(R_0000F0_RBBM_SOFT_RESET); 2705 mdelay(500); 2706 WREG32(R_0000F0_RBBM_SOFT_RESET, 0); 2707 mdelay(1); 2708 status = RREG32(R_000E40_RBBM_STATUS); 2709 dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status); 2710 /* restore PCI & busmastering */ 2711 pci_restore_state(device_get_parent(rdev->dev)); 2712 r100_enable_bm(rdev); 2713 /* Check if GPU is idle */ 2714 if (G_000E40_SE_BUSY(status) || G_000E40_RE_BUSY(status) || 2715 G_000E40_TAM_BUSY(status) || G_000E40_PB_BUSY(status)) { 2716 dev_err(rdev->dev, "failed to reset GPU\n"); 2717 ret = -1; 2718 } else 2719 dev_info(rdev->dev, "GPU reset succeeded\n"); 2720 r100_mc_resume(rdev, &save); 2721 return ret; 2722} 2723 2724void r100_set_common_regs(struct radeon_device *rdev) 2725{ 2726 struct drm_device *dev = rdev->ddev; 2727 bool force_dac2 = false; 2728 u32 tmp; 2729 2730 /* set these so they don't interfere with anything */ 2731 WREG32(RADEON_OV0_SCALE_CNTL, 0); 2732 WREG32(RADEON_SUBPIC_CNTL, 0); 2733 WREG32(RADEON_VIPH_CONTROL, 0); 2734 WREG32(RADEON_I2C_CNTL_1, 0); 2735 WREG32(RADEON_DVI_I2C_CNTL_1, 0); 2736 WREG32(RADEON_CAP0_TRIG_CNTL, 0); 2737 WREG32(RADEON_CAP1_TRIG_CNTL, 0); 2738 2739 /* always set up dac2 on rn50 and some rv100 as lots 2740 * of servers seem to wire it up to a VGA port but 2741 * don't report it in the bios connector 2742 * table. 2743 */ 2744 switch (dev->pci_device) { 2745 /* RN50 */ 2746 case 0x515e: 2747 case 0x5969: 2748 force_dac2 = true; 2749 break; 2750 /* RV100*/ 2751 case 0x5159: 2752 case 0x515a: 2753 /* DELL triple head servers */ 2754 if ((dev->pci_subvendor == 0x1028 /* DELL */) && 2755 ((dev->pci_subdevice == 0x016c) || 2756 (dev->pci_subdevice == 0x016d) || 2757 (dev->pci_subdevice == 0x016e) || 2758 (dev->pci_subdevice == 0x016f) || 2759 (dev->pci_subdevice == 0x0170) || 2760 (dev->pci_subdevice == 0x017d) || 2761 (dev->pci_subdevice == 0x017e) || 2762 (dev->pci_subdevice == 0x0183) || 2763 (dev->pci_subdevice == 0x018a) || 2764 (dev->pci_subdevice == 0x019a))) 2765 force_dac2 = true; 2766 break; 2767 } 2768 2769 if (force_dac2) { 2770 u32 disp_hw_debug = RREG32(RADEON_DISP_HW_DEBUG); 2771 u32 tv_dac_cntl = RREG32(RADEON_TV_DAC_CNTL); 2772 u32 dac2_cntl = RREG32(RADEON_DAC_CNTL2); 2773 2774 /* For CRT on DAC2, don't turn it on if BIOS didn't 2775 enable it, even if it's detected.
2776 */ 2777 2778 /* force it to crtc0 */ 2779 dac2_cntl &= ~RADEON_DAC2_DAC_CLK_SEL; 2780 dac2_cntl |= RADEON_DAC2_DAC2_CLK_SEL; 2781 disp_hw_debug |= RADEON_CRT2_DISP1_SEL; 2782 2783 /* set up the TV DAC */ 2784 tv_dac_cntl &= ~(RADEON_TV_DAC_PEDESTAL | 2785 RADEON_TV_DAC_STD_MASK | 2786 RADEON_TV_DAC_RDACPD | 2787 RADEON_TV_DAC_GDACPD | 2788 RADEON_TV_DAC_BDACPD | 2789 RADEON_TV_DAC_BGADJ_MASK | 2790 RADEON_TV_DAC_DACADJ_MASK); 2791 tv_dac_cntl |= (RADEON_TV_DAC_NBLANK | 2792 RADEON_TV_DAC_NHOLD | 2793 RADEON_TV_DAC_STD_PS2 | 2794 (0x58 << 16)); 2795 2796 WREG32(RADEON_TV_DAC_CNTL, tv_dac_cntl); 2797 WREG32(RADEON_DISP_HW_DEBUG, disp_hw_debug); 2798 WREG32(RADEON_DAC_CNTL2, dac2_cntl); 2799 } 2800 2801 /* switch PM block to ACPI mode */ 2802 tmp = RREG32_PLL(RADEON_PLL_PWRMGT_CNTL); 2803 tmp &= ~RADEON_PM_MODE_SEL; 2804 WREG32_PLL(RADEON_PLL_PWRMGT_CNTL, tmp); 2805 2806} 2807 2808/* 2809 * VRAM info 2810 */ 2811static void r100_vram_get_type(struct radeon_device *rdev) 2812{ 2813 uint32_t tmp; 2814 2815 rdev->mc.vram_is_ddr = false; 2816 if (rdev->flags & RADEON_IS_IGP) 2817 rdev->mc.vram_is_ddr = true; 2818 else if (RREG32(RADEON_MEM_SDRAM_MODE_REG) & RADEON_MEM_CFG_TYPE_DDR) 2819 rdev->mc.vram_is_ddr = true; 2820 if ((rdev->family == CHIP_RV100) || 2821 (rdev->family == CHIP_RS100) || 2822 (rdev->family == CHIP_RS200)) { 2823 tmp = RREG32(RADEON_MEM_CNTL); 2824 if (tmp & RV100_HALF_MODE) { 2825 rdev->mc.vram_width = 32; 2826 } else { 2827 rdev->mc.vram_width = 64; 2828 } 2829 if (rdev->flags & RADEON_SINGLE_CRTC) { 2830 rdev->mc.vram_width /= 4; 2831 rdev->mc.vram_is_ddr = true; 2832 } 2833 } else if (rdev->family <= CHIP_RV280) { 2834 tmp = RREG32(RADEON_MEM_CNTL); 2835 if (tmp & RADEON_MEM_NUM_CHANNELS_MASK) { 2836 rdev->mc.vram_width = 128; 2837 } else { 2838 rdev->mc.vram_width = 64; 2839 } 2840 } else { 2841 /* newer IGPs */ 2842 rdev->mc.vram_width = 128; 2843 } 2844} 2845 2846static u32 r100_get_accessible_vram(struct radeon_device *rdev) 2847{ 2848 u32 aper_size; 2849 u8 byte; 2850 2851 aper_size = RREG32(RADEON_CONFIG_APER_SIZE); 2852 2853 /* Set HDP_APER_CNTL only on cards that are known not to be broken, 2854 * that is has the 2nd generation multifunction PCI interface 2855 */ 2856 if (rdev->family == CHIP_RV280 || 2857 rdev->family >= CHIP_RV350) { 2858 WREG32_P(RADEON_HOST_PATH_CNTL, RADEON_HDP_APER_CNTL, 2859 ~RADEON_HDP_APER_CNTL); 2860 DRM_INFO("Generation 2 PCI interface, using max accessible memory\n"); 2861 return aper_size * 2; 2862 } 2863 2864 /* Older cards have all sorts of funny issues to deal with. First 2865 * check if it's a multifunction card by reading the PCI config 2866 * header type... Limit those to one aperture size 2867 */ 2868 byte = pci_read_config(rdev->dev, 0xe, 1); 2869 if (byte & 0x80) { 2870 DRM_INFO("Generation 1 PCI interface in multifunction mode\n"); 2871 DRM_INFO("Limiting VRAM to one aperture\n"); 2872 return aper_size; 2873 } 2874 2875 /* Single function older card. We read HDP_APER_CNTL to see how the BIOS 2876 * have set it up. We don't write this as it's broken on some ASICs but 2877 * we expect the BIOS to have done the right thing (might be too optimistic...) 
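 * (if the BIOS has already set HDP_APER_CNTL we report twice the aperture size below, otherwise we fall back to a single aperture)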
2878 */ 2879 if (RREG32(RADEON_HOST_PATH_CNTL) & RADEON_HDP_APER_CNTL) 2880 return aper_size * 2; 2881 return aper_size; 2882} 2883 2884void r100_vram_init_sizes(struct radeon_device *rdev) 2885{ 2886 u64 config_aper_size; 2887 2888 /* work out accessible VRAM */ 2889 rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0); 2890 rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0); 2891 rdev->mc.visible_vram_size = r100_get_accessible_vram(rdev); 2892 /* FIXME we don't use the second aperture yet when we could use it */ 2893 if (rdev->mc.visible_vram_size > rdev->mc.aper_size) 2894 rdev->mc.visible_vram_size = rdev->mc.aper_size; 2895 config_aper_size = RREG32(RADEON_CONFIG_APER_SIZE); 2896 if (rdev->flags & RADEON_IS_IGP) { 2897 uint32_t tom; 2898 /* read NB_TOM to get the amount of ram stolen for the GPU */ 2899 tom = RREG32(RADEON_NB_TOM); 2900 rdev->mc.real_vram_size = (((tom >> 16) - (tom & 0xffff) + 1) << 16); 2901 WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.real_vram_size); 2902 rdev->mc.mc_vram_size = rdev->mc.real_vram_size; 2903 } else { 2904 rdev->mc.real_vram_size = RREG32(RADEON_CONFIG_MEMSIZE); 2905 /* Some production boards of m6 will report 0 2906 * if it's 8 MB 2907 */ 2908 if (rdev->mc.real_vram_size == 0) { 2909 rdev->mc.real_vram_size = 8192 * 1024; 2910 WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.real_vram_size); 2911 } 2912 /* Fix for RN50, M6, M7 with 8/16/32(??) MBs of VRAM - 2913 * Novell bug 204882 + along with lots of ubuntu ones 2914 */ 2915 if (rdev->mc.aper_size > config_aper_size) 2916 config_aper_size = rdev->mc.aper_size; 2917 2918 if (config_aper_size > rdev->mc.real_vram_size) 2919 rdev->mc.mc_vram_size = config_aper_size; 2920 else 2921 rdev->mc.mc_vram_size = rdev->mc.real_vram_size; 2922 } 2923} 2924 2925void r100_vga_set_state(struct radeon_device *rdev, bool state) 2926{ 2927 uint32_t temp; 2928 2929 temp = RREG32(RADEON_CONFIG_CNTL); 2930 if (state == false) { 2931 temp &= ~RADEON_CFG_VGA_RAM_EN; 2932 temp |= RADEON_CFG_VGA_IO_DIS; 2933 } else { 2934 temp &= ~RADEON_CFG_VGA_IO_DIS; 2935 } 2936 WREG32(RADEON_CONFIG_CNTL, temp); 2937} 2938 2939static void r100_mc_init(struct radeon_device *rdev) 2940{ 2941 u64 base; 2942 2943 r100_vram_get_type(rdev); 2944 r100_vram_init_sizes(rdev); 2945 base = rdev->mc.aper_base; 2946 if (rdev->flags & RADEON_IS_IGP) 2947 base = (RREG32(RADEON_NB_TOM) & 0xffff) << 16; 2948 radeon_vram_location(rdev, &rdev->mc, base); 2949 rdev->mc.gtt_base_align = 0; 2950 if (!(rdev->flags & RADEON_IS_AGP)) 2951 radeon_gtt_location(rdev, &rdev->mc); 2952 radeon_update_bandwidth_info(rdev); 2953} 2954 2955 2956/* 2957 * Indirect registers accessor 2958 */ 2959void r100_pll_errata_after_index(struct radeon_device *rdev) 2960{ 2961 if (rdev->pll_errata & CHIP_ERRATA_PLL_DUMMYREADS) { 2962 (void)RREG32(RADEON_CLOCK_CNTL_DATA); 2963 (void)RREG32(RADEON_CRTC_GEN_CNTL); 2964 } 2965} 2966 2967static void r100_pll_errata_after_data(struct radeon_device *rdev) 2968{ 2969 /* This workarounds is necessary on RV100, RS100 and RS200 chips 2970 * or the chip could hang on a subsequent access 2971 */ 2972 if (rdev->pll_errata & CHIP_ERRATA_PLL_DELAY) { 2973 mdelay(5); 2974 } 2975 2976 /* This function is required to workaround a hardware bug in some (all?) 2977 * revisions of the R300. This workaround should be called after every 2978 * CLOCK_CNTL_INDEX register access. If not, register reads afterward 2979 * may not be correct. 
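 * The workaround below saves CLOCK_CNTL_INDEX, rewrites it with the PLL address bits and PLL_WR_EN cleared, performs a dummy read of CLOCK_CNTL_DATA and then restores the saved index.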
2980 */ 2981 if (rdev->pll_errata & CHIP_ERRATA_R300_CG) { 2982 uint32_t save, tmp; 2983 2984 save = RREG32(RADEON_CLOCK_CNTL_INDEX); 2985 tmp = save & ~(0x3f | RADEON_PLL_WR_EN); 2986 WREG32(RADEON_CLOCK_CNTL_INDEX, tmp); 2987 tmp = RREG32(RADEON_CLOCK_CNTL_DATA); 2988 WREG32(RADEON_CLOCK_CNTL_INDEX, save); 2989 } 2990} 2991 2992uint32_t r100_pll_rreg(struct radeon_device *rdev, uint32_t reg) 2993{ 2994 uint32_t data; 2995 2996 WREG8(RADEON_CLOCK_CNTL_INDEX, reg & 0x3f); 2997 r100_pll_errata_after_index(rdev); 2998 data = RREG32(RADEON_CLOCK_CNTL_DATA); 2999 r100_pll_errata_after_data(rdev); 3000 return data; 3001} 3002 3003void r100_pll_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v) 3004{ 3005 WREG8(RADEON_CLOCK_CNTL_INDEX, ((reg & 0x3f) | RADEON_PLL_WR_EN)); 3006 r100_pll_errata_after_index(rdev); 3007 WREG32(RADEON_CLOCK_CNTL_DATA, v); 3008 r100_pll_errata_after_data(rdev); 3009} 3010 3011static void r100_set_safe_registers(struct radeon_device *rdev) 3012{ 3013 if (ASIC_IS_RN50(rdev)) { 3014 rdev->config.r100.reg_safe_bm = rn50_reg_safe_bm; 3015 rdev->config.r100.reg_safe_bm_size = ARRAY_SIZE(rn50_reg_safe_bm); 3016 } else if (rdev->family < CHIP_R200) { 3017 rdev->config.r100.reg_safe_bm = r100_reg_safe_bm; 3018 rdev->config.r100.reg_safe_bm_size = ARRAY_SIZE(r100_reg_safe_bm); 3019 } else { 3020 r200_set_safe_registers(rdev); 3021 } 3022} 3023 3024/* 3025 * Debugfs info 3026 */ 3027#if defined(CONFIG_DEBUG_FS) 3028static int r100_debugfs_rbbm_info(struct seq_file *m, void *data) 3029{ 3030 struct drm_info_node *node = (struct drm_info_node *) m->private; 3031 struct drm_device *dev = node->minor->dev; 3032 struct radeon_device *rdev = dev->dev_private; 3033 uint32_t reg, value; 3034 unsigned i; 3035 3036 seq_printf(m, "RBBM_STATUS 0x%08x\n", RREG32(RADEON_RBBM_STATUS)); 3037 seq_printf(m, "RBBM_CMDFIFO_STAT 0x%08x\n", RREG32(0xE7C)); 3038 seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT)); 3039 for (i = 0; i < 64; i++) { 3040 WREG32(RADEON_RBBM_CMDFIFO_ADDR, i | 0x100); 3041 reg = (RREG32(RADEON_RBBM_CMDFIFO_DATA) - 1) >> 2; 3042 WREG32(RADEON_RBBM_CMDFIFO_ADDR, i); 3043 value = RREG32(RADEON_RBBM_CMDFIFO_DATA); 3044 seq_printf(m, "[0x%03X] 0x%04X=0x%08X\n", i, reg, value); 3045 } 3046 return 0; 3047} 3048 3049static int r100_debugfs_cp_ring_info(struct seq_file *m, void *data) 3050{ 3051 struct drm_info_node *node = (struct drm_info_node *) m->private; 3052 struct drm_device *dev = node->minor->dev; 3053 struct radeon_device *rdev = dev->dev_private; 3054 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; 3055 uint32_t rdp, wdp; 3056 unsigned count, i, j; 3057 3058 radeon_ring_free_size(rdev, ring); 3059 rdp = RREG32(RADEON_CP_RB_RPTR); 3060 wdp = RREG32(RADEON_CP_RB_WPTR); 3061 count = (rdp + ring->ring_size - wdp) & ring->ptr_mask; 3062 seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT)); 3063 seq_printf(m, "CP_RB_WPTR 0x%08x\n", wdp); 3064 seq_printf(m, "CP_RB_RPTR 0x%08x\n", rdp); 3065 seq_printf(m, "%u free dwords in ring\n", ring->ring_free_dw); 3066 seq_printf(m, "%u dwords in ring\n", count); 3067 for (j = 0; j <= count; j++) { 3068 i = (rdp + j) & ring->ptr_mask; 3069 seq_printf(m, "r[%04d]=0x%08x\n", i, ring->ring[i]); 3070 } 3071 return 0; 3072} 3073 3074 3075static int r100_debugfs_cp_csq_fifo(struct seq_file *m, void *data) 3076{ 3077 struct drm_info_node *node = (struct drm_info_node *) m->private; 3078 struct drm_device *dev = node->minor->dev; 3079 struct radeon_device *rdev = dev->dev_private; 3080 uint32_t csq_stat, 
csq2_stat, tmp; 3081 unsigned r_rptr, r_wptr, ib1_rptr, ib1_wptr, ib2_rptr, ib2_wptr; 3082 unsigned i; 3083 3084 seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT)); 3085 seq_printf(m, "CP_CSQ_MODE 0x%08x\n", RREG32(RADEON_CP_CSQ_MODE)); 3086 csq_stat = RREG32(RADEON_CP_CSQ_STAT); 3087 csq2_stat = RREG32(RADEON_CP_CSQ2_STAT); 3088 r_rptr = (csq_stat >> 0) & 0x3ff; 3089 r_wptr = (csq_stat >> 10) & 0x3ff; 3090 ib1_rptr = (csq_stat >> 20) & 0x3ff; 3091 ib1_wptr = (csq2_stat >> 0) & 0x3ff; 3092 ib2_rptr = (csq2_stat >> 10) & 0x3ff; 3093 ib2_wptr = (csq2_stat >> 20) & 0x3ff; 3094 seq_printf(m, "CP_CSQ_STAT 0x%08x\n", csq_stat); 3095 seq_printf(m, "CP_CSQ2_STAT 0x%08x\n", csq2_stat); 3096 seq_printf(m, "Ring rptr %u\n", r_rptr); 3097 seq_printf(m, "Ring wptr %u\n", r_wptr); 3098 seq_printf(m, "Indirect1 rptr %u\n", ib1_rptr); 3099 seq_printf(m, "Indirect1 wptr %u\n", ib1_wptr); 3100 seq_printf(m, "Indirect2 rptr %u\n", ib2_rptr); 3101 seq_printf(m, "Indirect2 wptr %u\n", ib2_wptr); 3102 /* FIXME: 0, 128, 640 depends on fifo setup see cp_init_kms 3103 * 128 = indirect1_start * 8 & 640 = indirect2_start * 8 */ 3104 seq_printf(m, "Ring fifo:\n"); 3105 for (i = 0; i < 256; i++) { 3106 WREG32(RADEON_CP_CSQ_ADDR, i << 2); 3107 tmp = RREG32(RADEON_CP_CSQ_DATA); 3108 seq_printf(m, "rfifo[%04d]=0x%08X\n", i, tmp); 3109 } 3110 seq_printf(m, "Indirect1 fifo:\n"); 3111 for (i = 256; i <= 512; i++) { 3112 WREG32(RADEON_CP_CSQ_ADDR, i << 2); 3113 tmp = RREG32(RADEON_CP_CSQ_DATA); 3114 seq_printf(m, "ib1fifo[%04d]=0x%08X\n", i, tmp); 3115 } 3116 seq_printf(m, "Indirect2 fifo:\n"); 3117 for (i = 640; i < ib1_wptr; i++) { 3118 WREG32(RADEON_CP_CSQ_ADDR, i << 2); 3119 tmp = RREG32(RADEON_CP_CSQ_DATA); 3120 seq_printf(m, "ib2fifo[%04d]=0x%08X\n", i, tmp); 3121 } 3122 return 0; 3123} 3124 3125static int r100_debugfs_mc_info(struct seq_file *m, void *data) 3126{ 3127 struct drm_info_node *node = (struct drm_info_node *) m->private; 3128 struct drm_device *dev = node->minor->dev; 3129 struct radeon_device *rdev = dev->dev_private; 3130 uint32_t tmp; 3131 3132 tmp = RREG32(RADEON_CONFIG_MEMSIZE); 3133 seq_printf(m, "CONFIG_MEMSIZE 0x%08x\n", tmp); 3134 tmp = RREG32(RADEON_MC_FB_LOCATION); 3135 seq_printf(m, "MC_FB_LOCATION 0x%08x\n", tmp); 3136 tmp = RREG32(RADEON_BUS_CNTL); 3137 seq_printf(m, "BUS_CNTL 0x%08x\n", tmp); 3138 tmp = RREG32(RADEON_MC_AGP_LOCATION); 3139 seq_printf(m, "MC_AGP_LOCATION 0x%08x\n", tmp); 3140 tmp = RREG32(RADEON_AGP_BASE); 3141 seq_printf(m, "AGP_BASE 0x%08x\n", tmp); 3142 tmp = RREG32(RADEON_HOST_PATH_CNTL); 3143 seq_printf(m, "HOST_PATH_CNTL 0x%08x\n", tmp); 3144 tmp = RREG32(0x01D0); 3145 seq_printf(m, "AIC_CTRL 0x%08x\n", tmp); 3146 tmp = RREG32(RADEON_AIC_LO_ADDR); 3147 seq_printf(m, "AIC_LO_ADDR 0x%08x\n", tmp); 3148 tmp = RREG32(RADEON_AIC_HI_ADDR); 3149 seq_printf(m, "AIC_HI_ADDR 0x%08x\n", tmp); 3150 tmp = RREG32(0x01E4); 3151 seq_printf(m, "AIC_TLB_ADDR 0x%08x\n", tmp); 3152 return 0; 3153} 3154 3155static struct drm_info_list r100_debugfs_rbbm_list[] = { 3156 {"r100_rbbm_info", r100_debugfs_rbbm_info, 0, NULL}, 3157}; 3158 3159static struct drm_info_list r100_debugfs_cp_list[] = { 3160 {"r100_cp_ring_info", r100_debugfs_cp_ring_info, 0, NULL}, 3161 {"r100_cp_csq_fifo", r100_debugfs_cp_csq_fifo, 0, NULL}, 3162}; 3163 3164static struct drm_info_list r100_debugfs_mc_info_list[] = { 3165 {"r100_mc_info", r100_debugfs_mc_info, 0, NULL}, 3166}; 3167#endif 3168 3169int r100_debugfs_rbbm_init(struct radeon_device *rdev) 3170{ 3171#if defined(CONFIG_DEBUG_FS) 3172 return 
radeon_debugfs_add_files(rdev, r100_debugfs_rbbm_list, 1); 3173#else 3174 return 0; 3175#endif 3176} 3177 3178int r100_debugfs_cp_init(struct radeon_device *rdev) 3179{ 3180#if defined(CONFIG_DEBUG_FS) 3181 return radeon_debugfs_add_files(rdev, r100_debugfs_cp_list, 2); 3182#else 3183 return 0; 3184#endif 3185} 3186 3187int r100_debugfs_mc_info_init(struct radeon_device *rdev) 3188{ 3189#if defined(CONFIG_DEBUG_FS) 3190 return radeon_debugfs_add_files(rdev, r100_debugfs_mc_info_list, 1); 3191#else 3192 return 0; 3193#endif 3194} 3195 3196int r100_set_surface_reg(struct radeon_device *rdev, int reg, 3197 uint32_t tiling_flags, uint32_t pitch, 3198 uint32_t offset, uint32_t obj_size) 3199{ 3200 int surf_index = reg * 16; 3201 int flags = 0; 3202 3203 if (rdev->family <= CHIP_RS200) { 3204 if ((tiling_flags & (RADEON_TILING_MACRO|RADEON_TILING_MICRO)) 3205 == (RADEON_TILING_MACRO|RADEON_TILING_MICRO)) 3206 flags |= RADEON_SURF_TILE_COLOR_BOTH; 3207 if (tiling_flags & RADEON_TILING_MACRO) 3208 flags |= RADEON_SURF_TILE_COLOR_MACRO; 3209 } else if (rdev->family <= CHIP_RV280) { 3210 if (tiling_flags & (RADEON_TILING_MACRO)) 3211 flags |= R200_SURF_TILE_COLOR_MACRO; 3212 if (tiling_flags & RADEON_TILING_MICRO) 3213 flags |= R200_SURF_TILE_COLOR_MICRO; 3214 } else { 3215 if (tiling_flags & RADEON_TILING_MACRO) 3216 flags |= R300_SURF_TILE_MACRO; 3217 if (tiling_flags & RADEON_TILING_MICRO) 3218 flags |= R300_SURF_TILE_MICRO; 3219 } 3220 3221 if (tiling_flags & RADEON_TILING_SWAP_16BIT) 3222 flags |= RADEON_SURF_AP0_SWP_16BPP | RADEON_SURF_AP1_SWP_16BPP; 3223 if (tiling_flags & RADEON_TILING_SWAP_32BIT) 3224 flags |= RADEON_SURF_AP0_SWP_32BPP | RADEON_SURF_AP1_SWP_32BPP; 3225 3226 /* when we aren't tiling, the pitch seems to need to be further divided down.
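(for example, an untiled 16bpp byte-swapped RN50 scanout with a 2048-byte pitch ends up with 2048 / 16 / 16 = 8 in the pitch field)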
- tested on power5 + rn50 server */ 3227 if (tiling_flags & (RADEON_TILING_SWAP_16BIT | RADEON_TILING_SWAP_32BIT)) { 3228 if (!(tiling_flags & (RADEON_TILING_MACRO | RADEON_TILING_MICRO))) 3229 if (ASIC_IS_RN50(rdev)) 3230 pitch /= 16; 3231 } 3232 3233 /* r100/r200 divide by 16 */ 3234 if (rdev->family < CHIP_R300) 3235 flags |= pitch / 16; 3236 else 3237 flags |= pitch / 8; 3238 3239 3240 DRM_DEBUG_KMS("writing surface %d %d %x %x\n", reg, flags, offset, offset+obj_size-1); 3241 WREG32(RADEON_SURFACE0_INFO + surf_index, flags); 3242 WREG32(RADEON_SURFACE0_LOWER_BOUND + surf_index, offset); 3243 WREG32(RADEON_SURFACE0_UPPER_BOUND + surf_index, offset + obj_size - 1); 3244 return 0; 3245} 3246 3247void r100_clear_surface_reg(struct radeon_device *rdev, int reg) 3248{ 3249 int surf_index = reg * 16; 3250 WREG32(RADEON_SURFACE0_INFO + surf_index, 0); 3251} 3252 3253void r100_bandwidth_update(struct radeon_device *rdev) 3254{ 3255 fixed20_12 trcd_ff, trp_ff, tras_ff, trbs_ff, tcas_ff; 3256 fixed20_12 sclk_ff, mclk_ff, sclk_eff_ff, sclk_delay_ff; 3257 fixed20_12 peak_disp_bw, mem_bw, pix_clk, pix_clk2, temp_ff, crit_point_ff; 3258 uint32_t temp, data, mem_trcd, mem_trp, mem_tras; 3259 fixed20_12 memtcas_ff[8] = { 3260 dfixed_init(1), 3261 dfixed_init(2), 3262 dfixed_init(3), 3263 dfixed_init(0), 3264 dfixed_init_half(1), 3265 dfixed_init_half(2), 3266 dfixed_init(0), 3267 }; 3268 fixed20_12 memtcas_rs480_ff[8] = { 3269 dfixed_init(0), 3270 dfixed_init(1), 3271 dfixed_init(2), 3272 dfixed_init(3), 3273 dfixed_init(0), 3274 dfixed_init_half(1), 3275 dfixed_init_half(2), 3276 dfixed_init_half(3), 3277 }; 3278 fixed20_12 memtcas2_ff[8] = { 3279 dfixed_init(0), 3280 dfixed_init(1), 3281 dfixed_init(2), 3282 dfixed_init(3), 3283 dfixed_init(4), 3284 dfixed_init(5), 3285 dfixed_init(6), 3286 dfixed_init(7), 3287 }; 3288 fixed20_12 memtrbs[8] = { 3289 dfixed_init(1), 3290 dfixed_init_half(1), 3291 dfixed_init(2), 3292 dfixed_init_half(2), 3293 dfixed_init(3), 3294 dfixed_init_half(3), 3295 dfixed_init(4), 3296 dfixed_init_half(4) 3297 }; 3298 fixed20_12 memtrbs_r4xx[8] = { 3299 dfixed_init(4), 3300 dfixed_init(5), 3301 dfixed_init(6), 3302 dfixed_init(7), 3303 dfixed_init(8), 3304 dfixed_init(9), 3305 dfixed_init(10), 3306 dfixed_init(11) 3307 }; 3308 fixed20_12 min_mem_eff; 3309 fixed20_12 mc_latency_sclk, mc_latency_mclk, k1; 3310 fixed20_12 cur_latency_mclk, cur_latency_sclk; 3311 fixed20_12 disp_latency, disp_latency_overhead, disp_drain_rate, 3312 disp_drain_rate2, read_return_rate; 3313 fixed20_12 time_disp1_drop_priority; 3314 int c; 3315 int cur_size = 16; /* in octawords */ 3316 int critical_point = 0, critical_point2; 3317/* uint32_t read_return_rate, time_disp1_drop_priority; */ 3318 int stop_req, max_stop_req; 3319 struct drm_display_mode *mode1 = NULL; 3320 struct drm_display_mode *mode2 = NULL; 3321 uint32_t pixel_bytes1 = 0; 3322 uint32_t pixel_bytes2 = 0; 3323 3324 radeon_update_display_priority(rdev); 3325 3326 if (rdev->mode_info.crtcs[0]->base.enabled) { 3327 mode1 = &rdev->mode_info.crtcs[0]->base.mode; 3328 pixel_bytes1 = rdev->mode_info.crtcs[0]->base.fb->bits_per_pixel / 8; 3329 } 3330 if (!(rdev->flags & RADEON_SINGLE_CRTC)) { 3331 if (rdev->mode_info.crtcs[1]->base.enabled) { 3332 mode2 = &rdev->mode_info.crtcs[1]->base.mode; 3333 pixel_bytes2 = rdev->mode_info.crtcs[1]->base.fb->bits_per_pixel / 8; 3334 } 3335 } 3336 3337 min_mem_eff.full = dfixed_const_8(0); 3338 /* get modes */ 3339 if ((rdev->disp_priority == 2) && ASIC_IS_R300(rdev)) { 3340 uint32_t mc_init_misc_lat_timer 
= RREG32(R300_MC_INIT_MISC_LAT_TIMER); 3341 mc_init_misc_lat_timer &= ~(R300_MC_DISP1R_INIT_LAT_MASK << R300_MC_DISP1R_INIT_LAT_SHIFT); 3342 mc_init_misc_lat_timer &= ~(R300_MC_DISP0R_INIT_LAT_MASK << R300_MC_DISP0R_INIT_LAT_SHIFT); 3343 /* check crtc enables */ 3344 if (mode2) 3345 mc_init_misc_lat_timer |= (1 << R300_MC_DISP1R_INIT_LAT_SHIFT); 3346 if (mode1) 3347 mc_init_misc_lat_timer |= (1 << R300_MC_DISP0R_INIT_LAT_SHIFT); 3348 WREG32(R300_MC_INIT_MISC_LAT_TIMER, mc_init_misc_lat_timer); 3349 } 3350 3351 /* 3352 * determine if there is enough bandwidth for the current mode 3353 */ 3354 sclk_ff = rdev->pm.sclk; 3355 mclk_ff = rdev->pm.mclk; 3356 3357 temp = (rdev->mc.vram_width / 8) * (rdev->mc.vram_is_ddr ? 2 : 1); 3358 temp_ff.full = dfixed_const(temp); 3359 mem_bw.full = dfixed_mul(mclk_ff, temp_ff); 3360 3361 pix_clk.full = 0; 3362 pix_clk2.full = 0; 3363 peak_disp_bw.full = 0; 3364 if (mode1) { 3365 temp_ff.full = dfixed_const(1000); 3366 pix_clk.full = dfixed_const(mode1->clock); /* convert to fixed point */ 3367 pix_clk.full = dfixed_div(pix_clk, temp_ff); 3368 temp_ff.full = dfixed_const(pixel_bytes1); 3369 peak_disp_bw.full += dfixed_mul(pix_clk, temp_ff); 3370 } 3371 if (mode2) { 3372 temp_ff.full = dfixed_const(1000); 3373 pix_clk2.full = dfixed_const(mode2->clock); /* convert to fixed point */ 3374 pix_clk2.full = dfixed_div(pix_clk2, temp_ff); 3375 temp_ff.full = dfixed_const(pixel_bytes2); 3376 peak_disp_bw.full += dfixed_mul(pix_clk2, temp_ff); 3377 } 3378 3379 mem_bw.full = dfixed_mul(mem_bw, min_mem_eff); 3380 if (peak_disp_bw.full >= mem_bw.full) { 3381 DRM_ERROR("You may not have enough display bandwidth for the current mode\n" 3382 "If you have flickering problems, try lowering the resolution, refresh rate, or color depth\n"); 3383 } 3384 3385 /* Get values from the EXT_MEM_CNTL register...converting its contents.
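The decode below is per chip family; on R300/R350, for instance, tRCD comes from bits 2:0 plus 1, tRP from bits 10:8 plus 1 and tRAS from bits 14:11 plus 4, all in memory clocks.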
*/ 3386 temp = RREG32(RADEON_MEM_TIMING_CNTL); 3387 if ((rdev->family == CHIP_RV100) || (rdev->flags & RADEON_IS_IGP)) { /* RV100, M6, IGPs */ 3388 mem_trcd = ((temp >> 2) & 0x3) + 1; 3389 mem_trp = ((temp & 0x3)) + 1; 3390 mem_tras = ((temp & 0x70) >> 4) + 1; 3391 } else if (rdev->family == CHIP_R300 || 3392 rdev->family == CHIP_R350) { /* r300, r350 */ 3393 mem_trcd = (temp & 0x7) + 1; 3394 mem_trp = ((temp >> 8) & 0x7) + 1; 3395 mem_tras = ((temp >> 11) & 0xf) + 4; 3396 } else if (rdev->family == CHIP_RV350 || 3397 rdev->family <= CHIP_RV380) { 3398 /* rv3x0 */ 3399 mem_trcd = (temp & 0x7) + 3; 3400 mem_trp = ((temp >> 8) & 0x7) + 3; 3401 mem_tras = ((temp >> 11) & 0xf) + 6; 3402 } else if (rdev->family == CHIP_R420 || 3403 rdev->family == CHIP_R423 || 3404 rdev->family == CHIP_RV410) { 3405 /* r4xx */ 3406 mem_trcd = (temp & 0xf) + 3; 3407 if (mem_trcd > 15) 3408 mem_trcd = 15; 3409 mem_trp = ((temp >> 8) & 0xf) + 3; 3410 if (mem_trp > 15) 3411 mem_trp = 15; 3412 mem_tras = ((temp >> 12) & 0x1f) + 6; 3413 if (mem_tras > 31) 3414 mem_tras = 31; 3415 } else { /* RV200, R200 */ 3416 mem_trcd = (temp & 0x7) + 1; 3417 mem_trp = ((temp >> 8) & 0x7) + 1; 3418 mem_tras = ((temp >> 12) & 0xf) + 4; 3419 } 3420 /* convert to FF */ 3421 trcd_ff.full = dfixed_const(mem_trcd); 3422 trp_ff.full = dfixed_const(mem_trp); 3423 tras_ff.full = dfixed_const(mem_tras); 3424 3425 /* Get values from the MEM_SDRAM_MODE_REG register...converting its */ 3426 temp = RREG32(RADEON_MEM_SDRAM_MODE_REG); 3427 data = (temp & (7 << 20)) >> 20; 3428 if ((rdev->family == CHIP_RV100) || rdev->flags & RADEON_IS_IGP) { 3429 if (rdev->family == CHIP_RS480) /* don't think rs400 */ 3430 tcas_ff = memtcas_rs480_ff[data]; 3431 else 3432 tcas_ff = memtcas_ff[data]; 3433 } else 3434 tcas_ff = memtcas2_ff[data]; 3435 3436 if (rdev->family == CHIP_RS400 || 3437 rdev->family == CHIP_RS480) { 3438 /* extra cas latency stored in bits 23-25 0-4 clocks */ 3439 data = (temp >> 23) & 0x7; 3440 if (data < 5) 3441 tcas_ff.full += dfixed_const(data); 3442 } 3443 3444 if (ASIC_IS_R300(rdev) && !(rdev->flags & RADEON_IS_IGP)) { 3445 /* on the R300, Tcas is included in Trbs. 3446 */ 3447 temp = RREG32(RADEON_MEM_CNTL); 3448 data = (R300_MEM_NUM_CHANNELS_MASK & temp); 3449 if (data == 1) { 3450 if (R300_MEM_USE_CD_CH_ONLY & temp) { 3451 temp = RREG32(R300_MC_IND_INDEX); 3452 temp &= ~R300_MC_IND_ADDR_MASK; 3453 temp |= R300_MC_READ_CNTL_CD_mcind; 3454 WREG32(R300_MC_IND_INDEX, temp); 3455 temp = RREG32(R300_MC_IND_DATA); 3456 data = (R300_MEM_RBS_POSITION_C_MASK & temp); 3457 } else { 3458 temp = RREG32(R300_MC_READ_CNTL_AB); 3459 data = (R300_MEM_RBS_POSITION_A_MASK & temp); 3460 } 3461 } else { 3462 temp = RREG32(R300_MC_READ_CNTL_AB); 3463 data = (R300_MEM_RBS_POSITION_A_MASK & temp); 3464 } 3465 if (rdev->family == CHIP_RV410 || 3466 rdev->family == CHIP_R420 || 3467 rdev->family == CHIP_R423) 3468 trbs_ff = memtrbs_r4xx[data]; 3469 else 3470 trbs_ff = memtrbs[data]; 3471 tcas_ff.full += trbs_ff.full; 3472 } 3473 3474 sclk_eff_ff.full = sclk_ff.full; 3475 3476 if (rdev->flags & RADEON_IS_AGP) { 3477 fixed20_12 agpmode_ff; 3478 agpmode_ff.full = dfixed_const(radeon_agpmode); 3479 temp_ff.full = dfixed_const_666(16); 3480 sclk_eff_ff.full -= dfixed_mul(agpmode_ff, temp_ff); 3481 } 3482 /* TODO PCIE lanes may affect this - agpmode == 16?? 
*/ 3483 3484 if (ASIC_IS_R300(rdev)) { 3485 sclk_delay_ff.full = dfixed_const(250); 3486 } else { 3487 if ((rdev->family == CHIP_RV100) || 3488 rdev->flags & RADEON_IS_IGP) { 3489 if (rdev->mc.vram_is_ddr) 3490 sclk_delay_ff.full = dfixed_const(41); 3491 else 3492 sclk_delay_ff.full = dfixed_const(33); 3493 } else { 3494 if (rdev->mc.vram_width == 128) 3495 sclk_delay_ff.full = dfixed_const(57); 3496 else 3497 sclk_delay_ff.full = dfixed_const(41); 3498 } 3499 } 3500 3501 mc_latency_sclk.full = dfixed_div(sclk_delay_ff, sclk_eff_ff); 3502 3503 if (rdev->mc.vram_is_ddr) { 3504 if (rdev->mc.vram_width == 32) { 3505 k1.full = dfixed_const(40); 3506 c = 3; 3507 } else { 3508 k1.full = dfixed_const(20); 3509 c = 1; 3510 } 3511 } else { 3512 k1.full = dfixed_const(40); 3513 c = 3; 3514 } 3515 3516 temp_ff.full = dfixed_const(2); 3517 mc_latency_mclk.full = dfixed_mul(trcd_ff, temp_ff); 3518 temp_ff.full = dfixed_const(c); 3519 mc_latency_mclk.full += dfixed_mul(tcas_ff, temp_ff); 3520 temp_ff.full = dfixed_const(4); 3521 mc_latency_mclk.full += dfixed_mul(tras_ff, temp_ff); 3522 mc_latency_mclk.full += dfixed_mul(trp_ff, temp_ff); 3523 mc_latency_mclk.full += k1.full; 3524 3525 mc_latency_mclk.full = dfixed_div(mc_latency_mclk, mclk_ff); 3526 mc_latency_mclk.full += dfixed_div(temp_ff, sclk_eff_ff); 3527 3528 /* 3529 HW cursor time assuming worst case of full size colour cursor. 3530 */ 3531 temp_ff.full = dfixed_const((2 * (cur_size - (rdev->mc.vram_is_ddr + 1)))); 3532 temp_ff.full += trcd_ff.full; 3533 if (temp_ff.full < tras_ff.full) 3534 temp_ff.full = tras_ff.full; 3535 cur_latency_mclk.full = dfixed_div(temp_ff, mclk_ff); 3536 3537 temp_ff.full = dfixed_const(cur_size); 3538 cur_latency_sclk.full = dfixed_div(temp_ff, sclk_eff_ff); 3539 /* 3540 Find the total latency for the display data. 3541 */ 3542 disp_latency_overhead.full = dfixed_const(8); 3543 disp_latency_overhead.full = dfixed_div(disp_latency_overhead, sclk_ff); 3544 mc_latency_mclk.full += disp_latency_overhead.full + cur_latency_mclk.full; 3545 mc_latency_sclk.full += disp_latency_overhead.full + cur_latency_sclk.full; 3546 3547 if (mc_latency_mclk.full > mc_latency_sclk.full) 3548 disp_latency.full = mc_latency_mclk.full; 3549 else 3550 disp_latency.full = mc_latency_sclk.full; 3551 3552 /* setup Max GRPH_STOP_REQ default value */ 3553 if (ASIC_IS_RV100(rdev)) 3554 max_stop_req = 0x5c; 3555 else 3556 max_stop_req = 0x7c; 3557 3558 if (mode1) { 3559 /* CRTC1 3560 Set GRPH_BUFFER_CNTL register using h/w defined optimal values. 3561 GRPH_STOP_REQ <= MIN[ 0x7C, (CRTC_H_DISP + 1) * (bit depth) / 0x10 ] 3562 */ 3563 stop_req = mode1->hdisplay * pixel_bytes1 / 16; 3564 3565 if (stop_req > max_stop_req) 3566 stop_req = max_stop_req; 3567 3568 /* 3569 Find the drain rate of the display buffer. 3570 */ 3571 temp_ff.full = dfixed_const((16/pixel_bytes1)); 3572 disp_drain_rate.full = dfixed_div(pix_clk, temp_ff); 3573 3574 /* 3575 Find the critical point of the display buffer. 3576 */ 3577 crit_point_ff.full = dfixed_mul(disp_drain_rate, disp_latency); 3578 crit_point_ff.full += dfixed_const_half(0); 3579 3580 critical_point = dfixed_trunc(crit_point_ff); 3581 3582 if (rdev->disp_priority == 2) { 3583 critical_point = 0; 3584 } 3585 3586 /* 3587 The critical point should never be above max_stop_req-4. Setting 3588 GRPH_CRITICAL_CNTL = 0 will thus force high priority all the time. 
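For example, with max_stop_req of 0x7c a computed critical point of 0x7a would leave less than 4 requests of headroom, so it is clamped to 0 below.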
3589 */ 3590 if (max_stop_req - critical_point < 4) 3591 critical_point = 0; 3592 3593 if (critical_point == 0 && mode2 && rdev->family == CHIP_R300) { 3594 /* some R300 cards have problem with this set to 0, when CRTC2 is enabled.*/ 3595 critical_point = 0x10; 3596 } 3597 3598 temp = RREG32(RADEON_GRPH_BUFFER_CNTL); 3599 temp &= ~(RADEON_GRPH_STOP_REQ_MASK); 3600 temp |= (stop_req << RADEON_GRPH_STOP_REQ_SHIFT); 3601 temp &= ~(RADEON_GRPH_START_REQ_MASK); 3602 if ((rdev->family == CHIP_R350) && 3603 (stop_req > 0x15)) { 3604 stop_req -= 0x10; 3605 } 3606 temp |= (stop_req << RADEON_GRPH_START_REQ_SHIFT); 3607 temp |= RADEON_GRPH_BUFFER_SIZE; 3608 temp &= ~(RADEON_GRPH_CRITICAL_CNTL | 3609 RADEON_GRPH_CRITICAL_AT_SOF | 3610 RADEON_GRPH_STOP_CNTL); 3611 /* 3612 Write the result into the register. 3613 */ 3614 WREG32(RADEON_GRPH_BUFFER_CNTL, ((temp & ~RADEON_GRPH_CRITICAL_POINT_MASK) | 3615 (critical_point << RADEON_GRPH_CRITICAL_POINT_SHIFT))); 3616 3617#if 0 3618 if ((rdev->family == CHIP_RS400) || 3619 (rdev->family == CHIP_RS480)) { 3620 /* attempt to program RS400 disp regs correctly ??? */ 3621 temp = RREG32(RS400_DISP1_REG_CNTL); 3622 temp &= ~(RS400_DISP1_START_REQ_LEVEL_MASK | 3623 RS400_DISP1_STOP_REQ_LEVEL_MASK); 3624 WREG32(RS400_DISP1_REQ_CNTL1, (temp | 3625 (critical_point << RS400_DISP1_START_REQ_LEVEL_SHIFT) | 3626 (critical_point << RS400_DISP1_STOP_REQ_LEVEL_SHIFT))); 3627 temp = RREG32(RS400_DMIF_MEM_CNTL1); 3628 temp &= ~(RS400_DISP1_CRITICAL_POINT_START_MASK | 3629 RS400_DISP1_CRITICAL_POINT_STOP_MASK); 3630 WREG32(RS400_DMIF_MEM_CNTL1, (temp | 3631 (critical_point << RS400_DISP1_CRITICAL_POINT_START_SHIFT) | 3632 (critical_point << RS400_DISP1_CRITICAL_POINT_STOP_SHIFT))); 3633 } 3634#endif 3635 3636 DRM_DEBUG_KMS("GRPH_BUFFER_CNTL from to %x\n", 3637 /* (unsigned int)info->SavedReg->grph_buffer_cntl, */ 3638 (unsigned int)RREG32(RADEON_GRPH_BUFFER_CNTL)); 3639 } 3640 3641 if (mode2) { 3642 u32 grph2_cntl; 3643 stop_req = mode2->hdisplay * pixel_bytes2 / 16; 3644 3645 if (stop_req > max_stop_req) 3646 stop_req = max_stop_req; 3647 3648 /* 3649 Find the drain rate of the display buffer. 
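This works out to the pixel clock (in MHz) divided by the number of pixels that fit in one octaword (16 / pixel_bytes2), i.e. roughly the octaword fetch rate needed to keep up with scanout.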
3650 */ 3651 temp_ff.full = dfixed_const((16/pixel_bytes2)); 3652 disp_drain_rate2.full = dfixed_div(pix_clk2, temp_ff); 3653 3654 grph2_cntl = RREG32(RADEON_GRPH2_BUFFER_CNTL); 3655 grph2_cntl &= ~(RADEON_GRPH_STOP_REQ_MASK); 3656 grph2_cntl |= (stop_req << RADEON_GRPH_STOP_REQ_SHIFT); 3657 grph2_cntl &= ~(RADEON_GRPH_START_REQ_MASK); 3658 if ((rdev->family == CHIP_R350) && 3659 (stop_req > 0x15)) { 3660 stop_req -= 0x10; 3661 } 3662 grph2_cntl |= (stop_req << RADEON_GRPH_START_REQ_SHIFT); 3663 grph2_cntl |= RADEON_GRPH_BUFFER_SIZE; 3664 grph2_cntl &= ~(RADEON_GRPH_CRITICAL_CNTL | 3665 RADEON_GRPH_CRITICAL_AT_SOF | 3666 RADEON_GRPH_STOP_CNTL); 3667 3668 if ((rdev->family == CHIP_RS100) || 3669 (rdev->family == CHIP_RS200)) 3670 critical_point2 = 0; 3671 else { 3672 temp = (rdev->mc.vram_width * rdev->mc.vram_is_ddr + 1)/128; 3673 temp_ff.full = dfixed_const(temp); 3674 temp_ff.full = dfixed_mul(mclk_ff, temp_ff); 3675 if (sclk_ff.full < temp_ff.full) 3676 temp_ff.full = sclk_ff.full; 3677 3678 read_return_rate.full = temp_ff.full; 3679 3680 if (mode1) { 3681 temp_ff.full = read_return_rate.full - disp_drain_rate.full; 3682 time_disp1_drop_priority.full = dfixed_div(crit_point_ff, temp_ff); 3683 } else { 3684 time_disp1_drop_priority.full = 0; 3685 } 3686 crit_point_ff.full = disp_latency.full + time_disp1_drop_priority.full + disp_latency.full; 3687 crit_point_ff.full = dfixed_mul(crit_point_ff, disp_drain_rate2); 3688 crit_point_ff.full += dfixed_const_half(0); 3689 3690 critical_point2 = dfixed_trunc(crit_point_ff); 3691 3692 if (rdev->disp_priority == 2) { 3693 critical_point2 = 0; 3694 } 3695 3696 if (max_stop_req - critical_point2 < 4) 3697 critical_point2 = 0; 3698 3699 } 3700 3701 if (critical_point2 == 0 && rdev->family == CHIP_R300) { 3702 /* some R300 cards have problem with this set to 0 */ 3703 critical_point2 = 0x10; 3704 } 3705 3706 WREG32(RADEON_GRPH2_BUFFER_CNTL, ((grph2_cntl & ~RADEON_GRPH_CRITICAL_POINT_MASK) | 3707 (critical_point2 << RADEON_GRPH_CRITICAL_POINT_SHIFT))); 3708 3709 if ((rdev->family == CHIP_RS400) || 3710 (rdev->family == CHIP_RS480)) { 3711#if 0 3712 /* attempt to program RS400 disp2 regs correctly ??? 
int r100_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = radeon_scratch_get(rdev, &scratch);
	if (r) {
		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	r = radeon_ring_lock(rdev, ring, 2);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		radeon_scratch_free(rdev, scratch);
		return r;
	}
	radeon_ring_write(ring, PACKET0(scratch, 0));
	radeon_ring_write(ring, 0xDEADBEEF);
	radeon_ring_unlock_commit(rdev, ring);
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF) {
			break;
		}
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ring test succeeded in %d usecs\n", i);
	} else {
		DRM_ERROR("radeon: ring test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
	radeon_scratch_free(rdev, scratch);
	return r;
}

void r100_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];

	if (ring->rptr_save_reg) {
		u32 next_rptr = ring->wptr + 2 + 3;
		radeon_ring_write(ring, PACKET0(ring->rptr_save_reg, 0));
		radeon_ring_write(ring, next_rptr);
	}

	radeon_ring_write(ring, PACKET0(RADEON_CP_IB_BASE, 1));
	radeon_ring_write(ring, ib->gpu_addr);
	radeon_ring_write(ring, ib->length_dw);
}

int r100_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	struct radeon_ib ib;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = radeon_scratch_get(rdev, &scratch);
	if (r) {
		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	r = radeon_ib_get(rdev, RADEON_RING_TYPE_GFX_INDEX, &ib, NULL, 256);
	if (r) {
		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
		goto free_scratch;
	}
	ib.ptr[0] = PACKET0(scratch, 0);
	ib.ptr[1] = 0xDEADBEEF;
	ib.ptr[2] = PACKET2(0);
	ib.ptr[3] = PACKET2(0);
	ib.ptr[4] = PACKET2(0);
	ib.ptr[5] = PACKET2(0);
	ib.ptr[6] = PACKET2(0);
	ib.ptr[7] = PACKET2(0);
	ib.length_dw = 8;
	r = radeon_ib_schedule(rdev, &ib, NULL);
	if (r) {
		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
		goto free_ib;
	}
	r = radeon_fence_wait(ib.fence, false);
	if (r) {
		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
		goto free_ib;
	}
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF) {
			break;
		}
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ib test succeeded in %u usecs\n", i);
	} else {
		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
free_ib:
	radeon_ib_free(rdev, &ib);
free_scratch:
	radeon_scratch_free(rdev, scratch);
	return r;
}

void r100_mc_stop(struct radeon_device *rdev, struct r100_mc_save *save)
{
	/* Shut down the CP; we shouldn't need to do this, but better safe
	 * than sorry.
	 */
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
	WREG32(R_000740_CP_CSQ_CNTL, 0);

	/* Save a few CRTC registers */
	save->GENMO_WT = RREG8(R_0003C2_GENMO_WT);
	save->CRTC_EXT_CNTL = RREG32(R_000054_CRTC_EXT_CNTL);
	save->CRTC_GEN_CNTL = RREG32(R_000050_CRTC_GEN_CNTL);
	save->CUR_OFFSET = RREG32(R_000260_CUR_OFFSET);
	if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
		save->CRTC2_GEN_CNTL = RREG32(R_0003F8_CRTC2_GEN_CNTL);
		save->CUR2_OFFSET = RREG32(R_000360_CUR2_OFFSET);
	}

	/* Disable VGA aperture access */
	WREG8(R_0003C2_GENMO_WT, C_0003C2_VGA_RAM_EN & save->GENMO_WT);
	/* Disable cursor, overlay, crtc */
	WREG32(R_000260_CUR_OFFSET, save->CUR_OFFSET | S_000260_CUR_LOCK(1));
	WREG32(R_000054_CRTC_EXT_CNTL, save->CRTC_EXT_CNTL |
					S_000054_CRTC_DISPLAY_DIS(1));
	WREG32(R_000050_CRTC_GEN_CNTL,
			(C_000050_CRTC_CUR_EN & save->CRTC_GEN_CNTL) |
			S_000050_CRTC_DISP_REQ_EN_B(1));
	WREG32(R_000420_OV0_SCALE_CNTL,
		C_000420_OV0_OVERLAY_EN & RREG32(R_000420_OV0_SCALE_CNTL));
	WREG32(R_000260_CUR_OFFSET, C_000260_CUR_LOCK & save->CUR_OFFSET);
	if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
		WREG32(R_000360_CUR2_OFFSET, save->CUR2_OFFSET |
						S_000360_CUR2_LOCK(1));
		WREG32(R_0003F8_CRTC2_GEN_CNTL,
			(C_0003F8_CRTC2_CUR_EN & save->CRTC2_GEN_CNTL) |
			S_0003F8_CRTC2_DISPLAY_DIS(1) |
			S_0003F8_CRTC2_DISP_REQ_EN_B(1));
		WREG32(R_000360_CUR2_OFFSET,
			C_000360_CUR2_LOCK & save->CUR2_OFFSET);
	}
}

void r100_mc_resume(struct radeon_device *rdev, struct r100_mc_save *save)
{
	/* Update base address for crtc */
	WREG32(R_00023C_DISPLAY_BASE_ADDR, rdev->mc.vram_start);
	if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
		WREG32(R_00033C_CRTC2_DISPLAY_BASE_ADDR, rdev->mc.vram_start);
	}
	/* Restore CRTC registers */
	WREG8(R_0003C2_GENMO_WT, save->GENMO_WT);
	WREG32(R_000054_CRTC_EXT_CNTL, save->CRTC_EXT_CNTL);
	WREG32(R_000050_CRTC_GEN_CNTL, save->CRTC_GEN_CNTL);
	if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
		WREG32(R_0003F8_CRTC2_GEN_CNTL, save->CRTC2_GEN_CNTL);
	}
}

void r100_vga_render_disable(struct radeon_device *rdev)
{
	u32 tmp;

	tmp = RREG8(R_0003C2_GENMO_WT);
	WREG8(R_0003C2_GENMO_WT, C_0003C2_VGA_RAM_EN & tmp);
}

static void r100_debugfs(struct radeon_device *rdev)
{
	int r;

	r = r100_debugfs_mc_info_init(rdev);
	if (r)
		dev_warn(rdev->dev, "Failed to create r100_mc debugfs file.\n");
}

static void r100_mc_program(struct radeon_device *rdev)
{
	struct r100_mc_save save;

	/* Stop all MC clients */
	r100_mc_stop(rdev, &save);
	if (rdev->flags & RADEON_IS_AGP) {
		WREG32(R_00014C_MC_AGP_LOCATION,
			S_00014C_MC_AGP_START(rdev->mc.gtt_start >> 16) |
			S_00014C_MC_AGP_TOP(rdev->mc.gtt_end >> 16));
		WREG32(R_000170_AGP_BASE, lower_32_bits(rdev->mc.agp_base));
		if (rdev->family > CHIP_RV200)
			WREG32(R_00015C_AGP_BASE_2,
				upper_32_bits(rdev->mc.agp_base) & 0xff);
	} else {
		WREG32(R_00014C_MC_AGP_LOCATION, 0x0FFFFFFF);
		WREG32(R_000170_AGP_BASE, 0);
		if (rdev->family > CHIP_RV200)
			WREG32(R_00015C_AGP_BASE_2, 0);
	}
	/* Wait for mc idle */
	if (r100_mc_wait_for_idle(rdev))
		dev_warn(rdev->dev, "Wait for MC idle timeout.\n");
	/* Program MC; the address space is limited to 32 bits */
	WREG32(R_000148_MC_FB_LOCATION,
		S_000148_MC_FB_START(rdev->mc.vram_start >> 16) |
		S_000148_MC_FB_TOP(rdev->mc.vram_end >> 16));
	r100_mc_resume(rdev, &save);
}

static void r100_clock_startup(struct radeon_device *rdev)
{
	u32 tmp;

	if (radeon_dynclks != -1 && radeon_dynclks)
		radeon_legacy_set_clock_gating(rdev, 1);
	/* We need to force on some of the blocks */
	tmp = RREG32_PLL(R_00000D_SCLK_CNTL);
	tmp |= S_00000D_FORCE_CP(1) | S_00000D_FORCE_VIP(1);
	if ((rdev->family == CHIP_RV250) || (rdev->family == CHIP_RV280))
		tmp |= S_00000D_FORCE_DISP1(1) | S_00000D_FORCE_DISP2(1);
	WREG32_PLL(R_00000D_SCLK_CNTL, tmp);
}

static int r100_startup(struct radeon_device *rdev)
{
	int r;

	/* set common regs */
	r100_set_common_regs(rdev);
	/* program mc */
	r100_mc_program(rdev);
	/* Resume clock */
	r100_clock_startup(rdev);
	/* Initialize GART (initialize after TTM so we can allocate
	 * memory through TTM but finalize after TTM) */
	r100_enable_bm(rdev);
	if (rdev->flags & RADEON_IS_PCI) {
		r = r100_pci_gart_enable(rdev);
		if (r)
			return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	/* Enable IRQ */
	r100_irq_set(rdev);
	rdev->config.r100.hdp_cntl = RREG32(RADEON_HOST_PATH_CNTL);
	/* 1M ring buffer */
	r = r100_cp_init(rdev, 1024 * 1024);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP (%d).\n", r);
		return r;
	}

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	return 0;
}

int r100_resume(struct radeon_device *rdev)
{
	int r;

	/* Make sure the GART is not working */
	if (rdev->flags & RADEON_IS_PCI)
		r100_pci_gart_disable(rdev);
	/* Resume clock before doing reset */
	r100_clock_startup(rdev);
	/* Reset gpu before posting otherwise ATOM will enter infinite loop */
	if (radeon_asic_reset(rdev)) {
		dev_warn(rdev->dev, "GPU reset failed ! (0xE40=0x%08X, 0x7C0=0x%08X)\n",
			RREG32(R_000E40_RBBM_STATUS),
			RREG32(R_0007C0_CP_STAT));
	}
	/* post */
	radeon_combios_asic_init(rdev->ddev);
	/* Resume clock after posting */
	r100_clock_startup(rdev);
	/* Initialize surface registers */
	radeon_surface_init(rdev);

	rdev->accel_working = true;
	r = r100_startup(rdev);
	if (r) {
		rdev->accel_working = false;
	}
	return r;
}

int r100_suspend(struct radeon_device *rdev)
{
	r100_cp_disable(rdev);
	radeon_wb_disable(rdev);
	r100_irq_disable(rdev);
	if (rdev->flags & RADEON_IS_PCI)
		r100_pci_gart_disable(rdev);
	return 0;
}

void r100_fini(struct radeon_device *rdev)
{
	r100_cp_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_gem_fini(rdev);
	if (rdev->flags & RADEON_IS_PCI)
		r100_pci_gart_fini(rdev);
	radeon_agp_fini(rdev);
	radeon_irq_kms_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	r100_cp_fini_microcode(rdev);
	free(rdev->bios, DRM_MEM_DRIVER);
	rdev->bios = NULL;
}

/*
 * Due to how kexec works, it can leave the hw fully initialised when it
 * boots the new kernel. However, doing our init sequence with the CP and
 * WB stuff set up causes GPU hangs on the RN50 at least. So at startup
 * do some quick sanity checks and restore sane values to avoid this
 * problem.
 */
void r100_restore_sanity(struct radeon_device *rdev)
{
	u32 tmp;

	tmp = RREG32(RADEON_CP_CSQ_CNTL);
	if (tmp) {
		WREG32(RADEON_CP_CSQ_CNTL, 0);
	}
	tmp = RREG32(RADEON_CP_RB_CNTL);
	if (tmp) {
		WREG32(RADEON_CP_RB_CNTL, 0);
	}
	tmp = RREG32(RADEON_SCRATCH_UMSK);
	if (tmp) {
		WREG32(RADEON_SCRATCH_UMSK, 0);
	}
}

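/**
 * r100_init - asic specific driver and hw init.
 *
 * @rdev: radeon_device pointer
 *
 * One-time setup for r1xx asics: BIOS fetch and combios parsing, GPU
 * reset and post check, clock and AGP init, VRAM and GART setup, fence
 * and IRQ driver init, and finally the common startup path.
 * Returns 0 on success, negative error code on failure.
 */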
int r100_init(struct radeon_device *rdev)
{
	int r;

	/* Register debugfs file specific to this group of asics */
	r100_debugfs(rdev);
	/* Disable VGA */
	r100_vga_render_disable(rdev);
	/* Initialize scratch registers */
	radeon_scratch_init(rdev);
	/* Initialize surface registers */
	radeon_surface_init(rdev);
	/* sanity check some registers to avoid hangs like after kexec */
	r100_restore_sanity(rdev);
	/* TODO: disable VGA need to use VGA request */
	/* BIOS */
	if (!radeon_get_bios(rdev)) {
		if (ASIC_IS_AVIVO(rdev))
			return -EINVAL;
	}
	if (rdev->is_atom_bios) {
		dev_err(rdev->dev, "Expecting combios for RS400/RS480 GPU\n");
		return -EINVAL;
	} else {
		r = radeon_combios_init(rdev);
		if (r)
			return r;
	}
	/* Reset gpu before posting otherwise ATOM will enter infinite loop */
	if (radeon_asic_reset(rdev)) {
		dev_warn(rdev->dev,
			"GPU reset failed ! (0xE40=0x%08X, 0x7C0=0x%08X)\n",
			RREG32(R_000E40_RBBM_STATUS),
			RREG32(R_0007C0_CP_STAT));
	}
	/* check if cards are posted or not */
	if (radeon_boot_test_post_card(rdev) == false)
		return -EINVAL;
	/* Set asic errata */
	r100_errata(rdev);
	/* Initialize clocks */
	radeon_get_clock_info(rdev->ddev);
	/* initialize AGP */
	if (rdev->flags & RADEON_IS_AGP) {
		r = radeon_agp_init(rdev);
		if (r) {
			radeon_agp_disable(rdev);
		}
	}
	/* initialize VRAM */
	r100_mc_init(rdev);
	/* Fence driver */
	r = radeon_fence_driver_init(rdev);
	if (r)
		return r;
	r = radeon_irq_kms_init(rdev);
	if (r)
		return r;
	/* Memory manager */
	r = radeon_bo_init(rdev);
	if (r)
		return r;
	if (rdev->flags & RADEON_IS_PCI) {
		r = r100_pci_gart_init(rdev);
		if (r)
			return r;
	}
	r100_set_safe_registers(rdev);

	rdev->accel_working = true;
	r = r100_startup(rdev);
	if (r) {
		/* Something went wrong with the accel init, so stop accel */
		dev_err(rdev->dev, "Disabling GPU acceleration\n");
		r100_cp_fini(rdev);
		radeon_wb_fini(rdev);
		radeon_ib_pool_fini(rdev);
		radeon_irq_kms_fini(rdev);
		if (rdev->flags & RADEON_IS_PCI)
			r100_pci_gart_fini(rdev);
		rdev->accel_working = false;
	}
	return 0;
}

uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg,
		      bool always_indirect)
{
	if (reg < rdev->rmmio_size && !always_indirect)
		return bus_read_4(rdev->rmmio, reg);
	else {
		unsigned long flags;
		uint32_t ret;

		DRM_SPINLOCK_IRQSAVE(&rdev->mmio_idx_lock, flags);
		bus_write_4(rdev->rmmio, RADEON_MM_INDEX, reg);
		ret = bus_read_4(rdev->rmmio, RADEON_MM_DATA);
		DRM_SPINUNLOCK_IRQRESTORE(&rdev->mmio_idx_lock, flags);

		return ret;
	}
}

void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v,
		  bool always_indirect)
{
	if (reg < rdev->rmmio_size && !always_indirect)
		bus_write_4(rdev->rmmio, reg, v);
	else {
		unsigned long flags;

		DRM_SPINLOCK_IRQSAVE(&rdev->mmio_idx_lock, flags);
		bus_write_4(rdev->rmmio, RADEON_MM_INDEX, reg);
		bus_write_4(rdev->rmmio, RADEON_MM_DATA, v);
		DRM_SPINUNLOCK_IRQRESTORE(&rdev->mmio_idx_lock, flags);
	}
}

u32 r100_io_rreg(struct radeon_device *rdev, u32 reg)
{
	if (reg < rdev->rio_mem_size)
		return bus_read_4(rdev->rio_mem, reg);
	else {
		/* XXX No locking? -- dumbbell@ */
		bus_write_4(rdev->rio_mem, RADEON_MM_INDEX, reg);
		return bus_read_4(rdev->rio_mem, RADEON_MM_DATA);
	}
}

void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	if (reg < rdev->rio_mem_size)
		bus_write_4(rdev->rio_mem, reg, v);
	else {
		/* XXX No locking? -- dumbbell@ */
		bus_write_4(rdev->rio_mem, RADEON_MM_INDEX, reg);
		bus_write_4(rdev->rio_mem, RADEON_MM_DATA, v);
	}
}
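
/*
 * Note on the register access helpers above: the RREG32()/WREG32() macros
 * used throughout this file are presumably mapped onto r100_mm_rreg() and
 * r100_mm_wreg() in radeon.h (the exact mapping is not shown here).  Under
 * that assumption, something like
 *	tmp = RREG32(RADEON_CRTC_GEN_CNTL);
 * ends up in r100_mm_rreg(rdev, RADEON_CRTC_GEN_CNTL, false) and takes the
 * direct path because the offset is below rmmio_size; any other register is
 * funneled through the MM_INDEX/MM_DATA pair, with mmio_idx_lock held for
 * the memory-mapped case.  The I/O-port variants do the same dance on
 * rio_mem without locking.
 */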