r600_cp.c revision 195501
1/*- 2 * Copyright 2008-2009 Advanced Micro Devices, Inc. 3 * Copyright 2008 Red Hat Inc. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the next 13 * paragraph) shall be included in all copies or substantial portions of the 14 * Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 22 * DEALINGS IN THE SOFTWARE. 
23 * 24 * Authors: 25 * Dave Airlie <airlied@redhat.com> 26 * Alex Deucher <alexander.deucher@amd.com> 27 */ 28 29#include <sys/cdefs.h> 30__FBSDID("$FreeBSD: head/sys/dev/drm/r600_cp.c 195501 2009-07-09 16:39:28Z rnoland $"); 31 32#include "dev/drm/drmP.h" 33#include "dev/drm/drm.h" 34#include "dev/drm/radeon_drm.h" 35#include "dev/drm/radeon_drv.h" 36 37#include "dev/drm/r600_microcode.h" 38 39# define ATI_PCIGART_PAGE_SIZE 4096 /**< PCI GART page size */ 40# define ATI_PCIGART_PAGE_MASK (~(ATI_PCIGART_PAGE_SIZE-1)) 41 42#define R600_PTE_VALID (1 << 0) 43#define R600_PTE_SYSTEM (1 << 1) 44#define R600_PTE_SNOOPED (1 << 2) 45#define R600_PTE_READABLE (1 << 5) 46#define R600_PTE_WRITEABLE (1 << 6) 47 48/* MAX values used for gfx init */ 49#define R6XX_MAX_SH_GPRS 256 50#define R6XX_MAX_TEMP_GPRS 16 51#define R6XX_MAX_SH_THREADS 256 52#define R6XX_MAX_SH_STACK_ENTRIES 4096 53#define R6XX_MAX_BACKENDS 8 54#define R6XX_MAX_BACKENDS_MASK 0xff 55#define R6XX_MAX_SIMDS 8 56#define R6XX_MAX_SIMDS_MASK 0xff 57#define R6XX_MAX_PIPES 8 58#define R6XX_MAX_PIPES_MASK 0xff 59 60#define R7XX_MAX_SH_GPRS 256 61#define R7XX_MAX_TEMP_GPRS 16 62#define R7XX_MAX_SH_THREADS 256 63#define R7XX_MAX_SH_STACK_ENTRIES 4096 64#define R7XX_MAX_BACKENDS 8 65#define R7XX_MAX_BACKENDS_MASK 0xff 66#define R7XX_MAX_SIMDS 16 67#define R7XX_MAX_SIMDS_MASK 0xffff 68#define R7XX_MAX_PIPES 8 69#define R7XX_MAX_PIPES_MASK 0xff 70 71static int r600_do_wait_for_fifo(drm_radeon_private_t *dev_priv, int entries) 72{ 73 int i; 74 75 dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE; 76 77 for (i = 0; i < dev_priv->usec_timeout; i++) { 78 int slots; 79 if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770) 80 slots = (RADEON_READ(R600_GRBM_STATUS) 81 & R700_CMDFIFO_AVAIL_MASK); 82 else 83 slots = (RADEON_READ(R600_GRBM_STATUS) 84 & R600_CMDFIFO_AVAIL_MASK); 85 if (slots >= entries) 86 return 0; 87 DRM_UDELAY(1); 88 } 89 DRM_INFO("wait for fifo failed status : 0x%08X 0x%08X\n", 90 
RADEON_READ(R600_GRBM_STATUS), 91 RADEON_READ(R600_GRBM_STATUS2)); 92 93 return -EBUSY; 94} 95 96static int r600_do_wait_for_idle(drm_radeon_private_t *dev_priv) 97{ 98 int i, ret; 99 100 dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE; 101 102 if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770) 103 ret = r600_do_wait_for_fifo(dev_priv, 8); 104 else 105 ret = r600_do_wait_for_fifo(dev_priv, 16); 106 if (ret) 107 return ret; 108 for (i = 0; i < dev_priv->usec_timeout; i++) { 109 if (!(RADEON_READ(R600_GRBM_STATUS) & R600_GUI_ACTIVE)) 110 return 0; 111 DRM_UDELAY(1); 112 } 113 DRM_INFO("wait idle failed status : 0x%08X 0x%08X\n", 114 RADEON_READ(R600_GRBM_STATUS), 115 RADEON_READ(R600_GRBM_STATUS2)); 116 117 return -EBUSY; 118} 119 120void r600_page_table_cleanup(struct drm_device *dev, struct drm_ati_pcigart_info *gart_info) 121{ 122#ifdef __linux__ 123 struct drm_sg_mem *entry = dev->sg; 124 int max_pages; 125 int pages; 126 int i; 127#endif 128 if (gart_info->bus_addr) { 129#ifdef __linux__ 130 max_pages = (gart_info->table_size / sizeof(u32)); 131 pages = (entry->pages <= max_pages) 132 ? 
entry->pages : max_pages; 133 134 for (i = 0; i < pages; i++) { 135 if (!entry->busaddr[i]) 136 break; 137 pci_unmap_single(dev->pdev, entry->busaddr[i], 138 PAGE_SIZE, PCI_DMA_TODEVICE); 139 } 140#endif 141 if (gart_info->gart_table_location == DRM_ATI_GART_MAIN) 142 gart_info->bus_addr = 0; 143 } 144} 145 146/* R600 has page table setup */ 147int r600_page_table_init(struct drm_device *dev) 148{ 149 drm_radeon_private_t *dev_priv = dev->dev_private; 150 struct drm_ati_pcigart_info *gart_info = &dev_priv->gart_info; 151 struct drm_sg_mem *entry = dev->sg; 152 int ret = 0; 153 int i, j; 154 int max_pages, pages; 155 u64 *pci_gart, page_base; 156 dma_addr_t entry_addr; 157 158 /* okay page table is available - lets rock */ 159 160 /* PTEs are 64-bits */ 161 pci_gart = (u64 *)gart_info->addr; 162 163 max_pages = (gart_info->table_size / sizeof(u64)); 164 pages = (entry->pages <= max_pages) ? entry->pages : max_pages; 165 166 memset(pci_gart, 0, max_pages * sizeof(u64)); 167 168 for (i = 0; i < pages; i++) { 169#ifdef __linux__ 170 entry->busaddr[i] = pci_map_single(dev->pdev, 171 page_address(entry-> 172 pagelist[i]), 173 PAGE_SIZE, PCI_DMA_TODEVICE); 174 if (entry->busaddr[i] == 0) { 175 DRM_ERROR("unable to map PCIGART pages!\n"); 176 r600_page_table_cleanup(dev, gart_info); 177 goto done; 178 } 179#endif 180 entry_addr = entry->busaddr[i]; 181 for (j = 0; j < (PAGE_SIZE / ATI_PCIGART_PAGE_SIZE); j++) { 182 page_base = (u64) entry_addr & ATI_PCIGART_PAGE_MASK; 183 page_base |= R600_PTE_VALID | R600_PTE_SYSTEM | R600_PTE_SNOOPED; 184 page_base |= R600_PTE_READABLE | R600_PTE_WRITEABLE; 185 186 *pci_gart = page_base; 187 188 if ((i % 128) == 0) 189 DRM_DEBUG("page entry %d: 0x%016llx\n", 190 i, (unsigned long long)page_base); 191 pci_gart++; 192 entry_addr += ATI_PCIGART_PAGE_SIZE; 193 } 194 } 195 ret = 1; 196#ifdef __linux__ 197done: 198#endif 199 return ret; 200} 201 202static void r600_vm_flush_gart_range(struct drm_device *dev) 203{ 204 drm_radeon_private_t 
*dev_priv = dev->dev_private; 205 u32 resp, countdown = 1000; 206 RADEON_WRITE(R600_VM_CONTEXT0_INVALIDATION_LOW_ADDR, dev_priv->gart_vm_start >> 12); 207 RADEON_WRITE(R600_VM_CONTEXT0_INVALIDATION_HIGH_ADDR, (dev_priv->gart_vm_start + dev_priv->gart_size - 1) >> 12); 208 RADEON_WRITE(R600_VM_CONTEXT0_REQUEST_RESPONSE, 2); 209 210 do { 211 resp = RADEON_READ(R600_VM_CONTEXT0_REQUEST_RESPONSE); 212 countdown--; 213 DRM_UDELAY(1); 214 } while (((resp & 0xf0) == 0) && countdown); 215} 216 217static void r600_vm_init(struct drm_device *dev) 218{ 219 drm_radeon_private_t *dev_priv = dev->dev_private; 220 /* initialise the VM to use the page table we constructed up there */ 221 u32 vm_c0, i; 222 u32 mc_rd_a; 223 u32 vm_l2_cntl, vm_l2_cntl3; 224 /* okay set up the PCIE aperture type thingo */ 225 RADEON_WRITE(R600_MC_VM_SYSTEM_APERTURE_LOW_ADDR, dev_priv->gart_vm_start >> 12); 226 RADEON_WRITE(R600_MC_VM_SYSTEM_APERTURE_HIGH_ADDR, (dev_priv->gart_vm_start + dev_priv->gart_size - 1) >> 12); 227 RADEON_WRITE(R600_MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR, 0); 228 229 /* setup MC RD a */ 230 mc_rd_a = R600_MCD_L1_TLB | R600_MCD_L1_FRAG_PROC | R600_MCD_SYSTEM_ACCESS_MODE_IN_SYS | 231 R600_MCD_SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU | R600_MCD_EFFECTIVE_L1_TLB_SIZE(5) | 232 R600_MCD_EFFECTIVE_L1_QUEUE_SIZE(5) | R600_MCD_WAIT_L2_QUERY; 233 234 RADEON_WRITE(R600_MCD_RD_A_CNTL, mc_rd_a); 235 RADEON_WRITE(R600_MCD_RD_B_CNTL, mc_rd_a); 236 237 RADEON_WRITE(R600_MCD_WR_A_CNTL, mc_rd_a); 238 RADEON_WRITE(R600_MCD_WR_B_CNTL, mc_rd_a); 239 240 RADEON_WRITE(R600_MCD_RD_GFX_CNTL, mc_rd_a); 241 RADEON_WRITE(R600_MCD_WR_GFX_CNTL, mc_rd_a); 242 243 RADEON_WRITE(R600_MCD_RD_SYS_CNTL, mc_rd_a); 244 RADEON_WRITE(R600_MCD_WR_SYS_CNTL, mc_rd_a); 245 246 RADEON_WRITE(R600_MCD_RD_HDP_CNTL, mc_rd_a | R600_MCD_L1_STRICT_ORDERING); 247 RADEON_WRITE(R600_MCD_WR_HDP_CNTL, mc_rd_a /*| R600_MCD_L1_STRICT_ORDERING*/); 248 249 RADEON_WRITE(R600_MCD_RD_PDMA_CNTL, mc_rd_a); 250 RADEON_WRITE(R600_MCD_WR_PDMA_CNTL, 
mc_rd_a); 251 252 RADEON_WRITE(R600_MCD_RD_SEM_CNTL, mc_rd_a | R600_MCD_SEMAPHORE_MODE); 253 RADEON_WRITE(R600_MCD_WR_SEM_CNTL, mc_rd_a); 254 255 vm_l2_cntl = R600_VM_L2_CACHE_EN | R600_VM_L2_FRAG_PROC | R600_VM_ENABLE_PTE_CACHE_LRU_W; 256 vm_l2_cntl |= R600_VM_L2_CNTL_QUEUE_SIZE(7); 257 RADEON_WRITE(R600_VM_L2_CNTL, vm_l2_cntl); 258 259 RADEON_WRITE(R600_VM_L2_CNTL2, 0); 260 vm_l2_cntl3 = (R600_VM_L2_CNTL3_BANK_SELECT_0(0) | 261 R600_VM_L2_CNTL3_BANK_SELECT_1(1) | 262 R600_VM_L2_CNTL3_CACHE_UPDATE_MODE(2)); 263 RADEON_WRITE(R600_VM_L2_CNTL3, vm_l2_cntl3); 264 265 vm_c0 = R600_VM_ENABLE_CONTEXT | R600_VM_PAGE_TABLE_DEPTH_FLAT; 266 267 RADEON_WRITE(R600_VM_CONTEXT0_CNTL, vm_c0); 268 269 vm_c0 &= ~R600_VM_ENABLE_CONTEXT; 270 271 /* disable all other contexts */ 272 for (i = 1; i < 8; i++) 273 RADEON_WRITE(R600_VM_CONTEXT0_CNTL + (i * 4), vm_c0); 274 275 RADEON_WRITE(R600_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, dev_priv->gart_info.bus_addr >> 12); 276 RADEON_WRITE(R600_VM_CONTEXT0_PAGE_TABLE_START_ADDR, dev_priv->gart_vm_start >> 12); 277 RADEON_WRITE(R600_VM_CONTEXT0_PAGE_TABLE_END_ADDR, (dev_priv->gart_vm_start + dev_priv->gart_size - 1) >> 12); 278 279 r600_vm_flush_gart_range(dev); 280} 281 282/* load r600 microcode */ 283static void r600_cp_load_microcode(drm_radeon_private_t *dev_priv) 284{ 285 const u32 (*cp)[3]; 286 const u32 *pfp; 287 int i; 288 289 switch (dev_priv->flags & RADEON_FAMILY_MASK) { 290 case CHIP_R600: 291 DRM_INFO("Loading R600 Microcode\n"); 292 cp = R600_cp_microcode; 293 pfp = R600_pfp_microcode; 294 break; 295 case CHIP_RV610: 296 DRM_INFO("Loading RV610 Microcode\n"); 297 cp = RV610_cp_microcode; 298 pfp = RV610_pfp_microcode; 299 break; 300 case CHIP_RV630: 301 DRM_INFO("Loading RV630 Microcode\n"); 302 cp = RV630_cp_microcode; 303 pfp = RV630_pfp_microcode; 304 break; 305 case CHIP_RV620: 306 DRM_INFO("Loading RV620 Microcode\n"); 307 cp = RV620_cp_microcode; 308 pfp = RV620_pfp_microcode; 309 break; 310 case CHIP_RV635: 311 DRM_INFO("Loading 
RV635 Microcode\n"); 312 cp = RV635_cp_microcode; 313 pfp = RV635_pfp_microcode; 314 break; 315 case CHIP_RV670: 316 DRM_INFO("Loading RV670 Microcode\n"); 317 cp = RV670_cp_microcode; 318 pfp = RV670_pfp_microcode; 319 break; 320 case CHIP_RS780: 321 DRM_INFO("Loading RS780 Microcode\n"); 322 cp = RS780_cp_microcode; 323 pfp = RS780_pfp_microcode; 324 break; 325 default: 326 return; 327 } 328 329 r600_do_cp_stop(dev_priv); 330 331 RADEON_WRITE(R600_CP_RB_CNTL, 332 R600_RB_NO_UPDATE | 333 R600_RB_BLKSZ(15) | 334 R600_RB_BUFSZ(3)); 335 336 RADEON_WRITE(R600_GRBM_SOFT_RESET, R600_SOFT_RESET_CP); 337 RADEON_READ(R600_GRBM_SOFT_RESET); 338 DRM_UDELAY(15000); 339 RADEON_WRITE(R600_GRBM_SOFT_RESET, 0); 340 341 RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0); 342 343 for (i = 0; i < PM4_UCODE_SIZE; i++) { 344 RADEON_WRITE(R600_CP_ME_RAM_DATA, cp[i][0]); 345 RADEON_WRITE(R600_CP_ME_RAM_DATA, cp[i][1]); 346 RADEON_WRITE(R600_CP_ME_RAM_DATA, cp[i][2]); 347 } 348 349 RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0); 350 for (i = 0; i < PFP_UCODE_SIZE; i++) 351 RADEON_WRITE(R600_CP_PFP_UCODE_DATA, pfp[i]); 352 353 RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0); 354 RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0); 355 RADEON_WRITE(R600_CP_ME_RAM_RADDR, 0); 356} 357 358static void r700_vm_init(struct drm_device *dev) 359{ 360 drm_radeon_private_t *dev_priv = dev->dev_private; 361 /* initialise the VM to use the page table we constructed up there */ 362 u32 vm_c0, i; 363 u32 mc_vm_md_l1; 364 u32 vm_l2_cntl, vm_l2_cntl3; 365 /* okay set up the PCIE aperture type thingo */ 366 RADEON_WRITE(R700_MC_VM_SYSTEM_APERTURE_LOW_ADDR, dev_priv->gart_vm_start >> 12); 367 RADEON_WRITE(R700_MC_VM_SYSTEM_APERTURE_HIGH_ADDR, (dev_priv->gart_vm_start + dev_priv->gart_size - 1) >> 12); 368 RADEON_WRITE(R700_MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR, 0); 369 370 mc_vm_md_l1 = R700_ENABLE_L1_TLB | 371 R700_ENABLE_L1_FRAGMENT_PROCESSING | 372 R700_SYSTEM_ACCESS_MODE_IN_SYS | 373 R700_SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU | 374 
R700_EFFECTIVE_L1_TLB_SIZE(5) | 375 R700_EFFECTIVE_L1_QUEUE_SIZE(5); 376 377 RADEON_WRITE(R700_MC_VM_MD_L1_TLB0_CNTL, mc_vm_md_l1); 378 RADEON_WRITE(R700_MC_VM_MD_L1_TLB1_CNTL, mc_vm_md_l1); 379 RADEON_WRITE(R700_MC_VM_MD_L1_TLB2_CNTL, mc_vm_md_l1); 380 RADEON_WRITE(R700_MC_VM_MB_L1_TLB0_CNTL, mc_vm_md_l1); 381 RADEON_WRITE(R700_MC_VM_MB_L1_TLB1_CNTL, mc_vm_md_l1); 382 RADEON_WRITE(R700_MC_VM_MB_L1_TLB2_CNTL, mc_vm_md_l1); 383 RADEON_WRITE(R700_MC_VM_MB_L1_TLB3_CNTL, mc_vm_md_l1); 384 385 vm_l2_cntl = R600_VM_L2_CACHE_EN | R600_VM_L2_FRAG_PROC | R600_VM_ENABLE_PTE_CACHE_LRU_W; 386 vm_l2_cntl |= R700_VM_L2_CNTL_QUEUE_SIZE(7); 387 RADEON_WRITE(R600_VM_L2_CNTL, vm_l2_cntl); 388 389 RADEON_WRITE(R600_VM_L2_CNTL2, 0); 390 vm_l2_cntl3 = R700_VM_L2_CNTL3_BANK_SELECT(0) | R700_VM_L2_CNTL3_CACHE_UPDATE_MODE(2); 391 RADEON_WRITE(R600_VM_L2_CNTL3, vm_l2_cntl3); 392 393 vm_c0 = R600_VM_ENABLE_CONTEXT | R600_VM_PAGE_TABLE_DEPTH_FLAT; 394 395 RADEON_WRITE(R600_VM_CONTEXT0_CNTL, vm_c0); 396 397 vm_c0 &= ~R600_VM_ENABLE_CONTEXT; 398 399 /* disable all other contexts */ 400 for (i = 1; i < 8; i++) 401 RADEON_WRITE(R600_VM_CONTEXT0_CNTL + (i * 4), vm_c0); 402 403 RADEON_WRITE(R700_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, dev_priv->gart_info.bus_addr >> 12); 404 RADEON_WRITE(R700_VM_CONTEXT0_PAGE_TABLE_START_ADDR, dev_priv->gart_vm_start >> 12); 405 RADEON_WRITE(R700_VM_CONTEXT0_PAGE_TABLE_END_ADDR, (dev_priv->gart_vm_start + dev_priv->gart_size - 1) >> 12); 406 407 r600_vm_flush_gart_range(dev); 408} 409 410/* load r600 microcode */ 411static void r700_cp_load_microcode(drm_radeon_private_t *dev_priv) 412{ 413 const u32 *pfp; 414 const u32 *cp; 415 int i; 416 417 switch (dev_priv->flags & RADEON_FAMILY_MASK) { 418 case CHIP_RV770: 419 DRM_INFO("Loading RV770/RV790 Microcode\n"); 420 pfp = RV770_pfp_microcode; 421 cp = RV770_cp_microcode; 422 break; 423 case CHIP_RV730: 424 case CHIP_RV740: 425 DRM_INFO("Loading RV730/RV740 Microcode\n"); 426 pfp = RV730_pfp_microcode; 427 cp = 
RV730_cp_microcode; 428 break; 429 case CHIP_RV710: 430 DRM_INFO("Loading RV710 Microcode\n"); 431 pfp = RV710_pfp_microcode; 432 cp = RV710_cp_microcode; 433 break; 434 default: 435 return; 436 } 437 438 r600_do_cp_stop(dev_priv); 439 440 RADEON_WRITE(R600_CP_RB_CNTL, 441 R600_RB_NO_UPDATE | 442 (15 << 8) | 443 (3 << 0)); 444 445 RADEON_WRITE(R600_GRBM_SOFT_RESET, R600_SOFT_RESET_CP); 446 RADEON_READ(R600_GRBM_SOFT_RESET); 447 DRM_UDELAY(15000); 448 RADEON_WRITE(R600_GRBM_SOFT_RESET, 0); 449 450 RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0); 451 for (i = 0; i < R700_PFP_UCODE_SIZE; i++) 452 RADEON_WRITE(R600_CP_PFP_UCODE_DATA, pfp[i]); 453 RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0); 454 455 RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0); 456 for (i = 0; i < R700_PM4_UCODE_SIZE; i++) 457 RADEON_WRITE(R600_CP_ME_RAM_DATA, cp[i]); 458 RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0); 459 460 RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0); 461 RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0); 462 RADEON_WRITE(R600_CP_ME_RAM_RADDR, 0); 463} 464 465static void r600_test_writeback(drm_radeon_private_t *dev_priv) 466{ 467 u32 tmp; 468 469 /* Start with assuming that writeback doesn't work */ 470 dev_priv->writeback_works = 0; 471 472 /* Writeback doesn't seem to work everywhere, test it here and possibly 473 * enable it if it appears to work 474 */ 475 radeon_write_ring_rptr(dev_priv, R600_SCRATCHOFF(1), 0); 476 477 RADEON_WRITE(R600_SCRATCH_REG1, 0xdeadbeef); 478 479 for (tmp = 0; tmp < dev_priv->usec_timeout; tmp++) { 480 u32 val; 481 482 val = radeon_read_ring_rptr(dev_priv, R600_SCRATCHOFF(1)); 483 if (val == 0xdeadbeef) 484 break; 485 DRM_UDELAY(1); 486 } 487 488 if (tmp < dev_priv->usec_timeout) { 489 dev_priv->writeback_works = 1; 490 DRM_INFO("writeback test succeeded in %d usecs\n", tmp); 491 } else { 492 dev_priv->writeback_works = 0; 493 DRM_INFO("writeback test failed\n"); 494 } 495 if (radeon_no_wb == 1) { 496 dev_priv->writeback_works = 0; 497 DRM_INFO("writeback forced off\n"); 498 } 499 500 if 
(!dev_priv->writeback_works) { 501 /* Disable writeback to avoid unnecessary bus master transfer */ 502 RADEON_WRITE(R600_CP_RB_CNTL, RADEON_READ(R600_CP_RB_CNTL) | 503 RADEON_RB_NO_UPDATE); 504 RADEON_WRITE(R600_SCRATCH_UMSK, 0); 505 } 506} 507 508int r600_do_engine_reset(struct drm_device *dev) 509{ 510 drm_radeon_private_t *dev_priv = dev->dev_private; 511 u32 cp_ptr, cp_me_cntl, cp_rb_cntl; 512 513 DRM_INFO("Resetting GPU\n"); 514 515 cp_ptr = RADEON_READ(R600_CP_RB_WPTR); 516 cp_me_cntl = RADEON_READ(R600_CP_ME_CNTL); 517 RADEON_WRITE(R600_CP_ME_CNTL, R600_CP_ME_HALT); 518 519 RADEON_WRITE(R600_GRBM_SOFT_RESET, 0x7fff); 520 RADEON_READ(R600_GRBM_SOFT_RESET); 521 DRM_UDELAY(50); 522 RADEON_WRITE(R600_GRBM_SOFT_RESET, 0); 523 RADEON_READ(R600_GRBM_SOFT_RESET); 524 525 RADEON_WRITE(R600_CP_RB_WPTR_DELAY, 0); 526 cp_rb_cntl = RADEON_READ(R600_CP_RB_CNTL); 527 RADEON_WRITE(R600_CP_RB_CNTL, R600_RB_RPTR_WR_ENA); 528 529 RADEON_WRITE(R600_CP_RB_RPTR_WR, cp_ptr); 530 RADEON_WRITE(R600_CP_RB_WPTR, cp_ptr); 531 RADEON_WRITE(R600_CP_RB_CNTL, cp_rb_cntl); 532 RADEON_WRITE(R600_CP_ME_CNTL, cp_me_cntl); 533 534 /* Reset the CP ring */ 535 r600_do_cp_reset(dev_priv); 536 537 /* The CP is no longer running after an engine reset */ 538 dev_priv->cp_running = 0; 539 540 /* Reset any pending vertex, indirect buffers */ 541 radeon_freelist_reset(dev); 542 543 return 0; 544 545} 546 547static u32 r600_get_tile_pipe_to_backend_map(u32 num_tile_pipes, 548 u32 num_backends, 549 u32 backend_disable_mask) 550{ 551 u32 backend_map = 0; 552 u32 enabled_backends_mask; 553 u32 enabled_backends_count; 554 u32 cur_pipe; 555 u32 swizzle_pipe[R6XX_MAX_PIPES]; 556 u32 cur_backend; 557 u32 i; 558 559 if (num_tile_pipes > R6XX_MAX_PIPES) 560 num_tile_pipes = R6XX_MAX_PIPES; 561 if (num_tile_pipes < 1) 562 num_tile_pipes = 1; 563 if (num_backends > R6XX_MAX_BACKENDS) 564 num_backends = R6XX_MAX_BACKENDS; 565 if (num_backends < 1) 566 num_backends = 1; 567 568 enabled_backends_mask = 0; 569 
enabled_backends_count = 0; 570 for (i = 0; i < R6XX_MAX_BACKENDS; ++i) { 571 if (((backend_disable_mask >> i) & 1) == 0) { 572 enabled_backends_mask |= (1 << i); 573 ++enabled_backends_count; 574 } 575 if (enabled_backends_count == num_backends) 576 break; 577 } 578 579 if (enabled_backends_count == 0) { 580 enabled_backends_mask = 1; 581 enabled_backends_count = 1; 582 } 583 584 if (enabled_backends_count != num_backends) 585 num_backends = enabled_backends_count; 586 587 memset((uint8_t *)&swizzle_pipe[0], 0, sizeof(u32) * R6XX_MAX_PIPES); 588 switch (num_tile_pipes) { 589 case 1: 590 swizzle_pipe[0] = 0; 591 break; 592 case 2: 593 swizzle_pipe[0] = 0; 594 swizzle_pipe[1] = 1; 595 break; 596 case 3: 597 swizzle_pipe[0] = 0; 598 swizzle_pipe[1] = 1; 599 swizzle_pipe[2] = 2; 600 break; 601 case 4: 602 swizzle_pipe[0] = 0; 603 swizzle_pipe[1] = 1; 604 swizzle_pipe[2] = 2; 605 swizzle_pipe[3] = 3; 606 break; 607 case 5: 608 swizzle_pipe[0] = 0; 609 swizzle_pipe[1] = 1; 610 swizzle_pipe[2] = 2; 611 swizzle_pipe[3] = 3; 612 swizzle_pipe[4] = 4; 613 break; 614 case 6: 615 swizzle_pipe[0] = 0; 616 swizzle_pipe[1] = 2; 617 swizzle_pipe[2] = 4; 618 swizzle_pipe[3] = 5; 619 swizzle_pipe[4] = 1; 620 swizzle_pipe[5] = 3; 621 break; 622 case 7: 623 swizzle_pipe[0] = 0; 624 swizzle_pipe[1] = 2; 625 swizzle_pipe[2] = 4; 626 swizzle_pipe[3] = 6; 627 swizzle_pipe[4] = 1; 628 swizzle_pipe[5] = 3; 629 swizzle_pipe[6] = 5; 630 break; 631 case 8: 632 swizzle_pipe[0] = 0; 633 swizzle_pipe[1] = 2; 634 swizzle_pipe[2] = 4; 635 swizzle_pipe[3] = 6; 636 swizzle_pipe[4] = 1; 637 swizzle_pipe[5] = 3; 638 swizzle_pipe[6] = 5; 639 swizzle_pipe[7] = 7; 640 break; 641 } 642 643 cur_backend = 0; 644 for (cur_pipe = 0; cur_pipe < num_tile_pipes; ++cur_pipe) { 645 while (((1 << cur_backend) & enabled_backends_mask) == 0) 646 cur_backend = (cur_backend + 1) % R6XX_MAX_BACKENDS; 647 648 backend_map |= (u32)(((cur_backend & 3) << (swizzle_pipe[cur_pipe] * 2))); 649 650 cur_backend = (cur_backend + 1) 
% R6XX_MAX_BACKENDS; 651 } 652 653 return backend_map; 654} 655 656static int r600_count_pipe_bits(uint32_t val) 657{ 658 int i, ret = 0; 659 for (i = 0; i < 32; i++) { 660 ret += val & 1; 661 val >>= 1; 662 } 663 return ret; 664} 665 666static void r600_gfx_init(struct drm_device *dev, 667 drm_radeon_private_t *dev_priv) 668{ 669 int i, j, num_qd_pipes; 670 u32 sx_debug_1; 671 u32 tc_cntl; 672 u32 arb_pop; 673 u32 num_gs_verts_per_thread; 674 u32 vgt_gs_per_es; 675 u32 gs_prim_buffer_depth = 0; 676 u32 sq_ms_fifo_sizes; 677 u32 sq_config; 678 u32 sq_gpr_resource_mgmt_1 = 0; 679 u32 sq_gpr_resource_mgmt_2 = 0; 680 u32 sq_thread_resource_mgmt = 0; 681 u32 sq_stack_resource_mgmt_1 = 0; 682 u32 sq_stack_resource_mgmt_2 = 0; 683 u32 hdp_host_path_cntl; 684 u32 backend_map; 685 u32 gb_tiling_config = 0; 686 u32 cc_rb_backend_disable = 0; 687 u32 cc_gc_shader_pipe_config = 0; 688 u32 ramcfg; 689 690 /* setup chip specs */ 691 switch (dev_priv->flags & RADEON_FAMILY_MASK) { 692 case CHIP_R600: 693 dev_priv->r600_max_pipes = 4; 694 dev_priv->r600_max_tile_pipes = 8; 695 dev_priv->r600_max_simds = 4; 696 dev_priv->r600_max_backends = 4; 697 dev_priv->r600_max_gprs = 256; 698 dev_priv->r600_max_threads = 192; 699 dev_priv->r600_max_stack_entries = 256; 700 dev_priv->r600_max_hw_contexts = 8; 701 dev_priv->r600_max_gs_threads = 16; 702 dev_priv->r600_sx_max_export_size = 128; 703 dev_priv->r600_sx_max_export_pos_size = 16; 704 dev_priv->r600_sx_max_export_smx_size = 128; 705 dev_priv->r600_sq_num_cf_insts = 2; 706 break; 707 case CHIP_RV630: 708 case CHIP_RV635: 709 dev_priv->r600_max_pipes = 2; 710 dev_priv->r600_max_tile_pipes = 2; 711 dev_priv->r600_max_simds = 3; 712 dev_priv->r600_max_backends = 1; 713 dev_priv->r600_max_gprs = 128; 714 dev_priv->r600_max_threads = 192; 715 dev_priv->r600_max_stack_entries = 128; 716 dev_priv->r600_max_hw_contexts = 8; 717 dev_priv->r600_max_gs_threads = 4; 718 dev_priv->r600_sx_max_export_size = 128; 719 
dev_priv->r600_sx_max_export_pos_size = 16; 720 dev_priv->r600_sx_max_export_smx_size = 128; 721 dev_priv->r600_sq_num_cf_insts = 2; 722 break; 723 case CHIP_RV610: 724 case CHIP_RS780: 725 case CHIP_RV620: 726 dev_priv->r600_max_pipes = 1; 727 dev_priv->r600_max_tile_pipes = 1; 728 dev_priv->r600_max_simds = 2; 729 dev_priv->r600_max_backends = 1; 730 dev_priv->r600_max_gprs = 128; 731 dev_priv->r600_max_threads = 192; 732 dev_priv->r600_max_stack_entries = 128; 733 dev_priv->r600_max_hw_contexts = 4; 734 dev_priv->r600_max_gs_threads = 4; 735 dev_priv->r600_sx_max_export_size = 128; 736 dev_priv->r600_sx_max_export_pos_size = 16; 737 dev_priv->r600_sx_max_export_smx_size = 128; 738 dev_priv->r600_sq_num_cf_insts = 1; 739 break; 740 case CHIP_RV670: 741 dev_priv->r600_max_pipes = 4; 742 dev_priv->r600_max_tile_pipes = 4; 743 dev_priv->r600_max_simds = 4; 744 dev_priv->r600_max_backends = 4; 745 dev_priv->r600_max_gprs = 192; 746 dev_priv->r600_max_threads = 192; 747 dev_priv->r600_max_stack_entries = 256; 748 dev_priv->r600_max_hw_contexts = 8; 749 dev_priv->r600_max_gs_threads = 16; 750 dev_priv->r600_sx_max_export_size = 128; 751 dev_priv->r600_sx_max_export_pos_size = 16; 752 dev_priv->r600_sx_max_export_smx_size = 128; 753 dev_priv->r600_sq_num_cf_insts = 2; 754 break; 755 default: 756 break; 757 } 758 759 /* Initialize HDP */ 760 j = 0; 761 for (i = 0; i < 32; i++) { 762 RADEON_WRITE((0x2c14 + j), 0x00000000); 763 RADEON_WRITE((0x2c18 + j), 0x00000000); 764 RADEON_WRITE((0x2c1c + j), 0x00000000); 765 RADEON_WRITE((0x2c20 + j), 0x00000000); 766 RADEON_WRITE((0x2c24 + j), 0x00000000); 767 j += 0x18; 768 } 769 770 RADEON_WRITE(R600_GRBM_CNTL, R600_GRBM_READ_TIMEOUT(0xff)); 771 772 /* setup tiling, simd, pipe config */ 773 ramcfg = RADEON_READ(R600_RAMCFG); 774 775 switch (dev_priv->r600_max_tile_pipes) { 776 case 1: 777 gb_tiling_config |= R600_PIPE_TILING(0); 778 break; 779 case 2: 780 gb_tiling_config |= R600_PIPE_TILING(1); 781 break; 782 case 4: 783 
gb_tiling_config |= R600_PIPE_TILING(2); 784 break; 785 case 8: 786 gb_tiling_config |= R600_PIPE_TILING(3); 787 break; 788 default: 789 break; 790 } 791 792 gb_tiling_config |= R600_BANK_TILING((ramcfg >> R600_NOOFBANK_SHIFT) & R600_NOOFBANK_MASK); 793 794 gb_tiling_config |= R600_GROUP_SIZE(0); 795 796 if (((ramcfg >> R600_NOOFROWS_SHIFT) & R600_NOOFROWS_MASK) > 3) { 797 gb_tiling_config |= R600_ROW_TILING(3); 798 gb_tiling_config |= R600_SAMPLE_SPLIT(3); 799 } else { 800 gb_tiling_config |= 801 R600_ROW_TILING(((ramcfg >> R600_NOOFROWS_SHIFT) & R600_NOOFROWS_MASK)); 802 gb_tiling_config |= 803 R600_SAMPLE_SPLIT(((ramcfg >> R600_NOOFROWS_SHIFT) & R600_NOOFROWS_MASK)); 804 } 805 806 gb_tiling_config |= R600_BANK_SWAPS(1); 807 808 backend_map = r600_get_tile_pipe_to_backend_map(dev_priv->r600_max_tile_pipes, 809 dev_priv->r600_max_backends, 810 (0xff << dev_priv->r600_max_backends) & 0xff); 811 gb_tiling_config |= R600_BACKEND_MAP(backend_map); 812 813 cc_gc_shader_pipe_config = 814 R600_INACTIVE_QD_PIPES((R6XX_MAX_PIPES_MASK << dev_priv->r600_max_pipes) & R6XX_MAX_PIPES_MASK); 815 cc_gc_shader_pipe_config |= 816 R600_INACTIVE_SIMDS((R6XX_MAX_SIMDS_MASK << dev_priv->r600_max_simds) & R6XX_MAX_SIMDS_MASK); 817 818 cc_rb_backend_disable = 819 R600_BACKEND_DISABLE((R6XX_MAX_BACKENDS_MASK << dev_priv->r600_max_backends) & R6XX_MAX_BACKENDS_MASK); 820 821 RADEON_WRITE(R600_GB_TILING_CONFIG, gb_tiling_config); 822 RADEON_WRITE(R600_DCP_TILING_CONFIG, (gb_tiling_config & 0xffff)); 823 RADEON_WRITE(R600_HDP_TILING_CONFIG, (gb_tiling_config & 0xffff)); 824 825 RADEON_WRITE(R600_CC_RB_BACKEND_DISABLE, cc_rb_backend_disable); 826 RADEON_WRITE(R600_CC_GC_SHADER_PIPE_CONFIG, cc_gc_shader_pipe_config); 827 RADEON_WRITE(R600_GC_USER_SHADER_PIPE_CONFIG, cc_gc_shader_pipe_config); 828 829 num_qd_pipes = 830 R6XX_MAX_BACKENDS - r600_count_pipe_bits(cc_gc_shader_pipe_config & R600_INACTIVE_QD_PIPES_MASK); 831 RADEON_WRITE(R600_VGT_OUT_DEALLOC_CNTL, (num_qd_pipes * 4) & 
R600_DEALLOC_DIST_MASK); 832 RADEON_WRITE(R600_VGT_VERTEX_REUSE_BLOCK_CNTL, ((num_qd_pipes * 4) - 2) & R600_VTX_REUSE_DEPTH_MASK); 833 834 /* set HW defaults for 3D engine */ 835 RADEON_WRITE(R600_CP_QUEUE_THRESHOLDS, (R600_ROQ_IB1_START(0x16) | 836 R600_ROQ_IB2_START(0x2b))); 837 838 RADEON_WRITE(R600_CP_MEQ_THRESHOLDS, (R600_MEQ_END(0x40) | 839 R600_ROQ_END(0x40))); 840 841 RADEON_WRITE(R600_TA_CNTL_AUX, (R600_DISABLE_CUBE_ANISO | 842 R600_SYNC_GRADIENT | 843 R600_SYNC_WALKER | 844 R600_SYNC_ALIGNER)); 845 846 if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV670) 847 RADEON_WRITE(R600_ARB_GDEC_RD_CNTL, 0x00000021); 848 849 sx_debug_1 = RADEON_READ(R600_SX_DEBUG_1); 850 sx_debug_1 |= R600_SMX_EVENT_RELEASE; 851 if (((dev_priv->flags & RADEON_FAMILY_MASK) > CHIP_R600)) 852 sx_debug_1 |= R600_ENABLE_NEW_SMX_ADDRESS; 853 RADEON_WRITE(R600_SX_DEBUG_1, sx_debug_1); 854 855 if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_R600) || 856 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV630) || 857 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) || 858 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) || 859 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780)) 860 RADEON_WRITE(R600_DB_DEBUG, R600_PREZ_MUST_WAIT_FOR_POSTZ_DONE); 861 else 862 RADEON_WRITE(R600_DB_DEBUG, 0); 863 864 RADEON_WRITE(R600_DB_WATERMARKS, (R600_DEPTH_FREE(4) | 865 R600_DEPTH_FLUSH(16) | 866 R600_DEPTH_PENDING_FREE(4) | 867 R600_DEPTH_CACHELINE_FREE(16))); 868 RADEON_WRITE(R600_PA_SC_MULTI_CHIP_CNTL, 0); 869 RADEON_WRITE(R600_VGT_NUM_INSTANCES, 0); 870 871 RADEON_WRITE(R600_SPI_CONFIG_CNTL, R600_GPR_WRITE_PRIORITY(0)); 872 RADEON_WRITE(R600_SPI_CONFIG_CNTL_1, R600_VTX_DONE_DELAY(0)); 873 874 sq_ms_fifo_sizes = RADEON_READ(R600_SQ_MS_FIFO_SIZES); 875 if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) || 876 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) || 877 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780)) { 878 sq_ms_fifo_sizes = 
(R600_CACHE_FIFO_SIZE(0xa) | 879 R600_FETCH_FIFO_HIWATER(0xa) | 880 R600_DONE_FIFO_HIWATER(0xe0) | 881 R600_ALU_UPDATE_FIFO_HIWATER(0x8)); 882 } else if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_R600) || 883 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV630)) { 884 sq_ms_fifo_sizes &= ~R600_DONE_FIFO_HIWATER(0xff); 885 sq_ms_fifo_sizes |= R600_DONE_FIFO_HIWATER(0x4); 886 } 887 RADEON_WRITE(R600_SQ_MS_FIFO_SIZES, sq_ms_fifo_sizes); 888 889 /* SQ_CONFIG, SQ_GPR_RESOURCE_MGMT, SQ_THREAD_RESOURCE_MGMT, SQ_STACK_RESOURCE_MGMT 890 * should be adjusted as needed by the 2D/3D drivers. This just sets default values 891 */ 892 sq_config = RADEON_READ(R600_SQ_CONFIG); 893 sq_config &= ~(R600_PS_PRIO(3) | 894 R600_VS_PRIO(3) | 895 R600_GS_PRIO(3) | 896 R600_ES_PRIO(3)); 897 sq_config |= (R600_DX9_CONSTS | 898 R600_VC_ENABLE | 899 R600_PS_PRIO(0) | 900 R600_VS_PRIO(1) | 901 R600_GS_PRIO(2) | 902 R600_ES_PRIO(3)); 903 904 if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_R600) { 905 sq_gpr_resource_mgmt_1 = (R600_NUM_PS_GPRS(124) | 906 R600_NUM_VS_GPRS(124) | 907 R600_NUM_CLAUSE_TEMP_GPRS(4)); 908 sq_gpr_resource_mgmt_2 = (R600_NUM_GS_GPRS(0) | 909 R600_NUM_ES_GPRS(0)); 910 sq_thread_resource_mgmt = (R600_NUM_PS_THREADS(136) | 911 R600_NUM_VS_THREADS(48) | 912 R600_NUM_GS_THREADS(4) | 913 R600_NUM_ES_THREADS(4)); 914 sq_stack_resource_mgmt_1 = (R600_NUM_PS_STACK_ENTRIES(128) | 915 R600_NUM_VS_STACK_ENTRIES(128)); 916 sq_stack_resource_mgmt_2 = (R600_NUM_GS_STACK_ENTRIES(0) | 917 R600_NUM_ES_STACK_ENTRIES(0)); 918 } else if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) || 919 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) || 920 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780)) { 921 /* no vertex cache */ 922 sq_config &= ~R600_VC_ENABLE; 923 924 sq_gpr_resource_mgmt_1 = (R600_NUM_PS_GPRS(44) | 925 R600_NUM_VS_GPRS(44) | 926 R600_NUM_CLAUSE_TEMP_GPRS(2)); 927 sq_gpr_resource_mgmt_2 = (R600_NUM_GS_GPRS(17) | 928 R600_NUM_ES_GPRS(17)); 929 
sq_thread_resource_mgmt = (R600_NUM_PS_THREADS(79) | 930 R600_NUM_VS_THREADS(78) | 931 R600_NUM_GS_THREADS(4) | 932 R600_NUM_ES_THREADS(31)); 933 sq_stack_resource_mgmt_1 = (R600_NUM_PS_STACK_ENTRIES(40) | 934 R600_NUM_VS_STACK_ENTRIES(40)); 935 sq_stack_resource_mgmt_2 = (R600_NUM_GS_STACK_ENTRIES(32) | 936 R600_NUM_ES_STACK_ENTRIES(16)); 937 } else if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV630) || 938 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV635)) { 939 sq_gpr_resource_mgmt_1 = (R600_NUM_PS_GPRS(44) | 940 R600_NUM_VS_GPRS(44) | 941 R600_NUM_CLAUSE_TEMP_GPRS(2)); 942 sq_gpr_resource_mgmt_2 = (R600_NUM_GS_GPRS(18) | 943 R600_NUM_ES_GPRS(18)); 944 sq_thread_resource_mgmt = (R600_NUM_PS_THREADS(79) | 945 R600_NUM_VS_THREADS(78) | 946 R600_NUM_GS_THREADS(4) | 947 R600_NUM_ES_THREADS(31)); 948 sq_stack_resource_mgmt_1 = (R600_NUM_PS_STACK_ENTRIES(40) | 949 R600_NUM_VS_STACK_ENTRIES(40)); 950 sq_stack_resource_mgmt_2 = (R600_NUM_GS_STACK_ENTRIES(32) | 951 R600_NUM_ES_STACK_ENTRIES(16)); 952 } else if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV670) { 953 sq_gpr_resource_mgmt_1 = (R600_NUM_PS_GPRS(44) | 954 R600_NUM_VS_GPRS(44) | 955 R600_NUM_CLAUSE_TEMP_GPRS(2)); 956 sq_gpr_resource_mgmt_2 = (R600_NUM_GS_GPRS(17) | 957 R600_NUM_ES_GPRS(17)); 958 sq_thread_resource_mgmt = (R600_NUM_PS_THREADS(79) | 959 R600_NUM_VS_THREADS(78) | 960 R600_NUM_GS_THREADS(4) | 961 R600_NUM_ES_THREADS(31)); 962 sq_stack_resource_mgmt_1 = (R600_NUM_PS_STACK_ENTRIES(64) | 963 R600_NUM_VS_STACK_ENTRIES(64)); 964 sq_stack_resource_mgmt_2 = (R600_NUM_GS_STACK_ENTRIES(64) | 965 R600_NUM_ES_STACK_ENTRIES(64)); 966 } 967 968 RADEON_WRITE(R600_SQ_CONFIG, sq_config); 969 RADEON_WRITE(R600_SQ_GPR_RESOURCE_MGMT_1, sq_gpr_resource_mgmt_1); 970 RADEON_WRITE(R600_SQ_GPR_RESOURCE_MGMT_2, sq_gpr_resource_mgmt_2); 971 RADEON_WRITE(R600_SQ_THREAD_RESOURCE_MGMT, sq_thread_resource_mgmt); 972 RADEON_WRITE(R600_SQ_STACK_RESOURCE_MGMT_1, sq_stack_resource_mgmt_1); 973 
RADEON_WRITE(R600_SQ_STACK_RESOURCE_MGMT_2, sq_stack_resource_mgmt_2); 974 975 if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) || 976 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) || 977 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780)) 978 RADEON_WRITE(R600_VGT_CACHE_INVALIDATION, R600_CACHE_INVALIDATION(R600_TC_ONLY)); 979 else 980 RADEON_WRITE(R600_VGT_CACHE_INVALIDATION, R600_CACHE_INVALIDATION(R600_VC_AND_TC)); 981 982 RADEON_WRITE(R600_PA_SC_AA_SAMPLE_LOCS_2S, (R600_S0_X(0xc) | 983 R600_S0_Y(0x4) | 984 R600_S1_X(0x4) | 985 R600_S1_Y(0xc))); 986 RADEON_WRITE(R600_PA_SC_AA_SAMPLE_LOCS_4S, (R600_S0_X(0xe) | 987 R600_S0_Y(0xe) | 988 R600_S1_X(0x2) | 989 R600_S1_Y(0x2) | 990 R600_S2_X(0xa) | 991 R600_S2_Y(0x6) | 992 R600_S3_X(0x6) | 993 R600_S3_Y(0xa))); 994 RADEON_WRITE(R600_PA_SC_AA_SAMPLE_LOCS_8S_WD0, (R600_S0_X(0xe) | 995 R600_S0_Y(0xb) | 996 R600_S1_X(0x4) | 997 R600_S1_Y(0xc) | 998 R600_S2_X(0x1) | 999 R600_S2_Y(0x6) | 1000 R600_S3_X(0xa) | 1001 R600_S3_Y(0xe))); 1002 RADEON_WRITE(R600_PA_SC_AA_SAMPLE_LOCS_8S_WD1, (R600_S4_X(0x6) | 1003 R600_S4_Y(0x1) | 1004 R600_S5_X(0x0) | 1005 R600_S5_Y(0x0) | 1006 R600_S6_X(0xb) | 1007 R600_S6_Y(0x4) | 1008 R600_S7_X(0x7) | 1009 R600_S7_Y(0x8))); 1010 1011 1012 switch (dev_priv->flags & RADEON_FAMILY_MASK) { 1013 case CHIP_R600: 1014 case CHIP_RV630: 1015 case CHIP_RV635: 1016 gs_prim_buffer_depth = 0; 1017 break; 1018 case CHIP_RV610: 1019 case CHIP_RS780: 1020 case CHIP_RV620: 1021 gs_prim_buffer_depth = 32; 1022 break; 1023 case CHIP_RV670: 1024 gs_prim_buffer_depth = 128; 1025 break; 1026 default: 1027 break; 1028 } 1029 1030 num_gs_verts_per_thread = dev_priv->r600_max_pipes * 16; 1031 vgt_gs_per_es = gs_prim_buffer_depth + num_gs_verts_per_thread; 1032 /* Max value for this is 256 */ 1033 if (vgt_gs_per_es > 256) 1034 vgt_gs_per_es = 256; 1035 1036 RADEON_WRITE(R600_VGT_ES_PER_GS, 128); 1037 RADEON_WRITE(R600_VGT_GS_PER_ES, vgt_gs_per_es); 1038 RADEON_WRITE(R600_VGT_GS_PER_VS, 2); 1039 
RADEON_WRITE(R600_VGT_GS_VERTEX_REUSE, 16); 1040 1041 /* more default values. 2D/3D driver should adjust as needed */ 1042 RADEON_WRITE(R600_PA_SC_LINE_STIPPLE_STATE, 0); 1043 RADEON_WRITE(R600_VGT_STRMOUT_EN, 0); 1044 RADEON_WRITE(R600_SX_MISC, 0); 1045 RADEON_WRITE(R600_PA_SC_MODE_CNTL, 0); 1046 RADEON_WRITE(R600_PA_SC_AA_CONFIG, 0); 1047 RADEON_WRITE(R600_PA_SC_LINE_STIPPLE, 0); 1048 RADEON_WRITE(R600_SPI_INPUT_Z, 0); 1049 RADEON_WRITE(R600_SPI_PS_IN_CONTROL_0, R600_NUM_INTERP(2)); 1050 RADEON_WRITE(R600_CB_COLOR7_FRAG, 0); 1051 1052 /* clear render buffer base addresses */ 1053 RADEON_WRITE(R600_CB_COLOR0_BASE, 0); 1054 RADEON_WRITE(R600_CB_COLOR1_BASE, 0); 1055 RADEON_WRITE(R600_CB_COLOR2_BASE, 0); 1056 RADEON_WRITE(R600_CB_COLOR3_BASE, 0); 1057 RADEON_WRITE(R600_CB_COLOR4_BASE, 0); 1058 RADEON_WRITE(R600_CB_COLOR5_BASE, 0); 1059 RADEON_WRITE(R600_CB_COLOR6_BASE, 0); 1060 RADEON_WRITE(R600_CB_COLOR7_BASE, 0); 1061 1062 switch (dev_priv->flags & RADEON_FAMILY_MASK) { 1063 case CHIP_RV610: 1064 case CHIP_RS780: 1065 case CHIP_RV620: 1066 tc_cntl = R600_TC_L2_SIZE(8); 1067 break; 1068 case CHIP_RV630: 1069 case CHIP_RV635: 1070 tc_cntl = R600_TC_L2_SIZE(4); 1071 break; 1072 case CHIP_R600: 1073 tc_cntl = R600_TC_L2_SIZE(0) | R600_L2_DISABLE_LATE_HIT; 1074 break; 1075 default: 1076 tc_cntl = R600_TC_L2_SIZE(0); 1077 break; 1078 } 1079 1080 RADEON_WRITE(R600_TC_CNTL, tc_cntl); 1081 1082 hdp_host_path_cntl = RADEON_READ(R600_HDP_HOST_PATH_CNTL); 1083 RADEON_WRITE(R600_HDP_HOST_PATH_CNTL, hdp_host_path_cntl); 1084 1085 arb_pop = RADEON_READ(R600_ARB_POP); 1086 arb_pop |= R600_ENABLE_TC128; 1087 RADEON_WRITE(R600_ARB_POP, arb_pop); 1088 1089 RADEON_WRITE(R600_PA_SC_MULTI_CHIP_CNTL, 0); 1090 RADEON_WRITE(R600_PA_CL_ENHANCE, (R600_CLIP_VTX_REORDER_ENA | 1091 R600_NUM_CLIP_SEQ(3))); 1092 RADEON_WRITE(R600_PA_SC_ENHANCE, R600_FORCE_EOV_MAX_CLK_CNT(4095)); 1093 1094} 1095 1096static u32 r700_get_tile_pipe_to_backend_map(u32 num_tile_pipes, 1097 u32 num_backends, 1098 u32 
backend_disable_mask) 1099{ 1100 u32 backend_map = 0; 1101 u32 enabled_backends_mask; 1102 u32 enabled_backends_count; 1103 u32 cur_pipe; 1104 u32 swizzle_pipe[R7XX_MAX_PIPES]; 1105 u32 cur_backend; 1106 u32 i; 1107 1108 if (num_tile_pipes > R7XX_MAX_PIPES) 1109 num_tile_pipes = R7XX_MAX_PIPES; 1110 if (num_tile_pipes < 1) 1111 num_tile_pipes = 1; 1112 if (num_backends > R7XX_MAX_BACKENDS) 1113 num_backends = R7XX_MAX_BACKENDS; 1114 if (num_backends < 1) 1115 num_backends = 1; 1116 1117 enabled_backends_mask = 0; 1118 enabled_backends_count = 0; 1119 for (i = 0; i < R7XX_MAX_BACKENDS; ++i) { 1120 if (((backend_disable_mask >> i) & 1) == 0) { 1121 enabled_backends_mask |= (1 << i); 1122 ++enabled_backends_count; 1123 } 1124 if (enabled_backends_count == num_backends) 1125 break; 1126 } 1127 1128 if (enabled_backends_count == 0) { 1129 enabled_backends_mask = 1; 1130 enabled_backends_count = 1; 1131 } 1132 1133 if (enabled_backends_count != num_backends) 1134 num_backends = enabled_backends_count; 1135 1136 memset((uint8_t *)&swizzle_pipe[0], 0, sizeof(u32) * R7XX_MAX_PIPES); 1137 switch (num_tile_pipes) { 1138 case 1: 1139 swizzle_pipe[0] = 0; 1140 break; 1141 case 2: 1142 swizzle_pipe[0] = 0; 1143 swizzle_pipe[1] = 1; 1144 break; 1145 case 3: 1146 swizzle_pipe[0] = 0; 1147 swizzle_pipe[1] = 2; 1148 swizzle_pipe[2] = 1; 1149 break; 1150 case 4: 1151 swizzle_pipe[0] = 0; 1152 swizzle_pipe[1] = 2; 1153 swizzle_pipe[2] = 3; 1154 swizzle_pipe[3] = 1; 1155 break; 1156 case 5: 1157 swizzle_pipe[0] = 0; 1158 swizzle_pipe[1] = 2; 1159 swizzle_pipe[2] = 4; 1160 swizzle_pipe[3] = 1; 1161 swizzle_pipe[4] = 3; 1162 break; 1163 case 6: 1164 swizzle_pipe[0] = 0; 1165 swizzle_pipe[1] = 2; 1166 swizzle_pipe[2] = 4; 1167 swizzle_pipe[3] = 5; 1168 swizzle_pipe[4] = 3; 1169 swizzle_pipe[5] = 1; 1170 break; 1171 case 7: 1172 swizzle_pipe[0] = 0; 1173 swizzle_pipe[1] = 2; 1174 swizzle_pipe[2] = 4; 1175 swizzle_pipe[3] = 6; 1176 swizzle_pipe[4] = 3; 1177 swizzle_pipe[5] = 1; 1178 
swizzle_pipe[6] = 5; 1179 break; 1180 case 8: 1181 swizzle_pipe[0] = 0; 1182 swizzle_pipe[1] = 2; 1183 swizzle_pipe[2] = 4; 1184 swizzle_pipe[3] = 6; 1185 swizzle_pipe[4] = 3; 1186 swizzle_pipe[5] = 1; 1187 swizzle_pipe[6] = 7; 1188 swizzle_pipe[7] = 5; 1189 break; 1190 } 1191 1192 cur_backend = 0; 1193 for (cur_pipe = 0; cur_pipe < num_tile_pipes; ++cur_pipe) { 1194 while (((1 << cur_backend) & enabled_backends_mask) == 0) 1195 cur_backend = (cur_backend + 1) % R7XX_MAX_BACKENDS; 1196 1197 backend_map |= (u32)(((cur_backend & 3) << (swizzle_pipe[cur_pipe] * 2))); 1198 1199 cur_backend = (cur_backend + 1) % R7XX_MAX_BACKENDS; 1200 } 1201 1202 return backend_map; 1203} 1204 1205static void r700_gfx_init(struct drm_device *dev, 1206 drm_radeon_private_t *dev_priv) 1207{ 1208 int i, j, num_qd_pipes; 1209 u32 sx_debug_1; 1210 u32 smx_dc_ctl0; 1211 u32 num_gs_verts_per_thread; 1212 u32 vgt_gs_per_es; 1213 u32 gs_prim_buffer_depth = 0; 1214 u32 sq_ms_fifo_sizes; 1215 u32 sq_config; 1216 u32 sq_thread_resource_mgmt; 1217 u32 hdp_host_path_cntl; 1218 u32 sq_dyn_gpr_size_simd_ab_0; 1219 u32 backend_map; 1220 u32 gb_tiling_config = 0; 1221 u32 cc_rb_backend_disable = 0; 1222 u32 cc_gc_shader_pipe_config = 0; 1223 u32 mc_arb_ramcfg; 1224 u32 db_debug4; 1225 1226 /* setup chip specs */ 1227 switch (dev_priv->flags & RADEON_FAMILY_MASK) { 1228 case CHIP_RV770: 1229 dev_priv->r600_max_pipes = 4; 1230 dev_priv->r600_max_tile_pipes = 8; 1231 dev_priv->r600_max_simds = 10; 1232 dev_priv->r600_max_backends = 4; 1233 dev_priv->r600_max_gprs = 256; 1234 dev_priv->r600_max_threads = 248; 1235 dev_priv->r600_max_stack_entries = 512; 1236 dev_priv->r600_max_hw_contexts = 8; 1237 dev_priv->r600_max_gs_threads = 16 * 2; 1238 dev_priv->r600_sx_max_export_size = 128; 1239 dev_priv->r600_sx_max_export_pos_size = 16; 1240 dev_priv->r600_sx_max_export_smx_size = 112; 1241 dev_priv->r600_sq_num_cf_insts = 2; 1242 1243 dev_priv->r700_sx_num_of_sets = 7; 1244 dev_priv->r700_sc_prim_fifo_size = 
0xF9; 1245 dev_priv->r700_sc_hiz_tile_fifo_size = 0x30; 1246 dev_priv->r700_sc_earlyz_tile_fifo_fize = 0x130; 1247 break; 1248 case CHIP_RV740: 1249 dev_priv->r600_max_pipes = 4; 1250 dev_priv->r600_max_tile_pipes = 4; 1251 dev_priv->r600_max_simds = 8; 1252 dev_priv->r600_max_backends = 4; 1253 dev_priv->r600_max_gprs = 256; 1254 dev_priv->r600_max_threads = 248; 1255 dev_priv->r600_max_stack_entries = 512; 1256 dev_priv->r600_max_hw_contexts = 8; 1257 dev_priv->r600_max_gs_threads = 16 * 2; 1258 dev_priv->r600_sx_max_export_size = 256; 1259 dev_priv->r600_sx_max_export_pos_size = 32; 1260 dev_priv->r600_sx_max_export_smx_size = 224; 1261 dev_priv->r600_sq_num_cf_insts = 2; 1262 1263 dev_priv->r700_sx_num_of_sets = 7; 1264 dev_priv->r700_sc_prim_fifo_size = 0x100; 1265 dev_priv->r700_sc_hiz_tile_fifo_size = 0x30; 1266 dev_priv->r700_sc_earlyz_tile_fifo_fize = 0x130; 1267 1268 if (dev_priv->r600_sx_max_export_pos_size > 16) { 1269 dev_priv->r600_sx_max_export_pos_size -= 16; 1270 dev_priv->r600_sx_max_export_smx_size += 16; 1271 } 1272 break; 1273 case CHIP_RV730: 1274 dev_priv->r600_max_pipes = 2; 1275 dev_priv->r600_max_tile_pipes = 4; 1276 dev_priv->r600_max_simds = 8; 1277 dev_priv->r600_max_backends = 2; 1278 dev_priv->r600_max_gprs = 128; 1279 dev_priv->r600_max_threads = 248; 1280 dev_priv->r600_max_stack_entries = 256; 1281 dev_priv->r600_max_hw_contexts = 8; 1282 dev_priv->r600_max_gs_threads = 16 * 2; 1283 dev_priv->r600_sx_max_export_size = 256; 1284 dev_priv->r600_sx_max_export_pos_size = 32; 1285 dev_priv->r600_sx_max_export_smx_size = 224; 1286 dev_priv->r600_sq_num_cf_insts = 2; 1287 1288 dev_priv->r700_sx_num_of_sets = 7; 1289 dev_priv->r700_sc_prim_fifo_size = 0xf9; 1290 dev_priv->r700_sc_hiz_tile_fifo_size = 0x30; 1291 dev_priv->r700_sc_earlyz_tile_fifo_fize = 0x130; 1292 1293 if (dev_priv->r600_sx_max_export_pos_size > 16) { 1294 dev_priv->r600_sx_max_export_pos_size -= 16; 1295 dev_priv->r600_sx_max_export_smx_size += 16; 1296 } 1297 break; 1298 
case CHIP_RV710: 1299 dev_priv->r600_max_pipes = 2; 1300 dev_priv->r600_max_tile_pipes = 2; 1301 dev_priv->r600_max_simds = 2; 1302 dev_priv->r600_max_backends = 1; 1303 dev_priv->r600_max_gprs = 256; 1304 dev_priv->r600_max_threads = 192; 1305 dev_priv->r600_max_stack_entries = 256; 1306 dev_priv->r600_max_hw_contexts = 4; 1307 dev_priv->r600_max_gs_threads = 8 * 2; 1308 dev_priv->r600_sx_max_export_size = 128; 1309 dev_priv->r600_sx_max_export_pos_size = 16; 1310 dev_priv->r600_sx_max_export_smx_size = 112; 1311 dev_priv->r600_sq_num_cf_insts = 1; 1312 1313 dev_priv->r700_sx_num_of_sets = 7; 1314 dev_priv->r700_sc_prim_fifo_size = 0x40; 1315 dev_priv->r700_sc_hiz_tile_fifo_size = 0x30; 1316 dev_priv->r700_sc_earlyz_tile_fifo_fize = 0x130; 1317 break; 1318 default: 1319 break; 1320 } 1321 1322 /* Initialize HDP */ 1323 j = 0; 1324 for (i = 0; i < 32; i++) { 1325 RADEON_WRITE((0x2c14 + j), 0x00000000); 1326 RADEON_WRITE((0x2c18 + j), 0x00000000); 1327 RADEON_WRITE((0x2c1c + j), 0x00000000); 1328 RADEON_WRITE((0x2c20 + j), 0x00000000); 1329 RADEON_WRITE((0x2c24 + j), 0x00000000); 1330 j += 0x18; 1331 } 1332 1333 RADEON_WRITE(R600_GRBM_CNTL, R600_GRBM_READ_TIMEOUT(0xff)); 1334 1335 /* setup tiling, simd, pipe config */ 1336 mc_arb_ramcfg = RADEON_READ(R700_MC_ARB_RAMCFG); 1337 1338 switch (dev_priv->r600_max_tile_pipes) { 1339 case 1: 1340 gb_tiling_config |= R600_PIPE_TILING(0); 1341 break; 1342 case 2: 1343 gb_tiling_config |= R600_PIPE_TILING(1); 1344 break; 1345 case 4: 1346 gb_tiling_config |= R600_PIPE_TILING(2); 1347 break; 1348 case 8: 1349 gb_tiling_config |= R600_PIPE_TILING(3); 1350 break; 1351 default: 1352 break; 1353 } 1354 1355 if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV770) 1356 gb_tiling_config |= R600_BANK_TILING(1); 1357 else 1358 gb_tiling_config |= R600_BANK_TILING((mc_arb_ramcfg >> R700_NOOFBANK_SHIFT) & R700_NOOFBANK_MASK); 1359 1360 gb_tiling_config |= R600_GROUP_SIZE(0); 1361 1362 if (((mc_arb_ramcfg >> R700_NOOFROWS_SHIFT) & 
R700_NOOFROWS_MASK) > 3) { 1363 gb_tiling_config |= R600_ROW_TILING(3); 1364 gb_tiling_config |= R600_SAMPLE_SPLIT(3); 1365 } else { 1366 gb_tiling_config |= 1367 R600_ROW_TILING(((mc_arb_ramcfg >> R700_NOOFROWS_SHIFT) & R700_NOOFROWS_MASK)); 1368 gb_tiling_config |= 1369 R600_SAMPLE_SPLIT(((mc_arb_ramcfg >> R700_NOOFROWS_SHIFT) & R700_NOOFROWS_MASK)); 1370 } 1371 1372 gb_tiling_config |= R600_BANK_SWAPS(1); 1373 1374 backend_map = r700_get_tile_pipe_to_backend_map(dev_priv->r600_max_tile_pipes, 1375 dev_priv->r600_max_backends, 1376 (0xff << dev_priv->r600_max_backends) & 0xff); 1377 gb_tiling_config |= R600_BACKEND_MAP(backend_map); 1378 1379 cc_gc_shader_pipe_config = 1380 R600_INACTIVE_QD_PIPES((R7XX_MAX_PIPES_MASK << dev_priv->r600_max_pipes) & R7XX_MAX_PIPES_MASK); 1381 cc_gc_shader_pipe_config |= 1382 R600_INACTIVE_SIMDS((R7XX_MAX_SIMDS_MASK << dev_priv->r600_max_simds) & R7XX_MAX_SIMDS_MASK); 1383 1384 cc_rb_backend_disable = 1385 R600_BACKEND_DISABLE((R7XX_MAX_BACKENDS_MASK << dev_priv->r600_max_backends) & R7XX_MAX_BACKENDS_MASK); 1386 1387 RADEON_WRITE(R600_GB_TILING_CONFIG, gb_tiling_config); 1388 RADEON_WRITE(R600_DCP_TILING_CONFIG, (gb_tiling_config & 0xffff)); 1389 RADEON_WRITE(R600_HDP_TILING_CONFIG, (gb_tiling_config & 0xffff)); 1390 1391 RADEON_WRITE(R600_CC_RB_BACKEND_DISABLE, cc_rb_backend_disable); 1392 RADEON_WRITE(R600_CC_GC_SHADER_PIPE_CONFIG, cc_gc_shader_pipe_config); 1393 RADEON_WRITE(R600_GC_USER_SHADER_PIPE_CONFIG, cc_gc_shader_pipe_config); 1394 1395 RADEON_WRITE(R700_CC_SYS_RB_BACKEND_DISABLE, cc_rb_backend_disable); 1396 RADEON_WRITE(R700_CGTS_SYS_TCC_DISABLE, 0); 1397 RADEON_WRITE(R700_CGTS_TCC_DISABLE, 0); 1398 RADEON_WRITE(R700_CGTS_USER_SYS_TCC_DISABLE, 0); 1399 RADEON_WRITE(R700_CGTS_USER_TCC_DISABLE, 0); 1400 1401 num_qd_pipes = 1402 R7XX_MAX_BACKENDS - r600_count_pipe_bits(cc_gc_shader_pipe_config & R600_INACTIVE_QD_PIPES_MASK); 1403 RADEON_WRITE(R600_VGT_OUT_DEALLOC_CNTL, (num_qd_pipes * 4) & R600_DEALLOC_DIST_MASK); 1404 
RADEON_WRITE(R600_VGT_VERTEX_REUSE_BLOCK_CNTL, ((num_qd_pipes * 4) - 2) & R600_VTX_REUSE_DEPTH_MASK); 1405 1406 /* set HW defaults for 3D engine */ 1407 RADEON_WRITE(R600_CP_QUEUE_THRESHOLDS, (R600_ROQ_IB1_START(0x16) | 1408 R600_ROQ_IB2_START(0x2b))); 1409 1410 RADEON_WRITE(R600_CP_MEQ_THRESHOLDS, R700_STQ_SPLIT(0x30)); 1411 1412 RADEON_WRITE(R600_TA_CNTL_AUX, (R600_DISABLE_CUBE_ANISO | 1413 R600_SYNC_GRADIENT | 1414 R600_SYNC_WALKER | 1415 R600_SYNC_ALIGNER)); 1416 1417 sx_debug_1 = RADEON_READ(R700_SX_DEBUG_1); 1418 sx_debug_1 |= R700_ENABLE_NEW_SMX_ADDRESS; 1419 RADEON_WRITE(R700_SX_DEBUG_1, sx_debug_1); 1420 1421 smx_dc_ctl0 = RADEON_READ(R600_SMX_DC_CTL0); 1422 smx_dc_ctl0 &= ~R700_CACHE_DEPTH(0x1ff); 1423 smx_dc_ctl0 |= R700_CACHE_DEPTH((dev_priv->r700_sx_num_of_sets * 64) - 1); 1424 RADEON_WRITE(R600_SMX_DC_CTL0, smx_dc_ctl0); 1425 1426 RADEON_WRITE(R700_SMX_EVENT_CTL, (R700_ES_FLUSH_CTL(4) | 1427 R700_GS_FLUSH_CTL(4) | 1428 R700_ACK_FLUSH_CTL(3) | 1429 R700_SYNC_FLUSH_CTL)); 1430 1431 if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV770) 1432 RADEON_WRITE(R700_DB_DEBUG3, R700_DB_CLK_OFF_DELAY(0x1f)); 1433 else { 1434 db_debug4 = RADEON_READ(RV700_DB_DEBUG4); 1435 db_debug4 |= RV700_DISABLE_TILE_COVERED_FOR_PS_ITER; 1436 RADEON_WRITE(RV700_DB_DEBUG4, db_debug4); 1437 } 1438 1439 RADEON_WRITE(R600_SX_EXPORT_BUFFER_SIZES, (R600_COLOR_BUFFER_SIZE((dev_priv->r600_sx_max_export_size / 4) - 1) | 1440 R600_POSITION_BUFFER_SIZE((dev_priv->r600_sx_max_export_pos_size / 4) - 1) | 1441 R600_SMX_BUFFER_SIZE((dev_priv->r600_sx_max_export_smx_size / 4) - 1))); 1442 1443 RADEON_WRITE(R700_PA_SC_FIFO_SIZE_R7XX, (R700_SC_PRIM_FIFO_SIZE(dev_priv->r700_sc_prim_fifo_size) | 1444 R700_SC_HIZ_TILE_FIFO_SIZE(dev_priv->r700_sc_hiz_tile_fifo_size) | 1445 R700_SC_EARLYZ_TILE_FIFO_SIZE(dev_priv->r700_sc_earlyz_tile_fifo_fize))); 1446 1447 RADEON_WRITE(R600_PA_SC_MULTI_CHIP_CNTL, 0); 1448 1449 RADEON_WRITE(R600_VGT_NUM_INSTANCES, 1); 1450 1451 RADEON_WRITE(R600_SPI_CONFIG_CNTL, 
R600_GPR_WRITE_PRIORITY(0)); 1452 1453 RADEON_WRITE(R600_SPI_CONFIG_CNTL_1, R600_VTX_DONE_DELAY(4)); 1454 1455 RADEON_WRITE(R600_CP_PERFMON_CNTL, 0); 1456 1457 sq_ms_fifo_sizes = (R600_CACHE_FIFO_SIZE(16 * dev_priv->r600_sq_num_cf_insts) | 1458 R600_DONE_FIFO_HIWATER(0xe0) | 1459 R600_ALU_UPDATE_FIFO_HIWATER(0x8)); 1460 switch (dev_priv->flags & RADEON_FAMILY_MASK) { 1461 case CHIP_RV770: 1462 sq_ms_fifo_sizes |= R600_FETCH_FIFO_HIWATER(0x1); 1463 break; 1464 case CHIP_RV740: 1465 case CHIP_RV730: 1466 case CHIP_RV710: 1467 default: 1468 sq_ms_fifo_sizes |= R600_FETCH_FIFO_HIWATER(0x4); 1469 break; 1470 } 1471 RADEON_WRITE(R600_SQ_MS_FIFO_SIZES, sq_ms_fifo_sizes); 1472 1473 /* SQ_CONFIG, SQ_GPR_RESOURCE_MGMT, SQ_THREAD_RESOURCE_MGMT, SQ_STACK_RESOURCE_MGMT 1474 * should be adjusted as needed by the 2D/3D drivers. This just sets default values 1475 */ 1476 sq_config = RADEON_READ(R600_SQ_CONFIG); 1477 sq_config &= ~(R600_PS_PRIO(3) | 1478 R600_VS_PRIO(3) | 1479 R600_GS_PRIO(3) | 1480 R600_ES_PRIO(3)); 1481 sq_config |= (R600_DX9_CONSTS | 1482 R600_VC_ENABLE | 1483 R600_EXPORT_SRC_C | 1484 R600_PS_PRIO(0) | 1485 R600_VS_PRIO(1) | 1486 R600_GS_PRIO(2) | 1487 R600_ES_PRIO(3)); 1488 if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV710) 1489 /* no vertex cache */ 1490 sq_config &= ~R600_VC_ENABLE; 1491 1492 RADEON_WRITE(R600_SQ_CONFIG, sq_config); 1493 1494 RADEON_WRITE(R600_SQ_GPR_RESOURCE_MGMT_1, (R600_NUM_PS_GPRS((dev_priv->r600_max_gprs * 24)/64) | 1495 R600_NUM_VS_GPRS((dev_priv->r600_max_gprs * 24)/64) | 1496 R600_NUM_CLAUSE_TEMP_GPRS(((dev_priv->r600_max_gprs * 24)/64)/2))); 1497 1498 RADEON_WRITE(R600_SQ_GPR_RESOURCE_MGMT_2, (R600_NUM_GS_GPRS((dev_priv->r600_max_gprs * 7)/64) | 1499 R600_NUM_ES_GPRS((dev_priv->r600_max_gprs * 7)/64))); 1500 1501 sq_thread_resource_mgmt = (R600_NUM_PS_THREADS((dev_priv->r600_max_threads * 4)/8) | 1502 R600_NUM_VS_THREADS((dev_priv->r600_max_threads * 2)/8) | 1503 R600_NUM_ES_THREADS((dev_priv->r600_max_threads * 1)/8)); 1504 
if (((dev_priv->r600_max_threads * 1) / 8) > dev_priv->r600_max_gs_threads) 1505 sq_thread_resource_mgmt |= R600_NUM_GS_THREADS(dev_priv->r600_max_gs_threads); 1506 else 1507 sq_thread_resource_mgmt |= R600_NUM_GS_THREADS((dev_priv->r600_max_gs_threads * 1)/8); 1508 RADEON_WRITE(R600_SQ_THREAD_RESOURCE_MGMT, sq_thread_resource_mgmt); 1509 1510 RADEON_WRITE(R600_SQ_STACK_RESOURCE_MGMT_1, (R600_NUM_PS_STACK_ENTRIES((dev_priv->r600_max_stack_entries * 1)/4) | 1511 R600_NUM_VS_STACK_ENTRIES((dev_priv->r600_max_stack_entries * 1)/4))); 1512 1513 RADEON_WRITE(R600_SQ_STACK_RESOURCE_MGMT_2, (R600_NUM_GS_STACK_ENTRIES((dev_priv->r600_max_stack_entries * 1)/4) | 1514 R600_NUM_ES_STACK_ENTRIES((dev_priv->r600_max_stack_entries * 1)/4))); 1515 1516 sq_dyn_gpr_size_simd_ab_0 = (R700_SIMDA_RING0((dev_priv->r600_max_gprs * 38)/64) | 1517 R700_SIMDA_RING1((dev_priv->r600_max_gprs * 38)/64) | 1518 R700_SIMDB_RING0((dev_priv->r600_max_gprs * 38)/64) | 1519 R700_SIMDB_RING1((dev_priv->r600_max_gprs * 38)/64)); 1520 1521 RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_0, sq_dyn_gpr_size_simd_ab_0); 1522 RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_1, sq_dyn_gpr_size_simd_ab_0); 1523 RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_2, sq_dyn_gpr_size_simd_ab_0); 1524 RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_3, sq_dyn_gpr_size_simd_ab_0); 1525 RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_4, sq_dyn_gpr_size_simd_ab_0); 1526 RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_5, sq_dyn_gpr_size_simd_ab_0); 1527 RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_6, sq_dyn_gpr_size_simd_ab_0); 1528 RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_7, sq_dyn_gpr_size_simd_ab_0); 1529 1530 RADEON_WRITE(R700_PA_SC_FORCE_EOV_MAX_CNTS, (R700_FORCE_EOV_MAX_CLK_CNT(4095) | 1531 R700_FORCE_EOV_MAX_REZ_CNT(255))); 1532 1533 if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV710) 1534 RADEON_WRITE(R600_VGT_CACHE_INVALIDATION, (R600_CACHE_INVALIDATION(R600_TC_ONLY) | 1535 R700_AUTO_INVLD_EN(R700_ES_AND_GS_AUTO))); 1536 else 1537 
RADEON_WRITE(R600_VGT_CACHE_INVALIDATION, (R600_CACHE_INVALIDATION(R600_VC_AND_TC) | 1538 R700_AUTO_INVLD_EN(R700_ES_AND_GS_AUTO))); 1539 1540 switch (dev_priv->flags & RADEON_FAMILY_MASK) { 1541 case CHIP_RV770: 1542 case CHIP_RV740: 1543 case CHIP_RV730: 1544 gs_prim_buffer_depth = 384; 1545 break; 1546 case CHIP_RV710: 1547 gs_prim_buffer_depth = 128; 1548 break; 1549 default: 1550 break; 1551 } 1552 1553 num_gs_verts_per_thread = dev_priv->r600_max_pipes * 16; 1554 vgt_gs_per_es = gs_prim_buffer_depth + num_gs_verts_per_thread; 1555 /* Max value for this is 256 */ 1556 if (vgt_gs_per_es > 256) 1557 vgt_gs_per_es = 256; 1558 1559 RADEON_WRITE(R600_VGT_ES_PER_GS, 128); 1560 RADEON_WRITE(R600_VGT_GS_PER_ES, vgt_gs_per_es); 1561 RADEON_WRITE(R600_VGT_GS_PER_VS, 2); 1562 1563 /* more default values. 2D/3D driver should adjust as needed */ 1564 RADEON_WRITE(R600_VGT_GS_VERTEX_REUSE, 16); 1565 RADEON_WRITE(R600_PA_SC_LINE_STIPPLE_STATE, 0); 1566 RADEON_WRITE(R600_VGT_STRMOUT_EN, 0); 1567 RADEON_WRITE(R600_SX_MISC, 0); 1568 RADEON_WRITE(R600_PA_SC_MODE_CNTL, 0); 1569 RADEON_WRITE(R700_PA_SC_EDGERULE, 0xaaaaaaaa); 1570 RADEON_WRITE(R600_PA_SC_AA_CONFIG, 0); 1571 RADEON_WRITE(R600_PA_SC_CLIPRECT_RULE, 0xffff); 1572 RADEON_WRITE(R600_PA_SC_LINE_STIPPLE, 0); 1573 RADEON_WRITE(R600_SPI_INPUT_Z, 0); 1574 RADEON_WRITE(R600_SPI_PS_IN_CONTROL_0, R600_NUM_INTERP(2)); 1575 RADEON_WRITE(R600_CB_COLOR7_FRAG, 0); 1576 1577 /* clear render buffer base addresses */ 1578 RADEON_WRITE(R600_CB_COLOR0_BASE, 0); 1579 RADEON_WRITE(R600_CB_COLOR1_BASE, 0); 1580 RADEON_WRITE(R600_CB_COLOR2_BASE, 0); 1581 RADEON_WRITE(R600_CB_COLOR3_BASE, 0); 1582 RADEON_WRITE(R600_CB_COLOR4_BASE, 0); 1583 RADEON_WRITE(R600_CB_COLOR5_BASE, 0); 1584 RADEON_WRITE(R600_CB_COLOR6_BASE, 0); 1585 RADEON_WRITE(R600_CB_COLOR7_BASE, 0); 1586 1587 RADEON_WRITE(R700_TCP_CNTL, 0); 1588 1589 hdp_host_path_cntl = RADEON_READ(R600_HDP_HOST_PATH_CNTL); 1590 RADEON_WRITE(R600_HDP_HOST_PATH_CNTL, hdp_host_path_cntl); 1591 1592 
RADEON_WRITE(R600_PA_SC_MULTI_CHIP_CNTL, 0); 1593 1594 RADEON_WRITE(R600_PA_CL_ENHANCE, (R600_CLIP_VTX_REORDER_ENA | 1595 R600_NUM_CLIP_SEQ(3))); 1596 1597} 1598 1599static void r600_cp_init_ring_buffer(struct drm_device *dev, 1600 drm_radeon_private_t *dev_priv, 1601 struct drm_file *file_priv) 1602{ 1603 u32 ring_start; 1604 u64 rptr_addr; 1605 1606 if (((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770)) 1607 r700_gfx_init(dev, dev_priv); 1608 else 1609 r600_gfx_init(dev, dev_priv); 1610 1611 RADEON_WRITE(R600_GRBM_SOFT_RESET, R600_SOFT_RESET_CP); 1612 RADEON_READ(R600_GRBM_SOFT_RESET); 1613 DRM_UDELAY(15000); 1614 RADEON_WRITE(R600_GRBM_SOFT_RESET, 0); 1615 1616 1617 /* Set ring buffer size */ 1618#ifdef __BIG_ENDIAN 1619 RADEON_WRITE(R600_CP_RB_CNTL, 1620 RADEON_BUF_SWAP_32BIT | 1621 RADEON_RB_NO_UPDATE | 1622 (dev_priv->ring.rptr_update_l2qw << 8) | 1623 dev_priv->ring.size_l2qw); 1624#else 1625 RADEON_WRITE(R600_CP_RB_CNTL, 1626 RADEON_RB_NO_UPDATE | 1627 (dev_priv->ring.rptr_update_l2qw << 8) | 1628 dev_priv->ring.size_l2qw); 1629#endif 1630 1631 RADEON_WRITE(R600_CP_SEM_WAIT_TIMER, 0x4); 1632 1633 /* Set the write pointer delay */ 1634 RADEON_WRITE(R600_CP_RB_WPTR_DELAY, 0); 1635 1636#ifdef __BIG_ENDIAN 1637 RADEON_WRITE(R600_CP_RB_CNTL, 1638 RADEON_BUF_SWAP_32BIT | 1639 RADEON_RB_NO_UPDATE | 1640 RADEON_RB_RPTR_WR_ENA | 1641 (dev_priv->ring.rptr_update_l2qw << 8) | 1642 dev_priv->ring.size_l2qw); 1643#else 1644 RADEON_WRITE(R600_CP_RB_CNTL, 1645 RADEON_RB_NO_UPDATE | 1646 RADEON_RB_RPTR_WR_ENA | 1647 (dev_priv->ring.rptr_update_l2qw << 8) | 1648 dev_priv->ring.size_l2qw); 1649#endif 1650 1651 /* Initialize the ring buffer's read and write pointers */ 1652 RADEON_WRITE(R600_CP_RB_RPTR_WR, 0); 1653 RADEON_WRITE(R600_CP_RB_WPTR, 0); 1654 SET_RING_HEAD(dev_priv, 0); 1655 dev_priv->ring.tail = 0; 1656 1657#if __OS_HAS_AGP 1658 if (dev_priv->flags & RADEON_IS_AGP) { 1659 rptr_addr = dev_priv->ring_rptr->offset 1660 - dev->agp->base + 1661 
dev_priv->gart_vm_start; 1662 } else 1663#endif 1664 { 1665 rptr_addr = dev_priv->ring_rptr->offset 1666 - ((unsigned long) dev->sg->virtual) 1667 + dev_priv->gart_vm_start; 1668 } 1669 RADEON_WRITE(R600_CP_RB_RPTR_ADDR, 1670 rptr_addr & 0xffffffff); 1671 RADEON_WRITE(R600_CP_RB_RPTR_ADDR_HI, 1672 upper_32_bits(rptr_addr)); 1673 1674#ifdef __BIG_ENDIAN 1675 RADEON_WRITE(R600_CP_RB_CNTL, 1676 RADEON_BUF_SWAP_32BIT | 1677 (dev_priv->ring.rptr_update_l2qw << 8) | 1678 dev_priv->ring.size_l2qw); 1679#else 1680 RADEON_WRITE(R600_CP_RB_CNTL, 1681 (dev_priv->ring.rptr_update_l2qw << 8) | 1682 dev_priv->ring.size_l2qw); 1683#endif 1684 1685#if __OS_HAS_AGP 1686 if (dev_priv->flags & RADEON_IS_AGP) { 1687 /* XXX */ 1688 radeon_write_agp_base(dev_priv, dev->agp->base); 1689 1690 /* XXX */ 1691 radeon_write_agp_location(dev_priv, 1692 (((dev_priv->gart_vm_start - 1 + 1693 dev_priv->gart_size) & 0xffff0000) | 1694 (dev_priv->gart_vm_start >> 16))); 1695 1696 ring_start = (dev_priv->cp_ring->offset 1697 - dev->agp->base 1698 + dev_priv->gart_vm_start); 1699 } else 1700#endif 1701 ring_start = (dev_priv->cp_ring->offset 1702 - (unsigned long)dev->sg->virtual 1703 + dev_priv->gart_vm_start); 1704 1705 RADEON_WRITE(R600_CP_RB_BASE, ring_start >> 8); 1706 1707 RADEON_WRITE(R600_CP_ME_CNTL, 0xff); 1708 1709 RADEON_WRITE(R600_CP_DEBUG, (1 << 27) | (1 << 28)); 1710 1711 /* Initialize the scratch register pointer. This will cause 1712 * the scratch register values to be written out to memory 1713 * whenever they are updated. 
1714 * 1715 * We simply put this behind the ring read pointer, this works 1716 * with PCI GART as well as (whatever kind of) AGP GART 1717 */ 1718 { 1719 u64 scratch_addr; 1720 1721 scratch_addr = RADEON_READ(R600_CP_RB_RPTR_ADDR); 1722 scratch_addr |= ((u64)RADEON_READ(R600_CP_RB_RPTR_ADDR_HI)) << 32; 1723 scratch_addr += R600_SCRATCH_REG_OFFSET; 1724 scratch_addr >>= 8; 1725 scratch_addr &= 0xffffffff; 1726 1727 RADEON_WRITE(R600_SCRATCH_ADDR, (uint32_t)scratch_addr); 1728 } 1729 1730 RADEON_WRITE(R600_SCRATCH_UMSK, 0x7); 1731 1732 /* Turn on bus mastering */ 1733 radeon_enable_bm(dev_priv); 1734 1735 radeon_write_ring_rptr(dev_priv, R600_SCRATCHOFF(0), 0); 1736 RADEON_WRITE(R600_LAST_FRAME_REG, 0); 1737 1738 radeon_write_ring_rptr(dev_priv, R600_SCRATCHOFF(1), 0); 1739 RADEON_WRITE(R600_LAST_DISPATCH_REG, 0); 1740 1741 radeon_write_ring_rptr(dev_priv, R600_SCRATCHOFF(2), 0); 1742 RADEON_WRITE(R600_LAST_CLEAR_REG, 0); 1743 1744 /* reset sarea copies of these */ 1745 if (dev_priv->sarea_priv) { 1746 dev_priv->sarea_priv->last_frame = 0; 1747 dev_priv->sarea_priv->last_dispatch = 0; 1748 dev_priv->sarea_priv->last_clear = 0; 1749 } 1750 1751 r600_do_wait_for_idle(dev_priv); 1752 1753} 1754 1755int r600_do_cleanup_cp(struct drm_device *dev) 1756{ 1757 drm_radeon_private_t *dev_priv = dev->dev_private; 1758 DRM_DEBUG("\n"); 1759 1760 /* Make sure interrupts are disabled here because the uninstall ioctl 1761 * may not have been called from userspace and after dev_private 1762 * is freed, it's too late. 
1763 */ 1764 if (dev->irq_enabled) 1765 drm_irq_uninstall(dev); 1766 1767#if __OS_HAS_AGP 1768 if (dev_priv->flags & RADEON_IS_AGP) { 1769 if (dev_priv->cp_ring != NULL) { 1770 drm_core_ioremapfree(dev_priv->cp_ring, dev); 1771 dev_priv->cp_ring = NULL; 1772 } 1773 if (dev_priv->ring_rptr != NULL) { 1774 drm_core_ioremapfree(dev_priv->ring_rptr, dev); 1775 dev_priv->ring_rptr = NULL; 1776 } 1777 if (dev->agp_buffer_map != NULL) { 1778 drm_core_ioremapfree(dev->agp_buffer_map, dev); 1779 dev->agp_buffer_map = NULL; 1780 } 1781 } else 1782#endif 1783 { 1784 1785 if (dev_priv->gart_info.bus_addr) 1786 r600_page_table_cleanup(dev, &dev_priv->gart_info); 1787 1788 if (dev_priv->gart_info.gart_table_location == DRM_ATI_GART_FB) { 1789 drm_core_ioremapfree(&dev_priv->gart_info.mapping, dev); 1790 dev_priv->gart_info.addr = 0; 1791 } 1792 } 1793 /* only clear to the start of flags */ 1794 memset(dev_priv, 0, offsetof(drm_radeon_private_t, flags)); 1795 1796 return 0; 1797} 1798 1799int r600_do_init_cp(struct drm_device *dev, drm_radeon_init_t *init, 1800 struct drm_file *file_priv) 1801{ 1802 drm_radeon_private_t *dev_priv = dev->dev_private; 1803 1804 DRM_DEBUG("\n"); 1805 1806 /* if we require new memory map but we don't have it fail */ 1807 if ((dev_priv->flags & RADEON_NEW_MEMMAP) && !dev_priv->new_memmap) { 1808 DRM_ERROR("Cannot initialise DRM on this card\nThis card requires a new X.org DDX for 3D\n"); 1809 r600_do_cleanup_cp(dev); 1810 return -EINVAL; 1811 } 1812 1813 if (init->is_pci && (dev_priv->flags & RADEON_IS_AGP)) { 1814 DRM_DEBUG("Forcing AGP card to PCI mode\n"); 1815 dev_priv->flags &= ~RADEON_IS_AGP; 1816 /* The writeback test succeeds, but when writeback is enabled, 1817 * the ring buffer read ptr update fails after first 128 bytes. 
1818 */ 1819 radeon_no_wb = 1; 1820 } else if (!(dev_priv->flags & (RADEON_IS_AGP | RADEON_IS_PCI | RADEON_IS_PCIE)) 1821 && !init->is_pci) { 1822 DRM_DEBUG("Restoring AGP flag\n"); 1823 dev_priv->flags |= RADEON_IS_AGP; 1824 } 1825 1826 dev_priv->usec_timeout = init->usec_timeout; 1827 if (dev_priv->usec_timeout < 1 || 1828 dev_priv->usec_timeout > RADEON_MAX_USEC_TIMEOUT) { 1829 DRM_DEBUG("TIMEOUT problem!\n"); 1830 r600_do_cleanup_cp(dev); 1831 return -EINVAL; 1832 } 1833 1834 /* Enable vblank on CRTC1 for older X servers 1835 */ 1836 dev_priv->vblank_crtc = DRM_RADEON_VBLANK_CRTC1; 1837 1838 dev_priv->cp_mode = init->cp_mode; 1839 1840 /* We don't support anything other than bus-mastering ring mode, 1841 * but the ring can be in either AGP or PCI space for the ring 1842 * read pointer. 1843 */ 1844 if ((init->cp_mode != RADEON_CSQ_PRIBM_INDDIS) && 1845 (init->cp_mode != RADEON_CSQ_PRIBM_INDBM)) { 1846 DRM_DEBUG("BAD cp_mode (%x)!\n", init->cp_mode); 1847 r600_do_cleanup_cp(dev); 1848 return -EINVAL; 1849 } 1850 1851 switch (init->fb_bpp) { 1852 case 16: 1853 dev_priv->color_fmt = RADEON_COLOR_FORMAT_RGB565; 1854 break; 1855 case 32: 1856 default: 1857 dev_priv->color_fmt = RADEON_COLOR_FORMAT_ARGB8888; 1858 break; 1859 } 1860 dev_priv->front_offset = init->front_offset; 1861 dev_priv->front_pitch = init->front_pitch; 1862 dev_priv->back_offset = init->back_offset; 1863 dev_priv->back_pitch = init->back_pitch; 1864 1865 dev_priv->ring_offset = init->ring_offset; 1866 dev_priv->ring_rptr_offset = init->ring_rptr_offset; 1867 dev_priv->buffers_offset = init->buffers_offset; 1868 dev_priv->gart_textures_offset = init->gart_textures_offset; 1869 1870 dev_priv->sarea = drm_getsarea(dev); 1871 if (!dev_priv->sarea) { 1872 DRM_ERROR("could not find sarea!\n"); 1873 r600_do_cleanup_cp(dev); 1874 return -EINVAL; 1875 } 1876 1877 dev_priv->cp_ring = drm_core_findmap(dev, init->ring_offset); 1878 if (!dev_priv->cp_ring) { 1879 DRM_ERROR("could not find cp ring region!\n"); 
1880 r600_do_cleanup_cp(dev); 1881 return -EINVAL; 1882 } 1883 dev_priv->ring_rptr = drm_core_findmap(dev, init->ring_rptr_offset); 1884 if (!dev_priv->ring_rptr) { 1885 DRM_ERROR("could not find ring read pointer!\n"); 1886 r600_do_cleanup_cp(dev); 1887 return -EINVAL; 1888 } 1889 dev->agp_buffer_token = init->buffers_offset; 1890 dev->agp_buffer_map = drm_core_findmap(dev, init->buffers_offset); 1891 if (!dev->agp_buffer_map) { 1892 DRM_ERROR("could not find dma buffer region!\n"); 1893 r600_do_cleanup_cp(dev); 1894 return -EINVAL; 1895 } 1896 1897 if (init->gart_textures_offset) { 1898 dev_priv->gart_textures = 1899 drm_core_findmap(dev, init->gart_textures_offset); 1900 if (!dev_priv->gart_textures) { 1901 DRM_ERROR("could not find GART texture region!\n"); 1902 r600_do_cleanup_cp(dev); 1903 return -EINVAL; 1904 } 1905 } 1906 1907 dev_priv->sarea_priv = 1908 (drm_radeon_sarea_t *) ((u8 *) dev_priv->sarea->handle + 1909 init->sarea_priv_offset); 1910 1911#if __OS_HAS_AGP 1912 /* XXX */ 1913 if (dev_priv->flags & RADEON_IS_AGP) { 1914 drm_core_ioremap_wc(dev_priv->cp_ring, dev); 1915 drm_core_ioremap_wc(dev_priv->ring_rptr, dev); 1916 drm_core_ioremap_wc(dev->agp_buffer_map, dev); 1917 if (!dev_priv->cp_ring->handle || 1918 !dev_priv->ring_rptr->handle || 1919 !dev->agp_buffer_map->handle) { 1920 DRM_ERROR("could not find ioremap agp regions!\n"); 1921 r600_do_cleanup_cp(dev); 1922 return -EINVAL; 1923 } 1924 } else 1925#endif 1926 { 1927 dev_priv->cp_ring->handle = (void *)dev_priv->cp_ring->offset; 1928 dev_priv->ring_rptr->handle = 1929 (void *)dev_priv->ring_rptr->offset; 1930 dev->agp_buffer_map->handle = 1931 (void *)dev->agp_buffer_map->offset; 1932 1933 DRM_DEBUG("dev_priv->cp_ring->handle %p\n", 1934 dev_priv->cp_ring->handle); 1935 DRM_DEBUG("dev_priv->ring_rptr->handle %p\n", 1936 dev_priv->ring_rptr->handle); 1937 DRM_DEBUG("dev->agp_buffer_map->handle %p\n", 1938 dev->agp_buffer_map->handle); 1939 } 1940 1941 dev_priv->fb_location = 
(radeon_read_fb_location(dev_priv) & 0xffff) << 24; 1942 dev_priv->fb_size = 1943 (((radeon_read_fb_location(dev_priv) & 0xffff0000u) << 8) + 0x1000000) 1944 - dev_priv->fb_location; 1945 1946 dev_priv->front_pitch_offset = (((dev_priv->front_pitch / 64) << 22) | 1947 ((dev_priv->front_offset 1948 + dev_priv->fb_location) >> 10)); 1949 1950 dev_priv->back_pitch_offset = (((dev_priv->back_pitch / 64) << 22) | 1951 ((dev_priv->back_offset 1952 + dev_priv->fb_location) >> 10)); 1953 1954 dev_priv->depth_pitch_offset = (((dev_priv->depth_pitch / 64) << 22) | 1955 ((dev_priv->depth_offset 1956 + dev_priv->fb_location) >> 10)); 1957 1958 dev_priv->gart_size = init->gart_size; 1959 1960 /* New let's set the memory map ... */ 1961 if (dev_priv->new_memmap) { 1962 u32 base = 0; 1963 1964 DRM_INFO("Setting GART location based on new memory map\n"); 1965 1966 /* If using AGP, try to locate the AGP aperture at the same 1967 * location in the card and on the bus, though we have to 1968 * align it down. 
1969 */ 1970#if __OS_HAS_AGP 1971 /* XXX */ 1972 if (dev_priv->flags & RADEON_IS_AGP) { 1973 base = dev->agp->base; 1974 /* Check if valid */ 1975 if ((base + dev_priv->gart_size - 1) >= dev_priv->fb_location && 1976 base < (dev_priv->fb_location + dev_priv->fb_size - 1)) { 1977 DRM_INFO("Can't use AGP base @0x%08lx, won't fit\n", 1978 dev->agp->base); 1979 base = 0; 1980 } 1981 } 1982#endif 1983 /* If not or if AGP is at 0 (Macs), try to put it elsewhere */ 1984 if (base == 0) { 1985 base = dev_priv->fb_location + dev_priv->fb_size; 1986 if (base < dev_priv->fb_location || 1987 ((base + dev_priv->gart_size) & 0xfffffffful) < base) 1988 base = dev_priv->fb_location 1989 - dev_priv->gart_size; 1990 } 1991 dev_priv->gart_vm_start = base & 0xffc00000u; 1992 if (dev_priv->gart_vm_start != base) 1993 DRM_INFO("GART aligned down from 0x%08x to 0x%08x\n", 1994 base, dev_priv->gart_vm_start); 1995 } 1996 1997#if __OS_HAS_AGP 1998 /* XXX */ 1999 if (dev_priv->flags & RADEON_IS_AGP) 2000 dev_priv->gart_buffers_offset = (dev->agp_buffer_map->offset 2001 - dev->agp->base 2002 + dev_priv->gart_vm_start); 2003 else 2004#endif 2005 dev_priv->gart_buffers_offset = (dev->agp_buffer_map->offset 2006 - (unsigned long)dev->sg->virtual 2007 + dev_priv->gart_vm_start); 2008 2009 DRM_DEBUG("fb 0x%08x size %d\n", 2010 (unsigned int) dev_priv->fb_location, 2011 (unsigned int) dev_priv->fb_size); 2012 DRM_DEBUG("dev_priv->gart_size %d\n", dev_priv->gart_size); 2013 DRM_DEBUG("dev_priv->gart_vm_start 0x%08x\n", 2014 (unsigned int) dev_priv->gart_vm_start); 2015 DRM_DEBUG("dev_priv->gart_buffers_offset 0x%08lx\n", 2016 dev_priv->gart_buffers_offset); 2017 2018 dev_priv->ring.start = (u32 *) dev_priv->cp_ring->handle; 2019 dev_priv->ring.end = ((u32 *) dev_priv->cp_ring->handle 2020 + init->ring_size / sizeof(u32)); 2021 dev_priv->ring.size = init->ring_size; 2022 dev_priv->ring.size_l2qw = drm_order(init->ring_size / 8); 2023 2024 dev_priv->ring.rptr_update = /* init->rptr_update */ 4096; 
2025 dev_priv->ring.rptr_update_l2qw = drm_order(/* init->rptr_update */ 4096 / 8); 2026 2027 dev_priv->ring.fetch_size = /* init->fetch_size */ 32; 2028 dev_priv->ring.fetch_size_l2ow = drm_order(/* init->fetch_size */ 32 / 16); 2029 2030 dev_priv->ring.tail_mask = (dev_priv->ring.size / sizeof(u32)) - 1; 2031 2032 dev_priv->ring.high_mark = RADEON_RING_HIGH_MARK; 2033 2034#if __OS_HAS_AGP 2035 if (dev_priv->flags & RADEON_IS_AGP) { 2036 /* XXX turn off pcie gart */ 2037 } else 2038#endif 2039 { 2040 dev_priv->gart_info.table_mask = DMA_BIT_MASK(32); 2041 /* if we have an offset set from userspace */ 2042 if (!dev_priv->pcigart_offset_set) { 2043 DRM_ERROR("Need gart offset from userspace\n"); 2044 r600_do_cleanup_cp(dev); 2045 return -EINVAL; 2046 } 2047 2048 DRM_DEBUG("Using gart offset 0x%08lx\n", dev_priv->pcigart_offset); 2049 2050 dev_priv->gart_info.bus_addr = 2051 dev_priv->pcigart_offset + dev_priv->fb_location; 2052 dev_priv->gart_info.mapping.offset = 2053 dev_priv->pcigart_offset + dev_priv->fb_aper_offset; 2054 dev_priv->gart_info.mapping.size = 2055 dev_priv->gart_info.table_size; 2056 2057 drm_core_ioremap_wc(&dev_priv->gart_info.mapping, dev); 2058 if (!dev_priv->gart_info.mapping.handle) { 2059 DRM_ERROR("ioremap failed.\n"); 2060 r600_do_cleanup_cp(dev); 2061 return -EINVAL; 2062 } 2063 2064 dev_priv->gart_info.addr = 2065 dev_priv->gart_info.mapping.handle; 2066 2067 DRM_DEBUG("Setting phys_pci_gart to %p %08lX\n", 2068 dev_priv->gart_info.addr, 2069 dev_priv->pcigart_offset); 2070 2071 if (!r600_page_table_init(dev)) { 2072 DRM_ERROR("Failed to init GART table\n"); 2073 r600_do_cleanup_cp(dev); 2074 return -EINVAL; 2075 } 2076 2077 if (((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770)) 2078 r700_vm_init(dev); 2079 else 2080 r600_vm_init(dev); 2081 } 2082 2083 if (((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770)) 2084 r700_cp_load_microcode(dev_priv); 2085 else 2086 r600_cp_load_microcode(dev_priv); 2087 2088 
	r600_cp_init_ring_buffer(dev, dev_priv, file_priv);

	dev_priv->last_buf = 0;

	/* Final bring-up: reset the engine and verify that scratch-register
	 * writeback works before declaring init complete.
	 */
	r600_do_engine_reset(dev);
	r600_test_writeback(dev_priv);

	return 0;
}

/* Resume entry point: re-program the GART/VM, reload the CP microcode
 * (RV770 and newer take the r700 paths), then re-initialize the ring
 * buffer and reset the engine.  Always returns 0.
 */
int r600_do_resume_cp(struct drm_device *dev, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;

	DRM_DEBUG("\n");
	if (((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770)) {
		r700_vm_init(dev);
		r700_cp_load_microcode(dev_priv);
	} else {
		r600_vm_init(dev);
		r600_cp_load_microcode(dev_priv);
	}
	r600_cp_init_ring_buffer(dev, dev_priv, file_priv);
	r600_do_engine_reset(dev);

	return 0;
}

/* Wait for the CP to go idle.
 *
 * Emits a cache flush/invalidate event followed by a WAIT_UNTIL for
 * "3D idle clean", then busy-waits via r600_do_wait_for_idle() and
 * returns its result (0 on success).
 */
int r600_do_cp_idle(drm_radeon_private_t *dev_priv)
{
	RING_LOCALS;
	DRM_DEBUG("\n");

	BEGIN_RING(5);
	OUT_RING(CP_PACKET3(R600_IT_EVENT_WRITE, 0));
	OUT_RING(R600_CACHE_FLUSH_AND_INV_EVENT);
	/* wait for 3D idle clean */
	OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
	OUT_RING((R600_WAIT_UNTIL - R600_SET_CONFIG_REG_OFFSET) >> 2);
	OUT_RING(RADEON_WAIT_3D_IDLE | RADEON_WAIT_3D_IDLECLEAN);

	ADVANCE_RING();
	COMMIT_RING();

	return r600_do_wait_for_idle(dev_priv);
}

/* Start the Command Processor.
 */
/* Emit the ME_INITIALIZE packet and then un-halt the micro engine.
 * The second initialization dword differs for pre-RV770 parts
 * (0x3 vs 0x0).  Marks the CP as running.
 */
void r600_do_cp_start(drm_radeon_private_t *dev_priv)
{
	u32 cp_me;
	RING_LOCALS;
	DRM_DEBUG("\n");

	/* 7 dwords: ME_INITIALIZE header + 6 payload dwords */
	BEGIN_RING(7);
	OUT_RING(CP_PACKET3(R600_IT_ME_INITIALIZE, 5));
	OUT_RING(0x00000001);
	if (((dev_priv->flags & RADEON_FAMILY_MASK) < CHIP_RV770))
		OUT_RING(0x00000003);
	else
		OUT_RING(0x00000000);
	OUT_RING((dev_priv->r600_max_hw_contexts - 1));
	OUT_RING(R600_ME_INITIALIZE_DEVICE_ID(1));
	OUT_RING(0x00000000);
	OUT_RING(0x00000000);
	ADVANCE_RING();
	COMMIT_RING();

	/* set the mux and reset the halt bit */
	cp_me = 0xff;
	RADEON_WRITE(R600_CP_ME_CNTL, cp_me);

	dev_priv->cp_running = 1;

}

/* Reset ring bookkeeping: force the write pointer and the driver's
 * cached head/tail to the CP's current read pointer, so hardware and
 * software agree that the ring is empty.
 */
void r600_do_cp_reset(drm_radeon_private_t *dev_priv)
{
	u32 cur_read_ptr;
	DRM_DEBUG("\n");

	cur_read_ptr = RADEON_READ(R600_CP_RB_RPTR);
	RADEON_WRITE(R600_CP_RB_WPTR, cur_read_ptr);
	SET_RING_HEAD(dev_priv, cur_read_ptr);
	dev_priv->ring.tail = cur_read_ptr;
}

/* Halt the CP micro engine by setting the HALT bit in CP_ME_CNTL
 * (keeping the same 0xff mux value written by r600_do_cp_start()),
 * and mark the CP as stopped.
 */
void r600_do_cp_stop(drm_radeon_private_t *dev_priv)
{
	uint32_t cp_me;

	DRM_DEBUG("\n");

	cp_me = 0xff | R600_CP_ME_HALT;

	RADEON_WRITE(R600_CP_ME_CNTL, cp_me);

	dev_priv->cp_running = 0;
}

/* Queue the byte range [start, end) of an indirect buffer for
 * execution.  The IB is padded to a multiple of 16 dwords with
 * Type-2 filler packets, then an INDIRECT_BUFFER packet is emitted
 * carrying the buffer's GART address (low dword masked to 4-byte
 * alignment, address bits 39:32 in the second dword) and its length
 * in dwords.  Always returns 0; an empty range is a no-op.
 */
int r600_cp_dispatch_indirect(struct drm_device *dev,
			      struct drm_buf *buf, int start, int end)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	RING_LOCALS;

	if (start != end) {
		unsigned long offset = (dev_priv->gart_buffers_offset
					+ buf->offset + start);
		/* Round the byte range up to whole dwords. */
		int dwords = (end - start + 3) / sizeof(u32);

		DRM_DEBUG("dwords:%d\n", dwords);
		DRM_DEBUG("offset 0x%lx\n", offset);


		/* Indirect buffer data must be a multiple of 16 dwords.
		 * pad the data with a Type-2 CP packet.
		 */
		while (dwords & 0xf) {
			u32 *data = (u32 *)
			    ((char *)dev->agp_buffer_map->handle
			     + buf->offset + start);
			data[dwords++] = RADEON_CP_PACKET2;
		}

		/* Fire off the indirect buffer */
		BEGIN_RING(4);
		OUT_RING(CP_PACKET3(R600_IT_INDIRECT_BUFFER, 2));
		OUT_RING((offset & 0xfffffffc));
		OUT_RING((upper_32_bits(offset) & 0xff));
		OUT_RING(dwords);
		ADVANCE_RING();
	}

	return 0;
}