1/* 2 * Copyright 2009 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 * DEALINGS IN THE SOFTWARE. 22 * 23 * Authors: 24 * Alex Deucher <alexander.deucher@amd.com> 25 */ 26#include "drmP.h" 27#include "drm.h" 28#include "radeon_drm.h" 29#include "radeon_drv.h" 30 31#include "r600_blit_shaders.h" 32 33#define DI_PT_RECTLIST 0x11 34#define DI_INDEX_SIZE_16_BIT 0x0 35#define DI_SRC_SEL_AUTO_INDEX 0x2 36 37#define FMT_8 0x1 38#define FMT_5_6_5 0x8 39#define FMT_8_8_8_8 0x1a 40#define COLOR_8 0x1 41#define COLOR_5_6_5 0x8 42#define COLOR_8_8_8_8 0x1a 43 44static inline void 45set_render_target(drm_radeon_private_t *dev_priv, int format, int w, int h, u64 gpu_addr) 46{ 47 u32 cb_color_info; 48 int pitch, slice; 49 RING_LOCALS; 50 DRM_DEBUG("\n"); 51 52 h = ALIGN(h, 8); 53 if (h < 8) 54 h = 8; 55 56 cb_color_info = ((format << 2) | (1 << 27)); 57 pitch = (w / 8) - 1; 58 slice = ((w * h) / 64) - 1; 59 60 if (((dev_priv->flags & RADEON_FAMILY_MASK) > CHIP_R600) && 61 ((dev_priv->flags & RADEON_FAMILY_MASK) < CHIP_RV770)) { 62 BEGIN_RING(21 + 2); 63 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); 64 OUT_RING((R600_CB_COLOR0_BASE - R600_SET_CONTEXT_REG_OFFSET) >> 2); 65 OUT_RING(gpu_addr >> 8); 66 OUT_RING(CP_PACKET3(R600_IT_SURFACE_BASE_UPDATE, 0)); 67 OUT_RING(2 << 0); 68 } else { 69 BEGIN_RING(21); 70 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); 71 OUT_RING((R600_CB_COLOR0_BASE - R600_SET_CONTEXT_REG_OFFSET) >> 2); 72 OUT_RING(gpu_addr >> 8); 73 } 74 75 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); 76 OUT_RING((R600_CB_COLOR0_SIZE - R600_SET_CONTEXT_REG_OFFSET) >> 2); 77 OUT_RING((pitch << 0) | (slice << 10)); 78 79 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); 80 OUT_RING((R600_CB_COLOR0_VIEW - R600_SET_CONTEXT_REG_OFFSET) >> 2); 81 OUT_RING(0); 82 83 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); 84 OUT_RING((R600_CB_COLOR0_INFO - R600_SET_CONTEXT_REG_OFFSET) >> 2); 85 OUT_RING(cb_color_info); 86 87 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); 88 OUT_RING((R600_CB_COLOR0_TILE - R600_SET_CONTEXT_REG_OFFSET) >> 2); 89 OUT_RING(0); 90 91 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); 92 OUT_RING((R600_CB_COLOR0_FRAG - R600_SET_CONTEXT_REG_OFFSET) >> 2); 93 OUT_RING(0); 94 95 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); 96 OUT_RING((R600_CB_COLOR0_MASK - R600_SET_CONTEXT_REG_OFFSET) >> 2); 97 OUT_RING(0); 98 99 ADVANCE_RING(); 100} 101 102static inline void 103cp_set_surface_sync(drm_radeon_private_t *dev_priv, 104 u32 sync_type, u32 size, u64 mc_addr) 105{ 106 u32 cp_coher_size; 107 RING_LOCALS; 108 DRM_DEBUG("\n"); 109 110 if (size == 0xffffffff) 111 cp_coher_size = 0xffffffff; 112 else 113 cp_coher_size = ((size + 255) >> 8); 114 115 BEGIN_RING(5); 116 OUT_RING(CP_PACKET3(R600_IT_SURFACE_SYNC, 3)); 117 OUT_RING(sync_type); 118 OUT_RING(cp_coher_size); 119 OUT_RING((mc_addr >> 8)); 120 OUT_RING(10); /* poll interval */ 121 ADVANCE_RING(); 122} 123 124static inline void 125set_shaders(struct drm_device *dev) 126{ 127 drm_radeon_private_t *dev_priv = dev->dev_private; 128 u64 gpu_addr; 129 int i; 130 u32 *vs, *ps; 131 uint32_t sq_pgm_resources; 132 RING_LOCALS; 133 DRM_DEBUG("\n"); 134 135 /* load shaders */ 136 vs = (u32 *) ((char *)dev->agp_buffer_map->handle + dev_priv->blit_vb->offset); 137 ps = (u32 *) ((char *)dev->agp_buffer_map->handle + dev_priv->blit_vb->offset + 256); 138 139 for (i = 0; i < r6xx_vs_size; i++) 140 vs[i] = r6xx_vs[i]; 141 for (i = 0; i < r6xx_ps_size; i++) 142 ps[i] = r6xx_ps[i]; 143 144 dev_priv->blit_vb->used = 512; 145 146 gpu_addr = dev_priv->gart_buffers_offset + dev_priv->blit_vb->offset; 147 148 /* setup shader regs */ 149 sq_pgm_resources = (1 << 0); 150 151 BEGIN_RING(9 + 12); 152 /* VS */ 153 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); 154 OUT_RING((R600_SQ_PGM_START_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2); 155 OUT_RING(gpu_addr >> 8); 156 157 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); 158 OUT_RING((R600_SQ_PGM_RESOURCES_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2); 159 OUT_RING(sq_pgm_resources); 160 161 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); 162 OUT_RING((R600_SQ_PGM_CF_OFFSET_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2); 163 OUT_RING(0); 164 165 /* PS */ 166 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); 167 OUT_RING((R600_SQ_PGM_START_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2); 168 OUT_RING((gpu_addr + 256) >> 8); 169 170 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); 171 OUT_RING((R600_SQ_PGM_RESOURCES_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2); 172 OUT_RING(sq_pgm_resources | (1 << 28)); 173 174 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); 175 OUT_RING((R600_SQ_PGM_EXPORTS_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2); 176 OUT_RING(2); 177 178 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); 179 OUT_RING((R600_SQ_PGM_CF_OFFSET_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2); 180 OUT_RING(0); 181 ADVANCE_RING(); 182 183 cp_set_surface_sync(dev_priv, 184 R600_SH_ACTION_ENA, 512, gpu_addr); 185} 186 187static inline void 188set_vtx_resource(drm_radeon_private_t *dev_priv, u64 gpu_addr) 189{ 190 uint32_t sq_vtx_constant_word2; 191 RING_LOCALS; 192 DRM_DEBUG("\n"); 193 194 sq_vtx_constant_word2 = (((gpu_addr >> 32) & 0xff) | (16 << 8)); 195 196 BEGIN_RING(9); 197 OUT_RING(CP_PACKET3(R600_IT_SET_RESOURCE, 7)); 198 OUT_RING(0x460); 199 OUT_RING(gpu_addr & 0xffffffff); 200 OUT_RING(48 - 1); 201 OUT_RING(sq_vtx_constant_word2); 202 OUT_RING(1 << 0); 203 OUT_RING(0); 204 OUT_RING(0); 205 OUT_RING(R600_SQ_TEX_VTX_VALID_BUFFER << 30); 206 ADVANCE_RING(); 207 208 if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) || 209 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) || 210 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) || 211 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880) || 212 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV710)) 213 cp_set_surface_sync(dev_priv, 214 R600_TC_ACTION_ENA, 48, gpu_addr); 215 else 216 cp_set_surface_sync(dev_priv, 217 R600_VC_ACTION_ENA, 48, gpu_addr); 218} 219 220static inline void 221set_tex_resource(drm_radeon_private_t *dev_priv, 222 int format, int w, int h, int pitch, u64 gpu_addr) 223{ 224 uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4; 225 RING_LOCALS; 226 DRM_DEBUG("\n"); 227 228 if (h < 1) 229 h = 1; 230 231 sq_tex_resource_word0 = (1 << 0); 232 sq_tex_resource_word0 |= ((((pitch >> 3) - 1) << 8) | 233 ((w - 1) << 19)); 234 235 sq_tex_resource_word1 = (format << 26); 236 sq_tex_resource_word1 |= ((h - 1) << 0); 237 238 sq_tex_resource_word4 = ((1 << 14) | 239 (0 << 16) | 240 (1 << 19) | 241 (2 << 22) | 242 (3 << 25)); 243 244 BEGIN_RING(9); 245 OUT_RING(CP_PACKET3(R600_IT_SET_RESOURCE, 7)); 246 OUT_RING(0); 247 OUT_RING(sq_tex_resource_word0); 248 OUT_RING(sq_tex_resource_word1); 249 OUT_RING(gpu_addr >> 8); 250 OUT_RING(gpu_addr >> 8); 251 OUT_RING(sq_tex_resource_word4); 252 OUT_RING(0); 253 OUT_RING(R600_SQ_TEX_VTX_VALID_TEXTURE << 30); 254 ADVANCE_RING(); 255 256} 257 258static inline void 259set_scissors(drm_radeon_private_t *dev_priv, int x1, int y1, int x2, int y2) 260{ 261 RING_LOCALS; 262 DRM_DEBUG("\n"); 263 264 BEGIN_RING(12); 265 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2)); 266 OUT_RING((R600_PA_SC_SCREEN_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2); 267 OUT_RING((x1 << 0) | (y1 << 16)); 268 OUT_RING((x2 << 0) | (y2 << 16)); 269 270 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2)); 271 OUT_RING((R600_PA_SC_GENERIC_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2); 272 OUT_RING((x1 << 0) | (y1 << 16) | (1 << 31)); 273 OUT_RING((x2 << 0) | (y2 << 16)); 274 275 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2)); 276 OUT_RING((R600_PA_SC_WINDOW_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2); 277 OUT_RING((x1 << 0) | (y1 << 16) | (1 << 31)); 278 OUT_RING((x2 << 0) | (y2 << 16)); 279 ADVANCE_RING(); 280} 281 282static inline void 283draw_auto(drm_radeon_private_t *dev_priv) 284{ 285 RING_LOCALS; 286 DRM_DEBUG("\n"); 287 288 BEGIN_RING(10); 289 OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1)); 290 OUT_RING((R600_VGT_PRIMITIVE_TYPE - R600_SET_CONFIG_REG_OFFSET) >> 2); 291 OUT_RING(DI_PT_RECTLIST); 292 293 OUT_RING(CP_PACKET3(R600_IT_INDEX_TYPE, 0)); 294 OUT_RING(DI_INDEX_SIZE_16_BIT); 295 296 OUT_RING(CP_PACKET3(R600_IT_NUM_INSTANCES, 0)); 297 OUT_RING(1); 298 299 OUT_RING(CP_PACKET3(R600_IT_DRAW_INDEX_AUTO, 1)); 300 OUT_RING(3); 301 OUT_RING(DI_SRC_SEL_AUTO_INDEX); 302 303 ADVANCE_RING(); 304 COMMIT_RING(); 305} 306 307static inline void 308set_default_state(drm_radeon_private_t *dev_priv) 309{ 310 int i; 311 u32 sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2; 312 u32 sq_thread_resource_mgmt, sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2; 313 int num_ps_gprs, num_vs_gprs, num_temp_gprs, num_gs_gprs, num_es_gprs; 314 int num_ps_threads, num_vs_threads, num_gs_threads, num_es_threads; 315 int num_ps_stack_entries, num_vs_stack_entries, num_gs_stack_entries, num_es_stack_entries; 316 RING_LOCALS; 317 318 switch ((dev_priv->flags & RADEON_FAMILY_MASK)) { 319 case CHIP_R600: 320 num_ps_gprs = 192; 321 num_vs_gprs = 56; 322 num_temp_gprs = 4; 323 num_gs_gprs = 0; 324 num_es_gprs = 0; 325 num_ps_threads = 136; 326 num_vs_threads = 48; 327 num_gs_threads = 4; 328 num_es_threads = 4; 329 num_ps_stack_entries = 128; 330 num_vs_stack_entries = 128; 331 num_gs_stack_entries = 0; 332 num_es_stack_entries = 0; 333 break; 334 case CHIP_RV630: 335 case CHIP_RV635: 336 num_ps_gprs = 84; 337 num_vs_gprs = 36; 338 num_temp_gprs = 4; 339 num_gs_gprs = 0; 340 num_es_gprs = 0; 341 num_ps_threads = 144; 342 num_vs_threads = 40; 343 num_gs_threads = 4; 344 num_es_threads = 4; 345 num_ps_stack_entries = 40; 346 num_vs_stack_entries = 40; 347 num_gs_stack_entries = 32; 348 num_es_stack_entries = 16; 349 break; 350 case CHIP_RV610: 351 case CHIP_RV620: 352 case CHIP_RS780: 353 case CHIP_RS880: 354 default: 355 num_ps_gprs = 84; 356 num_vs_gprs = 36; 357 num_temp_gprs = 4; 358 num_gs_gprs = 0; 359 num_es_gprs = 0; 360 num_ps_threads = 136; 361 num_vs_threads = 48; 362 num_gs_threads = 4; 363 num_es_threads = 4; 364 num_ps_stack_entries = 40; 365 num_vs_stack_entries = 40; 366 num_gs_stack_entries = 32; 367 num_es_stack_entries = 16; 368 break; 369 case CHIP_RV670: 370 num_ps_gprs = 144; 371 num_vs_gprs = 40; 372 num_temp_gprs = 4; 373 num_gs_gprs = 0; 374 num_es_gprs = 0; 375 num_ps_threads = 136; 376 num_vs_threads = 48; 377 num_gs_threads = 4; 378 num_es_threads = 4; 379 num_ps_stack_entries = 40; 380 num_vs_stack_entries = 40; 381 num_gs_stack_entries = 32; 382 num_es_stack_entries = 16; 383 break; 384 case CHIP_RV770: 385 num_ps_gprs = 192; 386 num_vs_gprs = 56; 387 num_temp_gprs = 4; 388 num_gs_gprs = 0; 389 num_es_gprs = 0; 390 num_ps_threads = 188; 391 num_vs_threads = 60; 392 num_gs_threads = 0; 393 num_es_threads = 0; 394 num_ps_stack_entries = 256; 395 num_vs_stack_entries = 256; 396 num_gs_stack_entries = 0; 397 num_es_stack_entries = 0; 398 break; 399 case CHIP_RV730: 400 case CHIP_RV740: 401 num_ps_gprs = 84; 402 num_vs_gprs = 36; 403 num_temp_gprs = 4; 404 num_gs_gprs = 0; 405 num_es_gprs = 0; 406 num_ps_threads = 188; 407 num_vs_threads = 60; 408 num_gs_threads = 0; 409 num_es_threads = 0; 410 num_ps_stack_entries = 128; 411 num_vs_stack_entries = 128; 412 num_gs_stack_entries = 0; 413 num_es_stack_entries = 0; 414 break; 415 case CHIP_RV710: 416 num_ps_gprs = 192; 417 num_vs_gprs = 56; 418 num_temp_gprs = 4; 419 num_gs_gprs = 0; 420 num_es_gprs = 0; 421 num_ps_threads = 144; 422 num_vs_threads = 48; 423 num_gs_threads = 0; 424 num_es_threads = 0; 425 num_ps_stack_entries = 128; 426 num_vs_stack_entries = 128; 427 num_gs_stack_entries = 0; 428 num_es_stack_entries = 0; 429 break; 430 } 431 432 if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) || 433 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) || 434 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) || 435 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880) || 436 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV710)) 437 sq_config = 0; 438 else 439 sq_config = R600_VC_ENABLE; 440 441 sq_config |= (R600_DX9_CONSTS | 442 R600_ALU_INST_PREFER_VECTOR | 443 R600_PS_PRIO(0) | 444 R600_VS_PRIO(1) | 445 R600_GS_PRIO(2) | 446 R600_ES_PRIO(3)); 447 448 sq_gpr_resource_mgmt_1 = (R600_NUM_PS_GPRS(num_ps_gprs) | 449 R600_NUM_VS_GPRS(num_vs_gprs) | 450 R600_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs)); 451 sq_gpr_resource_mgmt_2 = (R600_NUM_GS_GPRS(num_gs_gprs) | 452 R600_NUM_ES_GPRS(num_es_gprs)); 453 sq_thread_resource_mgmt = (R600_NUM_PS_THREADS(num_ps_threads) | 454 R600_NUM_VS_THREADS(num_vs_threads) | 455 R600_NUM_GS_THREADS(num_gs_threads) | 456 R600_NUM_ES_THREADS(num_es_threads)); 457 sq_stack_resource_mgmt_1 = (R600_NUM_PS_STACK_ENTRIES(num_ps_stack_entries) | 458 R600_NUM_VS_STACK_ENTRIES(num_vs_stack_entries)); 459 sq_stack_resource_mgmt_2 = (R600_NUM_GS_STACK_ENTRIES(num_gs_stack_entries) | 460 R600_NUM_ES_STACK_ENTRIES(num_es_stack_entries)); 461 462 if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770) { 463 BEGIN_RING(r7xx_default_size + 10); 464 for (i = 0; i < r7xx_default_size; i++) 465 OUT_RING(r7xx_default_state[i]); 466 } else { 467 BEGIN_RING(r6xx_default_size + 10); 468 for (i = 0; i < r6xx_default_size; i++) 469 OUT_RING(r6xx_default_state[i]); 470 } 471 OUT_RING(CP_PACKET3(R600_IT_EVENT_WRITE, 0)); 472 OUT_RING(R600_CACHE_FLUSH_AND_INV_EVENT); 473 /* SQ config */ 474 OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 6)); 475 OUT_RING((R600_SQ_CONFIG - R600_SET_CONFIG_REG_OFFSET) >> 2); 476 OUT_RING(sq_config); 477 OUT_RING(sq_gpr_resource_mgmt_1); 478 OUT_RING(sq_gpr_resource_mgmt_2); 479 OUT_RING(sq_thread_resource_mgmt); 480 OUT_RING(sq_stack_resource_mgmt_1); 481 OUT_RING(sq_stack_resource_mgmt_2); 482 ADVANCE_RING(); 483} 484 485static inline uint32_t i2f(uint32_t input) 486{ 487 u32 result, i, exponent, fraction; 488 489 if ((input & 0x3fff) == 0) 490 result = 0; /* 0 is a special case */ 491 else { 492 exponent = 140; /* exponent biased by 127; */ 493 fraction = (input & 0x3fff) << 10; /* cheat and only 494 handle numbers below 2^^15 */ 495 for (i = 0; i < 14; i++) { 496 if (fraction & 0x800000) 497 break; 498 else { 499 fraction = fraction << 1; /* keep 500 shifting left until top bit = 1 */ 501 exponent = exponent - 1; 502 } 503 } 504 result = exponent << 23 | (fraction & 0x7fffff); /* mask 505 off top bit; assumed 1 */ 506 } 507 return result; 508} 509 510 511static inline int r600_nomm_get_vb(struct drm_device *dev) 512{ 513 drm_radeon_private_t *dev_priv = dev->dev_private; 514 dev_priv->blit_vb = radeon_freelist_get(dev); 515 if (!dev_priv->blit_vb) { 516 DRM_ERROR("Unable to allocate vertex buffer for blit\n"); 517 return -EAGAIN; 518 } 519 return 0; 520} 521 522static inline void r600_nomm_put_vb(struct drm_device *dev) 523{ 524 drm_radeon_private_t *dev_priv = dev->dev_private; 525 526 dev_priv->blit_vb->used = 0; 527 radeon_cp_discard_buffer(dev, dev_priv->blit_vb->file_priv->master, dev_priv->blit_vb); 528} 529 530static inline void *r600_nomm_get_vb_ptr(struct drm_device *dev) 531{ 532 drm_radeon_private_t *dev_priv = dev->dev_private; 533 return (((char *)dev->agp_buffer_map->handle + 534 dev_priv->blit_vb->offset + dev_priv->blit_vb->used)); 535} 536 537int 538r600_prepare_blit_copy(struct drm_device *dev, struct drm_file *file_priv) 539{ 540 drm_radeon_private_t *dev_priv = dev->dev_private; 541 int ret; 542 DRM_DEBUG("\n"); 543 544 ret = r600_nomm_get_vb(dev); 545 if (ret) 546 return ret; 547 548 dev_priv->blit_vb->file_priv = file_priv; 549 550 set_default_state(dev_priv); 551 set_shaders(dev); 552 553 return 0; 554} 555 556 557void 558r600_done_blit_copy(struct drm_device *dev) 559{ 560 drm_radeon_private_t *dev_priv = dev->dev_private; 561 RING_LOCALS; 562 DRM_DEBUG("\n"); 563 564 BEGIN_RING(5); 565 OUT_RING(CP_PACKET3(R600_IT_EVENT_WRITE, 0)); 566 OUT_RING(R600_CACHE_FLUSH_AND_INV_EVENT); 567 /* wait for 3D idle clean */ 568 OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1)); 569 OUT_RING((R600_WAIT_UNTIL - R600_SET_CONFIG_REG_OFFSET) >> 2); 570 OUT_RING(RADEON_WAIT_3D_IDLE | RADEON_WAIT_3D_IDLECLEAN); 571 572 ADVANCE_RING(); 573 COMMIT_RING(); 574 575 r600_nomm_put_vb(dev); 576} 577 578void 579r600_blit_copy(struct drm_device *dev, 580 uint64_t src_gpu_addr, uint64_t dst_gpu_addr, 581 int size_bytes) 582{ 583 drm_radeon_private_t *dev_priv = dev->dev_private; 584 int max_bytes; 585 u64 vb_addr; 586 u32 *vb; 587 588 vb = r600_nomm_get_vb_ptr(dev); 589 590 if ((size_bytes & 3) || (src_gpu_addr & 3) || (dst_gpu_addr & 3)) { 591 max_bytes = 8192; 592 593 while (size_bytes) { 594 int cur_size = size_bytes; 595 int src_x = src_gpu_addr & 255; 596 int dst_x = dst_gpu_addr & 255; 597 int h = 1; 598 src_gpu_addr = src_gpu_addr & ~255; 599 dst_gpu_addr = dst_gpu_addr & ~255; 600 601 if (!src_x && !dst_x) { 602 h = (cur_size / max_bytes); 603 if (h > 8192) 604 h = 8192; 605 if (h == 0) 606 h = 1; 607 else 608 cur_size = max_bytes; 609 } else { 610 if (cur_size > max_bytes) 611 cur_size = max_bytes; 612 if (cur_size > (max_bytes - dst_x)) 613 cur_size = (max_bytes - dst_x); 614 if (cur_size > (max_bytes - src_x)) 615 cur_size = (max_bytes - src_x); 616 } 617 618 if ((dev_priv->blit_vb->used + 48) > dev_priv->blit_vb->total) { 619 620 r600_nomm_put_vb(dev); 621 r600_nomm_get_vb(dev); 622 if (!dev_priv->blit_vb) 623 return; 624 set_shaders(dev); 625 vb = r600_nomm_get_vb_ptr(dev); 626 } 627 628 vb[0] = i2f(dst_x); 629 vb[1] = 0; 630 vb[2] = i2f(src_x); 631 vb[3] = 0; 632 633 vb[4] = i2f(dst_x); 634 vb[5] = i2f(h); 635 vb[6] = i2f(src_x); 636 vb[7] = i2f(h); 637 638 vb[8] = i2f(dst_x + cur_size); 639 vb[9] = i2f(h); 640 vb[10] = i2f(src_x + cur_size); 641 vb[11] = i2f(h); 642 643 /* src */ 644 set_tex_resource(dev_priv, FMT_8, 645 src_x + cur_size, h, src_x + cur_size, 646 src_gpu_addr); 647 648 cp_set_surface_sync(dev_priv, 649 R600_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr); 650 651 /* dst */ 652 set_render_target(dev_priv, COLOR_8, 653 dst_x + cur_size, h, 654 dst_gpu_addr); 655 656 /* scissors */ 657 set_scissors(dev_priv, dst_x, 0, dst_x + cur_size, h); 658 659 /* Vertex buffer setup */ 660 vb_addr = dev_priv->gart_buffers_offset + 661 dev_priv->blit_vb->offset + 662 dev_priv->blit_vb->used; 663 set_vtx_resource(dev_priv, vb_addr); 664 665 /* draw */ 666 draw_auto(dev_priv); 667 668 cp_set_surface_sync(dev_priv, 669 R600_CB_ACTION_ENA | R600_CB0_DEST_BASE_ENA, 670 cur_size * h, dst_gpu_addr); 671 672 vb += 12; 673 dev_priv->blit_vb->used += 12 * 4; 674 675 src_gpu_addr += cur_size * h; 676 dst_gpu_addr += cur_size * h; 677 size_bytes -= cur_size * h; 678 } 679 } else { 680 max_bytes = 8192 * 4; 681 682 while (size_bytes) { 683 int cur_size = size_bytes; 684 int src_x = (src_gpu_addr & 255); 685 int dst_x = (dst_gpu_addr & 255); 686 int h = 1; 687 src_gpu_addr = src_gpu_addr & ~255; 688 dst_gpu_addr = dst_gpu_addr & ~255; 689 690 if (!src_x && !dst_x) { 691 h = (cur_size / max_bytes); 692 if (h > 8192) 693 h = 8192; 694 if (h == 0) 695 h = 1; 696 else 697 cur_size = max_bytes; 698 } else { 699 if (cur_size > max_bytes) 700 cur_size = max_bytes; 701 if (cur_size > (max_bytes - dst_x)) 702 cur_size = (max_bytes - dst_x); 703 if (cur_size > (max_bytes - src_x)) 704 cur_size = (max_bytes - src_x); 705 } 706 707 if ((dev_priv->blit_vb->used + 48) > dev_priv->blit_vb->total) { 708 r600_nomm_put_vb(dev); 709 r600_nomm_get_vb(dev); 710 if (!dev_priv->blit_vb) 711 return; 712 713 set_shaders(dev); 714 vb = r600_nomm_get_vb_ptr(dev); 715 } 716 717 vb[0] = i2f(dst_x / 4); 718 vb[1] = 0; 719 vb[2] = i2f(src_x / 4); 720 vb[3] = 0; 721 722 vb[4] = i2f(dst_x / 4); 723 vb[5] = i2f(h); 724 vb[6] = i2f(src_x / 4); 725 vb[7] = i2f(h); 726 727 vb[8] = i2f((dst_x + cur_size) / 4); 728 vb[9] = i2f(h); 729 vb[10] = i2f((src_x + cur_size) / 4); 730 vb[11] = i2f(h); 731 732 /* src */ 733 set_tex_resource(dev_priv, FMT_8_8_8_8, 734 (src_x + cur_size) / 4, 735 h, (src_x + cur_size) / 4, 736 src_gpu_addr); 737 738 cp_set_surface_sync(dev_priv, 739 R600_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr); 740 741 /* dst */ 742 set_render_target(dev_priv, COLOR_8_8_8_8, 743 (dst_x + cur_size) / 4, h, 744 dst_gpu_addr); 745 746 /* scissors */ 747 set_scissors(dev_priv, (dst_x / 4), 0, (dst_x + cur_size / 4), h); 748 749 /* Vertex buffer setup */ 750 vb_addr = dev_priv->gart_buffers_offset + 751 dev_priv->blit_vb->offset + 752 dev_priv->blit_vb->used; 753 set_vtx_resource(dev_priv, vb_addr); 754 755 /* draw */ 756 draw_auto(dev_priv); 757 758 cp_set_surface_sync(dev_priv, 759 R600_CB_ACTION_ENA | R600_CB0_DEST_BASE_ENA, 760 cur_size * h, dst_gpu_addr); 761 762 vb += 12; 763 dev_priv->blit_vb->used += 12 * 4; 764 765 src_gpu_addr += cur_size * h; 766 dst_gpu_addr += cur_size * h; 767 size_bytes -= cur_size * h; 768 } 769 } 770} 771 772void 773r600_blit_swap(struct drm_device *dev, 774 uint64_t src_gpu_addr, uint64_t dst_gpu_addr, 775 int sx, int sy, int dx, int dy, 776 int w, int h, int src_pitch, int dst_pitch, int cpp) 777{ 778 drm_radeon_private_t *dev_priv = dev->dev_private; 779 int cb_format, tex_format; 780 int sx2, sy2, dx2, dy2; 781 u64 vb_addr; 782 u32 *vb; 783 784 if ((dev_priv->blit_vb->used + 48) > dev_priv->blit_vb->total) { 785 786 r600_nomm_put_vb(dev); 787 r600_nomm_get_vb(dev); 788 if (!dev_priv->blit_vb) 789 return; 790 791 set_shaders(dev); 792 } 793 vb = r600_nomm_get_vb_ptr(dev); 794 795 sx2 = sx + w; 796 sy2 = sy + h; 797 dx2 = dx + w; 798 dy2 = dy + h; 799 800 vb[0] = i2f(dx); 801 vb[1] = i2f(dy); 802 vb[2] = i2f(sx); 803 vb[3] = i2f(sy); 804 805 vb[4] = i2f(dx); 806 vb[5] = i2f(dy2); 807 vb[6] = i2f(sx); 808 vb[7] = i2f(sy2); 809 810 vb[8] = i2f(dx2); 811 vb[9] = i2f(dy2); 812 vb[10] = i2f(sx2); 813 vb[11] = i2f(sy2); 814 815 switch(cpp) { 816 case 4: 817 cb_format = COLOR_8_8_8_8; 818 tex_format = FMT_8_8_8_8; 819 break; 820 case 2: 821 cb_format = COLOR_5_6_5; 822 tex_format = FMT_5_6_5; 823 break; 824 default: 825 cb_format = COLOR_8; 826 tex_format = FMT_8; 827 break; 828 } 829 830 /* src */ 831 set_tex_resource(dev_priv, tex_format, 832 src_pitch / cpp, 833 sy2, src_pitch / cpp, 834 src_gpu_addr); 835 836 cp_set_surface_sync(dev_priv, 837 R600_TC_ACTION_ENA, src_pitch * sy2, src_gpu_addr); 838 839 /* dst */ 840 set_render_target(dev_priv, cb_format, 841 dst_pitch / cpp, dy2, 842 dst_gpu_addr); 843 844 /* scissors */ 845 set_scissors(dev_priv, dx, dy, dx2, dy2); 846 847 /* Vertex buffer setup */ 848 vb_addr = dev_priv->gart_buffers_offset + 849 dev_priv->blit_vb->offset + 850 dev_priv->blit_vb->used; 851 set_vtx_resource(dev_priv, vb_addr); 852 853 /* draw */ 854 draw_auto(dev_priv); 855 856 cp_set_surface_sync(dev_priv, 857 R600_CB_ACTION_ENA | R600_CB0_DEST_BASE_ENA, 858 dst_pitch * dy2, dst_gpu_addr); 859 860 dev_priv->blit_vb->used += 12 * 4; 861} 862