/* r600_blit.c revision 285830 */
129088Smarkm/* 229088Smarkm * Copyright 2009 Advanced Micro Devices, Inc. 329088Smarkm * 429088Smarkm * Permission is hereby granted, free of charge, to any person obtaining a 529088Smarkm * copy of this software and associated documentation files (the "Software"), 629088Smarkm * to deal in the Software without restriction, including without limitation 729088Smarkm * the rights to use, copy, modify, merge, publish, distribute, sublicense, 829088Smarkm * and/or sell copies of the Software, and to permit persons to whom the 929088Smarkm * Software is furnished to do so, subject to the following conditions: 1029088Smarkm * 1129088Smarkm * The above copyright notice and this permission notice (including the next 1229088Smarkm * paragraph) shall be included in all copies or substantial portions of the 1329088Smarkm * Software. 1429088Smarkm * 1529088Smarkm * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1629088Smarkm * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1729088Smarkm * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 1829088Smarkm * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 1929088Smarkm * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 2029088Smarkm * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 2129088Smarkm * DEALINGS IN THE SOFTWARE. 
2229088Smarkm * 2329088Smarkm * Authors: 2429088Smarkm * Alex Deucher <alexander.deucher@amd.com> 2529088Smarkm */ 2629088Smarkm 2729088Smarkm#include <sys/cdefs.h> 2829088Smarkm__FBSDID("$FreeBSD: releng/10.2/sys/dev/drm2/radeon/r600_blit.c 282199 2015-04-28 19:35:05Z dumbbell $"); 2929088Smarkm 3029088Smarkm#include <dev/drm2/drmP.h> 3129088Smarkm#include <dev/drm2/radeon/radeon_drm.h> 3229088Smarkm#include "radeon_drv.h" 3329088Smarkm 3472139Sasmodai#include "r600_blit_shaders.h" 3529088Smarkm 3629088Smarkm#define DI_PT_RECTLIST 0x11 3729088Smarkm#define DI_INDEX_SIZE_16_BIT 0x0 3829088Smarkm#define DI_SRC_SEL_AUTO_INDEX 0x2 3929088Smarkm 4029088Smarkm#define FMT_8 0x1 4129088Smarkm#define FMT_5_6_5 0x8 4229088Smarkm#define FMT_8_8_8_8 0x1a 4329088Smarkm#define COLOR_8 0x1 4429088Smarkm#define COLOR_5_6_5 0x8 4529088Smarkm#define COLOR_8_8_8_8 0x1a 4681965Smarkm 4729088Smarkmstatic void 4829088Smarkmset_render_target(drm_radeon_private_t *dev_priv, int format, int w, int h, u64 gpu_addr) 4929088Smarkm{ 5029088Smarkm u32 cb_color_info; 5129088Smarkm int pitch, slice; 5229088Smarkm RING_LOCALS; 5329088Smarkm DRM_DEBUG("\n"); 5429088Smarkm 5529088Smarkm h = roundup2(h, 8); 5629088Smarkm if (h < 8) 57 h = 8; 58 59 cb_color_info = ((format << 2) | (1 << 27)); 60 pitch = (w / 8) - 1; 61 slice = ((w * h) / 64) - 1; 62 63 if (((dev_priv->flags & RADEON_FAMILY_MASK) > CHIP_R600) && 64 ((dev_priv->flags & RADEON_FAMILY_MASK) < CHIP_RV770)) { 65 BEGIN_RING(21 + 2); 66 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); 67 OUT_RING((R600_CB_COLOR0_BASE - R600_SET_CONTEXT_REG_OFFSET) >> 2); 68 OUT_RING(gpu_addr >> 8); 69 OUT_RING(CP_PACKET3(R600_IT_SURFACE_BASE_UPDATE, 0)); 70 OUT_RING(2 << 0); 71 } else { 72 BEGIN_RING(21); 73 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); 74 OUT_RING((R600_CB_COLOR0_BASE - R600_SET_CONTEXT_REG_OFFSET) >> 2); 75 OUT_RING(gpu_addr >> 8); 76 } 77 78 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); 79 OUT_RING((R600_CB_COLOR0_SIZE - 
R600_SET_CONTEXT_REG_OFFSET) >> 2); 80 OUT_RING((pitch << 0) | (slice << 10)); 81 82 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); 83 OUT_RING((R600_CB_COLOR0_VIEW - R600_SET_CONTEXT_REG_OFFSET) >> 2); 84 OUT_RING(0); 85 86 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); 87 OUT_RING((R600_CB_COLOR0_INFO - R600_SET_CONTEXT_REG_OFFSET) >> 2); 88 OUT_RING(cb_color_info); 89 90 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); 91 OUT_RING((R600_CB_COLOR0_TILE - R600_SET_CONTEXT_REG_OFFSET) >> 2); 92 OUT_RING(0); 93 94 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); 95 OUT_RING((R600_CB_COLOR0_FRAG - R600_SET_CONTEXT_REG_OFFSET) >> 2); 96 OUT_RING(0); 97 98 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); 99 OUT_RING((R600_CB_COLOR0_MASK - R600_SET_CONTEXT_REG_OFFSET) >> 2); 100 OUT_RING(0); 101 102 ADVANCE_RING(); 103} 104 105static void 106cp_set_surface_sync(drm_radeon_private_t *dev_priv, 107 u32 sync_type, u32 size, u64 mc_addr) 108{ 109 u32 cp_coher_size; 110 RING_LOCALS; 111 DRM_DEBUG("\n"); 112 113 if (size == 0xffffffff) 114 cp_coher_size = 0xffffffff; 115 else 116 cp_coher_size = ((size + 255) >> 8); 117 118 BEGIN_RING(5); 119 OUT_RING(CP_PACKET3(R600_IT_SURFACE_SYNC, 3)); 120 OUT_RING(sync_type); 121 OUT_RING(cp_coher_size); 122 OUT_RING((mc_addr >> 8)); 123 OUT_RING(10); /* poll interval */ 124 ADVANCE_RING(); 125} 126 127static void 128set_shaders(struct drm_device *dev) 129{ 130 drm_radeon_private_t *dev_priv = dev->dev_private; 131 u64 gpu_addr; 132 int i; 133 u32 *vs, *ps; 134 uint32_t sq_pgm_resources; 135 RING_LOCALS; 136 DRM_DEBUG("\n"); 137 138 /* load shaders */ 139 vs = (u32 *) ((char *)dev->agp_buffer_map->handle + dev_priv->blit_vb->offset); 140 ps = (u32 *) ((char *)dev->agp_buffer_map->handle + dev_priv->blit_vb->offset + 256); 141 142 for (i = 0; i < r6xx_vs_size; i++) 143 vs[i] = cpu_to_le32(r6xx_vs[i]); 144 for (i = 0; i < r6xx_ps_size; i++) 145 ps[i] = cpu_to_le32(r6xx_ps[i]); 146 147 dev_priv->blit_vb->used = 512; 148 149 gpu_addr 
= dev_priv->gart_buffers_offset + dev_priv->blit_vb->offset; 150 151 /* setup shader regs */ 152 sq_pgm_resources = (1 << 0); 153 154 BEGIN_RING(9 + 12); 155 /* VS */ 156 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); 157 OUT_RING((R600_SQ_PGM_START_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2); 158 OUT_RING(gpu_addr >> 8); 159 160 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); 161 OUT_RING((R600_SQ_PGM_RESOURCES_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2); 162 OUT_RING(sq_pgm_resources); 163 164 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); 165 OUT_RING((R600_SQ_PGM_CF_OFFSET_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2); 166 OUT_RING(0); 167 168 /* PS */ 169 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); 170 OUT_RING((R600_SQ_PGM_START_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2); 171 OUT_RING((gpu_addr + 256) >> 8); 172 173 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); 174 OUT_RING((R600_SQ_PGM_RESOURCES_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2); 175 OUT_RING(sq_pgm_resources | (1 << 28)); 176 177 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); 178 OUT_RING((R600_SQ_PGM_EXPORTS_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2); 179 OUT_RING(2); 180 181 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1)); 182 OUT_RING((R600_SQ_PGM_CF_OFFSET_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2); 183 OUT_RING(0); 184 ADVANCE_RING(); 185 186 cp_set_surface_sync(dev_priv, 187 R600_SH_ACTION_ENA, 512, gpu_addr); 188} 189 190static void 191set_vtx_resource(drm_radeon_private_t *dev_priv, u64 gpu_addr) 192{ 193 uint32_t sq_vtx_constant_word2; 194 RING_LOCALS; 195 DRM_DEBUG("\n"); 196 197 sq_vtx_constant_word2 = (((gpu_addr >> 32) & 0xff) | (16 << 8)); 198#ifdef __BIG_ENDIAN 199 sq_vtx_constant_word2 |= (2U << 30); 200#endif 201 202 BEGIN_RING(9); 203 OUT_RING(CP_PACKET3(R600_IT_SET_RESOURCE, 7)); 204 OUT_RING(0x460); 205 OUT_RING(gpu_addr & 0xffffffff); 206 OUT_RING(48 - 1); 207 OUT_RING(sq_vtx_constant_word2); 208 OUT_RING(1 << 0); 209 OUT_RING(0); 210 OUT_RING(0); 211 
OUT_RING(R600_SQ_TEX_VTX_VALID_BUFFER << 30); 212 ADVANCE_RING(); 213 214 if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) || 215 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) || 216 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) || 217 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880) || 218 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV710)) 219 cp_set_surface_sync(dev_priv, 220 R600_TC_ACTION_ENA, 48, gpu_addr); 221 else 222 cp_set_surface_sync(dev_priv, 223 R600_VC_ACTION_ENA, 48, gpu_addr); 224} 225 226static void 227set_tex_resource(drm_radeon_private_t *dev_priv, 228 int format, int w, int h, int pitch, u64 gpu_addr) 229{ 230 uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4; 231 RING_LOCALS; 232 DRM_DEBUG("\n"); 233 234 if (h < 1) 235 h = 1; 236 237 sq_tex_resource_word0 = (1 << 0); 238 sq_tex_resource_word0 |= ((((pitch >> 3) - 1) << 8) | 239 ((w - 1) << 19)); 240 241 sq_tex_resource_word1 = (format << 26); 242 sq_tex_resource_word1 |= ((h - 1) << 0); 243 244 sq_tex_resource_word4 = ((1 << 14) | 245 (0 << 16) | 246 (1 << 19) | 247 (2 << 22) | 248 (3 << 25)); 249 250 BEGIN_RING(9); 251 OUT_RING(CP_PACKET3(R600_IT_SET_RESOURCE, 7)); 252 OUT_RING(0); 253 OUT_RING(sq_tex_resource_word0); 254 OUT_RING(sq_tex_resource_word1); 255 OUT_RING(gpu_addr >> 8); 256 OUT_RING(gpu_addr >> 8); 257 OUT_RING(sq_tex_resource_word4); 258 OUT_RING(0); 259 OUT_RING(R600_SQ_TEX_VTX_VALID_TEXTURE << 30); 260 ADVANCE_RING(); 261 262} 263 264static void 265set_scissors(drm_radeon_private_t *dev_priv, int x1, int y1, int x2, int y2) 266{ 267 RING_LOCALS; 268 DRM_DEBUG("\n"); 269 270 BEGIN_RING(12); 271 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2)); 272 OUT_RING((R600_PA_SC_SCREEN_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2); 273 OUT_RING((x1 << 0) | (y1 << 16)); 274 OUT_RING((x2 << 0) | (y2 << 16)); 275 276 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2)); 277 OUT_RING((R600_PA_SC_GENERIC_SCISSOR_TL - 
R600_SET_CONTEXT_REG_OFFSET) >> 2); 278 OUT_RING((x1 << 0) | (y1 << 16) | (1U << 31)); 279 OUT_RING((x2 << 0) | (y2 << 16)); 280 281 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2)); 282 OUT_RING((R600_PA_SC_WINDOW_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2); 283 OUT_RING((x1 << 0) | (y1 << 16) | (1U << 31)); 284 OUT_RING((x2 << 0) | (y2 << 16)); 285 ADVANCE_RING(); 286} 287 288static void 289draw_auto(drm_radeon_private_t *dev_priv) 290{ 291 RING_LOCALS; 292 DRM_DEBUG("\n"); 293 294 BEGIN_RING(10); 295 OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1)); 296 OUT_RING((R600_VGT_PRIMITIVE_TYPE - R600_SET_CONFIG_REG_OFFSET) >> 2); 297 OUT_RING(DI_PT_RECTLIST); 298 299 OUT_RING(CP_PACKET3(R600_IT_INDEX_TYPE, 0)); 300#ifdef __BIG_ENDIAN 301 OUT_RING((2 << 2) | DI_INDEX_SIZE_16_BIT); 302#else 303 OUT_RING(DI_INDEX_SIZE_16_BIT); 304#endif 305 306 OUT_RING(CP_PACKET3(R600_IT_NUM_INSTANCES, 0)); 307 OUT_RING(1); 308 309 OUT_RING(CP_PACKET3(R600_IT_DRAW_INDEX_AUTO, 1)); 310 OUT_RING(3); 311 OUT_RING(DI_SRC_SEL_AUTO_INDEX); 312 313 ADVANCE_RING(); 314 COMMIT_RING(); 315} 316 317static void 318set_default_state(drm_radeon_private_t *dev_priv) 319{ 320 int i; 321 u32 sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2; 322 u32 sq_thread_resource_mgmt, sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2; 323 int num_ps_gprs, num_vs_gprs, num_temp_gprs, num_gs_gprs, num_es_gprs; 324 int num_ps_threads, num_vs_threads, num_gs_threads, num_es_threads; 325 int num_ps_stack_entries, num_vs_stack_entries, num_gs_stack_entries, num_es_stack_entries; 326 RING_LOCALS; 327 328 switch ((dev_priv->flags & RADEON_FAMILY_MASK)) { 329 case CHIP_R600: 330 num_ps_gprs = 192; 331 num_vs_gprs = 56; 332 num_temp_gprs = 4; 333 num_gs_gprs = 0; 334 num_es_gprs = 0; 335 num_ps_threads = 136; 336 num_vs_threads = 48; 337 num_gs_threads = 4; 338 num_es_threads = 4; 339 num_ps_stack_entries = 128; 340 num_vs_stack_entries = 128; 341 num_gs_stack_entries = 0; 342 num_es_stack_entries = 0; 343 break; 
344 case CHIP_RV630: 345 case CHIP_RV635: 346 num_ps_gprs = 84; 347 num_vs_gprs = 36; 348 num_temp_gprs = 4; 349 num_gs_gprs = 0; 350 num_es_gprs = 0; 351 num_ps_threads = 144; 352 num_vs_threads = 40; 353 num_gs_threads = 4; 354 num_es_threads = 4; 355 num_ps_stack_entries = 40; 356 num_vs_stack_entries = 40; 357 num_gs_stack_entries = 32; 358 num_es_stack_entries = 16; 359 break; 360 case CHIP_RV610: 361 case CHIP_RV620: 362 case CHIP_RS780: 363 case CHIP_RS880: 364 default: 365 num_ps_gprs = 84; 366 num_vs_gprs = 36; 367 num_temp_gprs = 4; 368 num_gs_gprs = 0; 369 num_es_gprs = 0; 370 num_ps_threads = 136; 371 num_vs_threads = 48; 372 num_gs_threads = 4; 373 num_es_threads = 4; 374 num_ps_stack_entries = 40; 375 num_vs_stack_entries = 40; 376 num_gs_stack_entries = 32; 377 num_es_stack_entries = 16; 378 break; 379 case CHIP_RV670: 380 num_ps_gprs = 144; 381 num_vs_gprs = 40; 382 num_temp_gprs = 4; 383 num_gs_gprs = 0; 384 num_es_gprs = 0; 385 num_ps_threads = 136; 386 num_vs_threads = 48; 387 num_gs_threads = 4; 388 num_es_threads = 4; 389 num_ps_stack_entries = 40; 390 num_vs_stack_entries = 40; 391 num_gs_stack_entries = 32; 392 num_es_stack_entries = 16; 393 break; 394 case CHIP_RV770: 395 num_ps_gprs = 192; 396 num_vs_gprs = 56; 397 num_temp_gprs = 4; 398 num_gs_gprs = 0; 399 num_es_gprs = 0; 400 num_ps_threads = 188; 401 num_vs_threads = 60; 402 num_gs_threads = 0; 403 num_es_threads = 0; 404 num_ps_stack_entries = 256; 405 num_vs_stack_entries = 256; 406 num_gs_stack_entries = 0; 407 num_es_stack_entries = 0; 408 break; 409 case CHIP_RV730: 410 case CHIP_RV740: 411 num_ps_gprs = 84; 412 num_vs_gprs = 36; 413 num_temp_gprs = 4; 414 num_gs_gprs = 0; 415 num_es_gprs = 0; 416 num_ps_threads = 188; 417 num_vs_threads = 60; 418 num_gs_threads = 0; 419 num_es_threads = 0; 420 num_ps_stack_entries = 128; 421 num_vs_stack_entries = 128; 422 num_gs_stack_entries = 0; 423 num_es_stack_entries = 0; 424 break; 425 case CHIP_RV710: 426 num_ps_gprs = 192; 427 num_vs_gprs 
= 56; 428 num_temp_gprs = 4; 429 num_gs_gprs = 0; 430 num_es_gprs = 0; 431 num_ps_threads = 144; 432 num_vs_threads = 48; 433 num_gs_threads = 0; 434 num_es_threads = 0; 435 num_ps_stack_entries = 128; 436 num_vs_stack_entries = 128; 437 num_gs_stack_entries = 0; 438 num_es_stack_entries = 0; 439 break; 440 } 441 442 if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) || 443 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) || 444 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) || 445 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880) || 446 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV710)) 447 sq_config = 0; 448 else 449 sq_config = R600_VC_ENABLE; 450 451 sq_config |= (R600_DX9_CONSTS | 452 R600_ALU_INST_PREFER_VECTOR | 453 R600_PS_PRIO(0) | 454 R600_VS_PRIO(1) | 455 R600_GS_PRIO(2) | 456 R600_ES_PRIO(3)); 457 458 sq_gpr_resource_mgmt_1 = (R600_NUM_PS_GPRS(num_ps_gprs) | 459 R600_NUM_VS_GPRS(num_vs_gprs) | 460 R600_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs)); 461 sq_gpr_resource_mgmt_2 = (R600_NUM_GS_GPRS(num_gs_gprs) | 462 R600_NUM_ES_GPRS(num_es_gprs)); 463 sq_thread_resource_mgmt = (R600_NUM_PS_THREADS(num_ps_threads) | 464 R600_NUM_VS_THREADS(num_vs_threads) | 465 R600_NUM_GS_THREADS(num_gs_threads) | 466 R600_NUM_ES_THREADS(num_es_threads)); 467 sq_stack_resource_mgmt_1 = (R600_NUM_PS_STACK_ENTRIES(num_ps_stack_entries) | 468 R600_NUM_VS_STACK_ENTRIES(num_vs_stack_entries)); 469 sq_stack_resource_mgmt_2 = (R600_NUM_GS_STACK_ENTRIES(num_gs_stack_entries) | 470 R600_NUM_ES_STACK_ENTRIES(num_es_stack_entries)); 471 472 if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770) { 473 BEGIN_RING(r7xx_default_size + 10); 474 for (i = 0; i < r7xx_default_size; i++) 475 OUT_RING(r7xx_default_state[i]); 476 } else { 477 BEGIN_RING(r6xx_default_size + 10); 478 for (i = 0; i < r6xx_default_size; i++) 479 OUT_RING(r6xx_default_state[i]); 480 } 481 OUT_RING(CP_PACKET3(R600_IT_EVENT_WRITE, 0)); 482 OUT_RING(R600_CACHE_FLUSH_AND_INV_EVENT); 483 /* SQ 
config */ 484 OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 6)); 485 OUT_RING((R600_SQ_CONFIG - R600_SET_CONFIG_REG_OFFSET) >> 2); 486 OUT_RING(sq_config); 487 OUT_RING(sq_gpr_resource_mgmt_1); 488 OUT_RING(sq_gpr_resource_mgmt_2); 489 OUT_RING(sq_thread_resource_mgmt); 490 OUT_RING(sq_stack_resource_mgmt_1); 491 OUT_RING(sq_stack_resource_mgmt_2); 492 ADVANCE_RING(); 493} 494 495/* 23 bits of float fractional data */ 496#define I2F_FRAC_BITS 23 497#define I2F_MASK ((1 << I2F_FRAC_BITS) - 1) 498 499/* 500 * Converts unsigned integer into 32-bit IEEE floating point representation. 501 * Will be exact from 0 to 2^24. Above that, we round towards zero 502 * as the fractional bits will not fit in a float. (It would be better to 503 * round towards even as the fpu does, but that is slower.) 504 */ 505__pure uint32_t int2float(uint32_t x) 506{ 507 uint32_t msb, exponent, fraction; 508 509 /* Zero is special */ 510 if (!x) return 0; 511 512 /* Get location of the most significant bit */ 513 msb = fls(x); 514 515 /* 516 * Use a rotate instead of a shift because that works both leftwards 517 * and rightwards due to the mod(32) behaviour. This means we don't 518 * need to check to see if we are above 2^24 or not. 
519 */ 520 fraction = ror32(x, (msb - I2F_FRAC_BITS) & 0x1f) & I2F_MASK; 521 exponent = (127 + msb) << I2F_FRAC_BITS; 522 523 return fraction + exponent; 524} 525 526static int r600_nomm_get_vb(struct drm_device *dev) 527{ 528 drm_radeon_private_t *dev_priv = dev->dev_private; 529 dev_priv->blit_vb = radeon_freelist_get(dev); 530 if (!dev_priv->blit_vb) { 531 DRM_ERROR("Unable to allocate vertex buffer for blit\n"); 532 return -EAGAIN; 533 } 534 return 0; 535} 536 537static void r600_nomm_put_vb(struct drm_device *dev) 538{ 539 drm_radeon_private_t *dev_priv = dev->dev_private; 540 541 dev_priv->blit_vb->used = 0; 542 radeon_cp_discard_buffer(dev, dev_priv->blit_vb->file_priv->master, dev_priv->blit_vb); 543} 544 545static void *r600_nomm_get_vb_ptr(struct drm_device *dev) 546{ 547 drm_radeon_private_t *dev_priv = dev->dev_private; 548 return (((char *)dev->agp_buffer_map->handle + 549 dev_priv->blit_vb->offset + dev_priv->blit_vb->used)); 550} 551 552int 553r600_prepare_blit_copy(struct drm_device *dev, struct drm_file *file_priv) 554{ 555 drm_radeon_private_t *dev_priv = dev->dev_private; 556 int ret; 557 DRM_DEBUG("\n"); 558 559 ret = r600_nomm_get_vb(dev); 560 if (ret) 561 return ret; 562 563 dev_priv->blit_vb->file_priv = file_priv; 564 565 set_default_state(dev_priv); 566 set_shaders(dev); 567 568 return 0; 569} 570 571 572void 573r600_done_blit_copy(struct drm_device *dev) 574{ 575 drm_radeon_private_t *dev_priv = dev->dev_private; 576 RING_LOCALS; 577 DRM_DEBUG("\n"); 578 579 BEGIN_RING(5); 580 OUT_RING(CP_PACKET3(R600_IT_EVENT_WRITE, 0)); 581 OUT_RING(R600_CACHE_FLUSH_AND_INV_EVENT); 582 /* wait for 3D idle clean */ 583 OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1)); 584 OUT_RING((R600_WAIT_UNTIL - R600_SET_CONFIG_REG_OFFSET) >> 2); 585 OUT_RING(RADEON_WAIT_3D_IDLE | RADEON_WAIT_3D_IDLECLEAN); 586 587 ADVANCE_RING(); 588 COMMIT_RING(); 589 590 r600_nomm_put_vb(dev); 591} 592 593void 594r600_blit_copy(struct drm_device *dev, 595 uint64_t src_gpu_addr, 
uint64_t dst_gpu_addr, 596 int size_bytes) 597{ 598 drm_radeon_private_t *dev_priv = dev->dev_private; 599 int max_bytes; 600 u64 vb_addr; 601 u32 *vb; 602 603 vb = r600_nomm_get_vb_ptr(dev); 604 605 if ((size_bytes & 3) || (src_gpu_addr & 3) || (dst_gpu_addr & 3)) { 606 max_bytes = 8192; 607 608 while (size_bytes) { 609 int cur_size = size_bytes; 610 int src_x = src_gpu_addr & 255; 611 int dst_x = dst_gpu_addr & 255; 612 int h = 1; 613 src_gpu_addr = src_gpu_addr & ~255; 614 dst_gpu_addr = dst_gpu_addr & ~255; 615 616 if (!src_x && !dst_x) { 617 h = (cur_size / max_bytes); 618 if (h > 8192) 619 h = 8192; 620 if (h == 0) 621 h = 1; 622 else 623 cur_size = max_bytes; 624 } else { 625 if (cur_size > max_bytes) 626 cur_size = max_bytes; 627 if (cur_size > (max_bytes - dst_x)) 628 cur_size = (max_bytes - dst_x); 629 if (cur_size > (max_bytes - src_x)) 630 cur_size = (max_bytes - src_x); 631 } 632 633 if ((dev_priv->blit_vb->used + 48) > dev_priv->blit_vb->total) { 634 635 r600_nomm_put_vb(dev); 636 r600_nomm_get_vb(dev); 637 if (!dev_priv->blit_vb) 638 return; 639 set_shaders(dev); 640 vb = r600_nomm_get_vb_ptr(dev); 641 } 642 643 vb[0] = int2float(dst_x); 644 vb[1] = 0; 645 vb[2] = int2float(src_x); 646 vb[3] = 0; 647 648 vb[4] = int2float(dst_x); 649 vb[5] = int2float(h); 650 vb[6] = int2float(src_x); 651 vb[7] = int2float(h); 652 653 vb[8] = int2float(dst_x + cur_size); 654 vb[9] = int2float(h); 655 vb[10] = int2float(src_x + cur_size); 656 vb[11] = int2float(h); 657 658 /* src */ 659 set_tex_resource(dev_priv, FMT_8, 660 src_x + cur_size, h, src_x + cur_size, 661 src_gpu_addr); 662 663 cp_set_surface_sync(dev_priv, 664 R600_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr); 665 666 /* dst */ 667 set_render_target(dev_priv, COLOR_8, 668 dst_x + cur_size, h, 669 dst_gpu_addr); 670 671 /* scissors */ 672 set_scissors(dev_priv, dst_x, 0, dst_x + cur_size, h); 673 674 /* Vertex buffer setup */ 675 vb_addr = dev_priv->gart_buffers_offset + 676 
dev_priv->blit_vb->offset + 677 dev_priv->blit_vb->used; 678 set_vtx_resource(dev_priv, vb_addr); 679 680 /* draw */ 681 draw_auto(dev_priv); 682 683 cp_set_surface_sync(dev_priv, 684 R600_CB_ACTION_ENA | R600_CB0_DEST_BASE_ENA, 685 cur_size * h, dst_gpu_addr); 686 687 vb += 12; 688 dev_priv->blit_vb->used += 12 * 4; 689 690 src_gpu_addr += cur_size * h; 691 dst_gpu_addr += cur_size * h; 692 size_bytes -= cur_size * h; 693 } 694 } else { 695 max_bytes = 8192 * 4; 696 697 while (size_bytes) { 698 int cur_size = size_bytes; 699 int src_x = (src_gpu_addr & 255); 700 int dst_x = (dst_gpu_addr & 255); 701 int h = 1; 702 src_gpu_addr = src_gpu_addr & ~255; 703 dst_gpu_addr = dst_gpu_addr & ~255; 704 705 if (!src_x && !dst_x) { 706 h = (cur_size / max_bytes); 707 if (h > 8192) 708 h = 8192; 709 if (h == 0) 710 h = 1; 711 else 712 cur_size = max_bytes; 713 } else { 714 if (cur_size > max_bytes) 715 cur_size = max_bytes; 716 if (cur_size > (max_bytes - dst_x)) 717 cur_size = (max_bytes - dst_x); 718 if (cur_size > (max_bytes - src_x)) 719 cur_size = (max_bytes - src_x); 720 } 721 722 if ((dev_priv->blit_vb->used + 48) > dev_priv->blit_vb->total) { 723 r600_nomm_put_vb(dev); 724 r600_nomm_get_vb(dev); 725 if (!dev_priv->blit_vb) 726 return; 727 728 set_shaders(dev); 729 vb = r600_nomm_get_vb_ptr(dev); 730 } 731 732 vb[0] = int2float(dst_x / 4); 733 vb[1] = 0; 734 vb[2] = int2float(src_x / 4); 735 vb[3] = 0; 736 737 vb[4] = int2float(dst_x / 4); 738 vb[5] = int2float(h); 739 vb[6] = int2float(src_x / 4); 740 vb[7] = int2float(h); 741 742 vb[8] = int2float((dst_x + cur_size) / 4); 743 vb[9] = int2float(h); 744 vb[10] = int2float((src_x + cur_size) / 4); 745 vb[11] = int2float(h); 746 747 /* src */ 748 set_tex_resource(dev_priv, FMT_8_8_8_8, 749 (src_x + cur_size) / 4, 750 h, (src_x + cur_size) / 4, 751 src_gpu_addr); 752 753 cp_set_surface_sync(dev_priv, 754 R600_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr); 755 756 /* dst */ 757 set_render_target(dev_priv, 
COLOR_8_8_8_8, 758 (dst_x + cur_size) / 4, h, 759 dst_gpu_addr); 760 761 /* scissors */ 762 set_scissors(dev_priv, (dst_x / 4), 0, (dst_x + cur_size / 4), h); 763 764 /* Vertex buffer setup */ 765 vb_addr = dev_priv->gart_buffers_offset + 766 dev_priv->blit_vb->offset + 767 dev_priv->blit_vb->used; 768 set_vtx_resource(dev_priv, vb_addr); 769 770 /* draw */ 771 draw_auto(dev_priv); 772 773 cp_set_surface_sync(dev_priv, 774 R600_CB_ACTION_ENA | R600_CB0_DEST_BASE_ENA, 775 cur_size * h, dst_gpu_addr); 776 777 vb += 12; 778 dev_priv->blit_vb->used += 12 * 4; 779 780 src_gpu_addr += cur_size * h; 781 dst_gpu_addr += cur_size * h; 782 size_bytes -= cur_size * h; 783 } 784 } 785} 786 787void 788r600_blit_swap(struct drm_device *dev, 789 uint64_t src_gpu_addr, uint64_t dst_gpu_addr, 790 int sx, int sy, int dx, int dy, 791 int w, int h, int src_pitch, int dst_pitch, int cpp) 792{ 793 drm_radeon_private_t *dev_priv = dev->dev_private; 794 int cb_format, tex_format; 795 int sx2, sy2, dx2, dy2; 796 u64 vb_addr; 797 u32 *vb; 798 799 if ((dev_priv->blit_vb->used + 48) > dev_priv->blit_vb->total) { 800 801 r600_nomm_put_vb(dev); 802 r600_nomm_get_vb(dev); 803 if (!dev_priv->blit_vb) 804 return; 805 806 set_shaders(dev); 807 } 808 vb = r600_nomm_get_vb_ptr(dev); 809 810 sx2 = sx + w; 811 sy2 = sy + h; 812 dx2 = dx + w; 813 dy2 = dy + h; 814 815 vb[0] = int2float(dx); 816 vb[1] = int2float(dy); 817 vb[2] = int2float(sx); 818 vb[3] = int2float(sy); 819 820 vb[4] = int2float(dx); 821 vb[5] = int2float(dy2); 822 vb[6] = int2float(sx); 823 vb[7] = int2float(sy2); 824 825 vb[8] = int2float(dx2); 826 vb[9] = int2float(dy2); 827 vb[10] = int2float(sx2); 828 vb[11] = int2float(sy2); 829 830 switch(cpp) { 831 case 4: 832 cb_format = COLOR_8_8_8_8; 833 tex_format = FMT_8_8_8_8; 834 break; 835 case 2: 836 cb_format = COLOR_5_6_5; 837 tex_format = FMT_5_6_5; 838 break; 839 default: 840 cb_format = COLOR_8; 841 tex_format = FMT_8; 842 break; 843 } 844 845 /* src */ 846 
set_tex_resource(dev_priv, tex_format, 847 src_pitch / cpp, 848 sy2, src_pitch / cpp, 849 src_gpu_addr); 850 851 cp_set_surface_sync(dev_priv, 852 R600_TC_ACTION_ENA, src_pitch * sy2, src_gpu_addr); 853 854 /* dst */ 855 set_render_target(dev_priv, cb_format, 856 dst_pitch / cpp, dy2, 857 dst_gpu_addr); 858 859 /* scissors */ 860 set_scissors(dev_priv, dx, dy, dx2, dy2); 861 862 /* Vertex buffer setup */ 863 vb_addr = dev_priv->gart_buffers_offset + 864 dev_priv->blit_vb->offset + 865 dev_priv->blit_vb->used; 866 set_vtx_resource(dev_priv, vb_addr); 867 868 /* draw */ 869 draw_auto(dev_priv); 870 871 cp_set_surface_sync(dev_priv, 872 R600_CB_ACTION_ENA | R600_CB0_DEST_BASE_ENA, 873 dst_pitch * dy2, dst_gpu_addr); 874 875 dev_priv->blit_vb->used += 12 * 4; 876} 877