1/* $NetBSD: radeon_r200.c,v 1.2 2021/12/18 23:45:43 riastradh Exp $ */ 2 3/* 4 * Copyright 2008 Advanced Micro Devices, Inc. 5 * Copyright 2008 Red Hat Inc. 6 * Copyright 2009 Jerome Glisse. 7 * 8 * Permission is hereby granted, free of charge, to any person obtaining a 9 * copy of this software and associated documentation files (the "Software"), 10 * to deal in the Software without restriction, including without limitation 11 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 12 * and/or sell copies of the Software, and to permit persons to whom the 13 * Software is furnished to do so, subject to the following conditions: 14 * 15 * The above copyright notice and this permission notice shall be included in 16 * all copies or substantial portions of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 21 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 22 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 23 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 24 * OTHER DEALINGS IN THE SOFTWARE. 25 * 26 * Authors: Dave Airlie 27 * Alex Deucher 28 * Jerome Glisse 29 */ 30 31#include <sys/cdefs.h> 32__KERNEL_RCSID(0, "$NetBSD: radeon_r200.c,v 1.2 2021/12/18 23:45:43 riastradh Exp $"); 33 34#include <drm/radeon_drm.h> 35#include "radeon_reg.h" 36#include "radeon.h" 37#include "radeon_asic.h" 38 39#include "r100d.h" 40#include "r200_reg_safe.h" 41 42#include "r100_track.h" 43 44static int r200_get_vtx_size_0(uint32_t vtx_fmt_0) 45{ 46 int vtx_size, i; 47 vtx_size = 2; 48 49 if (vtx_fmt_0 & R200_VTX_Z0) 50 vtx_size++; 51 if (vtx_fmt_0 & R200_VTX_W0) 52 vtx_size++; 53 /* blend weight */ 54 if (vtx_fmt_0 & (0x7 << R200_VTX_WEIGHT_COUNT_SHIFT)) 55 vtx_size += (vtx_fmt_0 >> R200_VTX_WEIGHT_COUNT_SHIFT) & 0x7; 56 if (vtx_fmt_0 & R200_VTX_PV_MATRIX_SEL) 57 vtx_size++; 58 if (vtx_fmt_0 & R200_VTX_N0) 59 vtx_size += 3; 60 if (vtx_fmt_0 & R200_VTX_POINT_SIZE) 61 vtx_size++; 62 if (vtx_fmt_0 & R200_VTX_DISCRETE_FOG) 63 vtx_size++; 64 if (vtx_fmt_0 & R200_VTX_SHININESS_0) 65 vtx_size++; 66 if (vtx_fmt_0 & R200_VTX_SHININESS_1) 67 vtx_size++; 68 for (i = 0; i < 8; i++) { 69 int color_size = (vtx_fmt_0 >> (11 + 2*i)) & 0x3; 70 switch (color_size) { 71 case 0: break; 72 case 1: vtx_size++; break; 73 case 2: vtx_size += 3; break; 74 case 3: vtx_size += 4; break; 75 } 76 } 77 if (vtx_fmt_0 & R200_VTX_XY1) 78 vtx_size += 2; 79 if (vtx_fmt_0 & R200_VTX_Z1) 80 vtx_size++; 81 if (vtx_fmt_0 & R200_VTX_W1) 82 vtx_size++; 83 if (vtx_fmt_0 & R200_VTX_N1) 84 vtx_size += 3; 85 return vtx_size; 86} 87 88struct radeon_fence *r200_copy_dma(struct radeon_device *rdev, 89 uint64_t src_offset, 90 uint64_t dst_offset, 91 unsigned num_gpu_pages, 92 struct dma_resv *resv) 93{ 94 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; 95 struct radeon_fence *fence; 96 uint32_t size; 97 uint32_t cur_size; 98 int i, num_loops; 99 int r = 0; 100 101 /* radeon pitch is /64 */ 102 size = num_gpu_pages << RADEON_GPU_PAGE_SHIFT; 103 num_loops = DIV_ROUND_UP(size, 0x1FFFFF); 104 r = radeon_ring_lock(rdev, ring, num_loops * 4 + 64); 105 if (r) { 106 DRM_ERROR("radeon: moving bo (%d).\n", r); 107 return ERR_PTR(r); 108 } 109 /* Must wait for 2D idle & clean before DMA or hangs might happen */ 110 radeon_ring_write(ring, PACKET0(RADEON_WAIT_UNTIL, 0)); 111 radeon_ring_write(ring, (1 << 16)); 112 for (i = 0; i < num_loops; i++) { 113 cur_size = size; 114 if (cur_size > 0x1FFFFF) { 115 cur_size = 0x1FFFFF; 116 } 117 size -= cur_size; 118 radeon_ring_write(ring, PACKET0(0x720, 2)); 119 radeon_ring_write(ring, src_offset); 120 radeon_ring_write(ring, dst_offset); 121 radeon_ring_write(ring, cur_size | (1 << 31) | (1 << 30)); 122 src_offset += cur_size; 123 dst_offset += cur_size; 124 } 125 radeon_ring_write(ring, PACKET0(RADEON_WAIT_UNTIL, 0)); 126 radeon_ring_write(ring, RADEON_WAIT_DMA_GUI_IDLE); 127 r = radeon_fence_emit(rdev, &fence, RADEON_RING_TYPE_GFX_INDEX); 128 if (r) { 129 radeon_ring_unlock_undo(rdev, ring); 130 return ERR_PTR(r); 131 } 132 radeon_ring_unlock_commit(rdev, ring, false); 133 return fence; 134} 135 136 137static int r200_get_vtx_size_1(uint32_t vtx_fmt_1) 138{ 139 int vtx_size, i, tex_size; 140 vtx_size = 0; 141 for (i = 0; i < 6; i++) { 142 tex_size = (vtx_fmt_1 >> (i * 3)) & 0x7; 143 if (tex_size > 4) 144 continue; 145 vtx_size += tex_size; 146 } 147 return vtx_size; 148} 149 150int r200_packet0_check(struct radeon_cs_parser *p, 151 struct radeon_cs_packet *pkt, 152 unsigned idx, unsigned reg) 153{ 154 struct radeon_bo_list *reloc; 155 struct r100_cs_track *track; 156 volatile uint32_t *ib; 157 uint32_t tmp; 158 int r; 159 int i; 160 int face; 161 u32 tile_flags = 0; 162 u32 idx_value; 163 164 ib = p->ib.ptr; 165 track = (struct r100_cs_track *)p->track; 166 idx_value = radeon_get_ib_value(p, idx); 167 switch (reg) { 168 case RADEON_CRTC_GUI_TRIG_VLINE: 169 r = r100_cs_packet_parse_vline(p); 170 if (r) { 171 DRM_ERROR("No reloc for ib[%d]=0x%04X\n", 172 idx, reg); 173 radeon_cs_dump_packet(p, pkt); 174 return r; 175 } 176 break; 177 /* FIXME: only allow PACKET3 blit? easier to check for out of 178 * range access */ 179 case RADEON_DST_PITCH_OFFSET: 180 case RADEON_SRC_PITCH_OFFSET: 181 r = r100_reloc_pitch_offset(p, pkt, idx, reg); 182 if (r) 183 return r; 184 break; 185 case RADEON_RB3D_DEPTHOFFSET: 186 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 187 if (r) { 188 DRM_ERROR("No reloc for ib[%d]=0x%04X\n", 189 idx, reg); 190 radeon_cs_dump_packet(p, pkt); 191 return r; 192 } 193 track->zb.robj = reloc->robj; 194 track->zb.offset = idx_value; 195 track->zb_dirty = true; 196 ib[idx] = idx_value + ((u32)reloc->gpu_offset); 197 break; 198 case RADEON_RB3D_COLOROFFSET: 199 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 200 if (r) { 201 DRM_ERROR("No reloc for ib[%d]=0x%04X\n", 202 idx, reg); 203 radeon_cs_dump_packet(p, pkt); 204 return r; 205 } 206 track->cb[0].robj = reloc->robj; 207 track->cb[0].offset = idx_value; 208 track->cb_dirty = true; 209 ib[idx] = idx_value + ((u32)reloc->gpu_offset); 210 break; 211 case R200_PP_TXOFFSET_0: 212 case R200_PP_TXOFFSET_1: 213 case R200_PP_TXOFFSET_2: 214 case R200_PP_TXOFFSET_3: 215 case R200_PP_TXOFFSET_4: 216 case R200_PP_TXOFFSET_5: 217 i = (reg - R200_PP_TXOFFSET_0) / 24; 218 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 219 if (r) { 220 DRM_ERROR("No reloc for ib[%d]=0x%04X\n", 221 idx, reg); 222 radeon_cs_dump_packet(p, pkt); 223 return r; 224 } 225 if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { 226 if (reloc->tiling_flags & RADEON_TILING_MACRO) 227 tile_flags |= R200_TXO_MACRO_TILE; 228 if (reloc->tiling_flags & RADEON_TILING_MICRO) 229 tile_flags |= R200_TXO_MICRO_TILE; 230 231 tmp = idx_value & ~(0x7 << 2); 232 tmp |= tile_flags; 233 ib[idx] = tmp + ((u32)reloc->gpu_offset); 234 } else 235 ib[idx] = idx_value + ((u32)reloc->gpu_offset); 236 track->textures[i].robj = reloc->robj; 237 track->tex_dirty = true; 238 break; 239 case R200_PP_CUBIC_OFFSET_F1_0: 240 case R200_PP_CUBIC_OFFSET_F2_0: 241 case R200_PP_CUBIC_OFFSET_F3_0: 242 case R200_PP_CUBIC_OFFSET_F4_0: 243 case R200_PP_CUBIC_OFFSET_F5_0: 244 case R200_PP_CUBIC_OFFSET_F1_1: 245 case R200_PP_CUBIC_OFFSET_F2_1: 246 case R200_PP_CUBIC_OFFSET_F3_1: 247 case R200_PP_CUBIC_OFFSET_F4_1: 248 case R200_PP_CUBIC_OFFSET_F5_1: 249 case R200_PP_CUBIC_OFFSET_F1_2: 250 case R200_PP_CUBIC_OFFSET_F2_2: 251 case R200_PP_CUBIC_OFFSET_F3_2: 252 case R200_PP_CUBIC_OFFSET_F4_2: 253 case R200_PP_CUBIC_OFFSET_F5_2: 254 case R200_PP_CUBIC_OFFSET_F1_3: 255 case R200_PP_CUBIC_OFFSET_F2_3: 256 case R200_PP_CUBIC_OFFSET_F3_3: 257 case R200_PP_CUBIC_OFFSET_F4_3: 258 case R200_PP_CUBIC_OFFSET_F5_3: 259 case R200_PP_CUBIC_OFFSET_F1_4: 260 case R200_PP_CUBIC_OFFSET_F2_4: 261 case R200_PP_CUBIC_OFFSET_F3_4: 262 case R200_PP_CUBIC_OFFSET_F4_4: 263 case R200_PP_CUBIC_OFFSET_F5_4: 264 case R200_PP_CUBIC_OFFSET_F1_5: 265 case R200_PP_CUBIC_OFFSET_F2_5: 266 case R200_PP_CUBIC_OFFSET_F3_5: 267 case R200_PP_CUBIC_OFFSET_F4_5: 268 case R200_PP_CUBIC_OFFSET_F5_5: 269 i = (reg - R200_PP_TXOFFSET_0) / 24; 270 face = (reg - ((i * 24) + R200_PP_TXOFFSET_0)) / 4; 271 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 272 if (r) { 273 DRM_ERROR("No reloc for ib[%d]=0x%04X\n", 274 idx, reg); 275 radeon_cs_dump_packet(p, pkt); 276 return r; 277 } 278 track->textures[i].cube_info[face - 1].offset = idx_value; 279 ib[idx] = idx_value + ((u32)reloc->gpu_offset); 280 track->textures[i].cube_info[face - 1].robj = reloc->robj; 281 track->tex_dirty = true; 282 break; 283 case RADEON_RE_WIDTH_HEIGHT: 284 track->maxy = ((idx_value >> 16) & 0x7FF); 285 track->cb_dirty = true; 286 track->zb_dirty = true; 287 break; 288 case RADEON_RB3D_COLORPITCH: 289 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 290 if (r) { 291 DRM_ERROR("No reloc for ib[%d]=0x%04X\n", 292 idx, reg); 293 radeon_cs_dump_packet(p, pkt); 294 return r; 295 } 296 297 if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { 298 if (reloc->tiling_flags & RADEON_TILING_MACRO) 299 tile_flags |= RADEON_COLOR_TILE_ENABLE; 300 if (reloc->tiling_flags & RADEON_TILING_MICRO) 301 tile_flags |= RADEON_COLOR_MICROTILE_ENABLE; 302 303 tmp = idx_value & ~(0x7 << 16); 304 tmp |= tile_flags; 305 ib[idx] = tmp; 306 } else 307 ib[idx] = idx_value; 308 309 track->cb[0].pitch = idx_value & RADEON_COLORPITCH_MASK; 310 track->cb_dirty = true; 311 break; 312 case RADEON_RB3D_DEPTHPITCH: 313 track->zb.pitch = idx_value & RADEON_DEPTHPITCH_MASK; 314 track->zb_dirty = true; 315 break; 316 case RADEON_RB3D_CNTL: 317 switch ((idx_value >> RADEON_RB3D_COLOR_FORMAT_SHIFT) & 0x1f) { 318 case 7: 319 case 8: 320 case 9: 321 case 11: 322 case 12: 323 track->cb[0].cpp = 1; 324 break; 325 case 3: 326 case 4: 327 case 15: 328 track->cb[0].cpp = 2; 329 break; 330 case 6: 331 track->cb[0].cpp = 4; 332 break; 333 default: 334 DRM_ERROR("Invalid color buffer format (%d) !\n", 335 ((idx_value >> RADEON_RB3D_COLOR_FORMAT_SHIFT) & 0x1f)); 336 return -EINVAL; 337 } 338 if (idx_value & RADEON_DEPTHXY_OFFSET_ENABLE) { 339 DRM_ERROR("No support for depth xy offset in kms\n"); 340 return -EINVAL; 341 } 342 343 track->z_enabled = !!(idx_value & RADEON_Z_ENABLE); 344 track->cb_dirty = true; 345 track->zb_dirty = true; 346 break; 347 case RADEON_RB3D_ZSTENCILCNTL: 348 switch (idx_value & 0xf) { 349 case 0: 350 track->zb.cpp = 2; 351 break; 352 case 2: 353 case 3: 354 case 4: 355 case 5: 356 case 9: 357 case 11: 358 track->zb.cpp = 4; 359 break; 360 default: 361 break; 362 } 363 track->zb_dirty = true; 364 break; 365 case RADEON_RB3D_ZPASS_ADDR: 366 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 367 if (r) { 368 DRM_ERROR("No reloc for ib[%d]=0x%04X\n", 369 idx, reg); 370 radeon_cs_dump_packet(p, pkt); 371 return r; 372 } 373 ib[idx] = idx_value + ((u32)reloc->gpu_offset); 374 break; 375 case RADEON_PP_CNTL: 376 { 377 uint32_t temp = idx_value >> 4; 378 for (i = 0; i < track->num_texture; i++) 379 track->textures[i].enabled = !!(temp & (1 << i)); 380 track->tex_dirty = true; 381 } 382 break; 383 case RADEON_SE_VF_CNTL: 384 track->vap_vf_cntl = idx_value; 385 break; 386 case 0x210c: 387 /* VAP_VF_MAX_VTX_INDX */ 388 track->max_indx = idx_value & 0x00FFFFFFUL; 389 break; 390 case R200_SE_VTX_FMT_0: 391 track->vtx_size = r200_get_vtx_size_0(idx_value); 392 break; 393 case R200_SE_VTX_FMT_1: 394 track->vtx_size += r200_get_vtx_size_1(idx_value); 395 break; 396 case R200_PP_TXSIZE_0: 397 case R200_PP_TXSIZE_1: 398 case R200_PP_TXSIZE_2: 399 case R200_PP_TXSIZE_3: 400 case R200_PP_TXSIZE_4: 401 case R200_PP_TXSIZE_5: 402 i = (reg - R200_PP_TXSIZE_0) / 32; 403 track->textures[i].width = (idx_value & RADEON_TEX_USIZE_MASK) + 1; 404 track->textures[i].height = ((idx_value & RADEON_TEX_VSIZE_MASK) >> RADEON_TEX_VSIZE_SHIFT) + 1; 405 track->tex_dirty = true; 406 break; 407 case R200_PP_TXPITCH_0: 408 case R200_PP_TXPITCH_1: 409 case R200_PP_TXPITCH_2: 410 case R200_PP_TXPITCH_3: 411 case R200_PP_TXPITCH_4: 412 case R200_PP_TXPITCH_5: 413 i = (reg - R200_PP_TXPITCH_0) / 32; 414 track->textures[i].pitch = idx_value + 32; 415 track->tex_dirty = true; 416 break; 417 case R200_PP_TXFILTER_0: 418 case R200_PP_TXFILTER_1: 419 case R200_PP_TXFILTER_2: 420 case R200_PP_TXFILTER_3: 421 case R200_PP_TXFILTER_4: 422 case R200_PP_TXFILTER_5: 423 i = (reg - R200_PP_TXFILTER_0) / 32; 424 track->textures[i].num_levels = ((idx_value & R200_MAX_MIP_LEVEL_MASK) 425 >> R200_MAX_MIP_LEVEL_SHIFT); 426 tmp = (idx_value >> 23) & 0x7; 427 if (tmp == 2 || tmp == 6) 428 track->textures[i].roundup_w = false; 429 tmp = (idx_value >> 27) & 0x7; 430 if (tmp == 2 || tmp == 6) 431 track->textures[i].roundup_h = false; 432 track->tex_dirty = true; 433 break; 434 case R200_PP_TXMULTI_CTL_0: 435 case R200_PP_TXMULTI_CTL_1: 436 case R200_PP_TXMULTI_CTL_2: 437 case R200_PP_TXMULTI_CTL_3: 438 case R200_PP_TXMULTI_CTL_4: 439 case R200_PP_TXMULTI_CTL_5: 440 i = (reg - R200_PP_TXMULTI_CTL_0) / 32; 441 break; 442 case R200_PP_TXFORMAT_X_0: 443 case R200_PP_TXFORMAT_X_1: 444 case R200_PP_TXFORMAT_X_2: 445 case R200_PP_TXFORMAT_X_3: 446 case R200_PP_TXFORMAT_X_4: 447 case R200_PP_TXFORMAT_X_5: 448 i = (reg - R200_PP_TXFORMAT_X_0) / 32; 449 track->textures[i].txdepth = idx_value & 0x7; 450 tmp = (idx_value >> 16) & 0x3; 451 /* 2D, 3D, CUBE */ 452 switch (tmp) { 453 case 0: 454 case 3: 455 case 4: 456 case 5: 457 case 6: 458 case 7: 459 /* 1D/2D */ 460 track->textures[i].tex_coord_type = 0; 461 break; 462 case 1: 463 /* CUBE */ 464 track->textures[i].tex_coord_type = 2; 465 break; 466 case 2: 467 /* 3D */ 468 track->textures[i].tex_coord_type = 1; 469 break; 470 } 471 track->tex_dirty = true; 472 break; 473 case R200_PP_TXFORMAT_0: 474 case R200_PP_TXFORMAT_1: 475 case R200_PP_TXFORMAT_2: 476 case R200_PP_TXFORMAT_3: 477 case R200_PP_TXFORMAT_4: 478 case R200_PP_TXFORMAT_5: 479 i = (reg - R200_PP_TXFORMAT_0) / 32; 480 if (idx_value & R200_TXFORMAT_NON_POWER2) { 481 track->textures[i].use_pitch = 1; 482 } else { 483 track->textures[i].use_pitch = 0; 484 track->textures[i].width = 1 << ((idx_value & RADEON_TXFORMAT_WIDTH_MASK) >> RADEON_TXFORMAT_WIDTH_SHIFT); 485 track->textures[i].height = 1 << ((idx_value & RADEON_TXFORMAT_HEIGHT_MASK) >> RADEON_TXFORMAT_HEIGHT_SHIFT); 486 } 487 if (idx_value & R200_TXFORMAT_LOOKUP_DISABLE) 488 track->textures[i].lookup_disable = true; 489 switch ((idx_value & RADEON_TXFORMAT_FORMAT_MASK)) { 490 case R200_TXFORMAT_I8: 491 case R200_TXFORMAT_RGB332: 492 case R200_TXFORMAT_Y8: 493 track->textures[i].cpp = 1; 494 track->textures[i].compress_format = R100_TRACK_COMP_NONE; 495 break; 496 case R200_TXFORMAT_AI88: 497 case R200_TXFORMAT_ARGB1555: 498 case R200_TXFORMAT_RGB565: 499 case R200_TXFORMAT_ARGB4444: 500 case R200_TXFORMAT_VYUY422: 501 case R200_TXFORMAT_YVYU422: 502 case R200_TXFORMAT_LDVDU655: 503 case R200_TXFORMAT_DVDU88: 504 case R200_TXFORMAT_AVYU4444: 505 track->textures[i].cpp = 2; 506 track->textures[i].compress_format = R100_TRACK_COMP_NONE; 507 break; 508 case R200_TXFORMAT_ARGB8888: 509 case R200_TXFORMAT_RGBA8888: 510 case R200_TXFORMAT_ABGR8888: 511 case R200_TXFORMAT_BGR111110: 512 case R200_TXFORMAT_LDVDU8888: 513 track->textures[i].cpp = 4; 514 track->textures[i].compress_format = R100_TRACK_COMP_NONE; 515 break; 516 case R200_TXFORMAT_DXT1: 517 track->textures[i].cpp = 1; 518 track->textures[i].compress_format = R100_TRACK_COMP_DXT1; 519 break; 520 case R200_TXFORMAT_DXT23: 521 case R200_TXFORMAT_DXT45: 522 track->textures[i].cpp = 1; 523 track->textures[i].compress_format = R100_TRACK_COMP_DXT1; 524 break; 525 } 526 track->textures[i].cube_info[4].width = 1 << ((idx_value >> 16) & 0xf); 527 track->textures[i].cube_info[4].height = 1 << ((idx_value >> 20) & 0xf); 528 track->tex_dirty = true; 529 break; 530 case R200_PP_CUBIC_FACES_0: 531 case R200_PP_CUBIC_FACES_1: 532 case R200_PP_CUBIC_FACES_2: 533 case R200_PP_CUBIC_FACES_3: 534 case R200_PP_CUBIC_FACES_4: 535 case R200_PP_CUBIC_FACES_5: 536 tmp = idx_value; 537 i = (reg - R200_PP_CUBIC_FACES_0) / 32; 538 for (face = 0; face < 4; face++) { 539 track->textures[i].cube_info[face].width = 1 << ((tmp >> (face * 8)) & 0xf); 540 track->textures[i].cube_info[face].height = 1 << ((tmp >> ((face * 8) + 4)) & 0xf); 541 } 542 track->tex_dirty = true; 543 break; 544 default: 545 pr_err("Forbidden register 0x%04X in cs at %d\n", reg, idx); 546 return -EINVAL; 547 } 548 return 0; 549} 550 551void r200_set_safe_registers(struct radeon_device *rdev) 552{ 553 rdev->config.r100.reg_safe_bm = r200_reg_safe_bm; 554 rdev->config.r100.reg_safe_bm_size = ARRAY_SIZE(r200_reg_safe_bm); 555} 556