1/* savage_state.c -- State and drawing support for Savage 2 * 3 * Copyright 2004 Felix Kuehling 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the "Software"), 8 * to deal in the Software without restriction, including without limitation 9 * the rights to use, copy, modify, merge, publish, distribute, sub license, 10 * and/or sell copies of the Software, and to permit persons to whom the 11 * Software is furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice (including the 14 * next paragraph) shall be included in all copies or substantial portions 15 * of the Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 20 * NON-INFRINGEMENT. IN NO EVENT SHALL FELIX KUEHLING BE LIABLE FOR 21 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF 22 * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 23 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 24 */ 25 26#include <sys/cdefs.h> 27__FBSDID("$FreeBSD$"); 28#include "dev/drm/drmP.h" 29#include "dev/drm/savage_drm.h" 30#include "dev/drm/savage_drv.h" 31 32void savage_emit_clip_rect_s3d(drm_savage_private_t *dev_priv, 33 const struct drm_clip_rect *pbox) 34{ 35 uint32_t scstart = dev_priv->state.s3d.new_scstart; 36 uint32_t scend = dev_priv->state.s3d.new_scend; 37 scstart = (scstart & ~SAVAGE_SCISSOR_MASK_S3D) | 38 ((uint32_t)pbox->x1 & 0x000007ff) | 39 (((uint32_t)pbox->y1 << 16) & 0x07ff0000); 40 scend = (scend & ~SAVAGE_SCISSOR_MASK_S3D) | 41 (((uint32_t)pbox->x2 - 1) & 0x000007ff) | 42 ((((uint32_t)pbox->y2 - 1) << 16) & 0x07ff0000); 43 if (scstart != dev_priv->state.s3d.scstart || 44 scend != dev_priv->state.s3d.scend) { 45 DMA_LOCALS; 46 BEGIN_DMA(4); 47 DMA_WRITE(BCI_CMD_WAIT | BCI_CMD_WAIT_3D); 48 DMA_SET_REGISTERS(SAVAGE_SCSTART_S3D, 2); 49 DMA_WRITE(scstart); 50 DMA_WRITE(scend); 51 dev_priv->state.s3d.scstart = scstart; 52 dev_priv->state.s3d.scend = scend; 53 dev_priv->waiting = 1; 54 DMA_COMMIT(); 55 } 56} 57 58void savage_emit_clip_rect_s4(drm_savage_private_t *dev_priv, 59 const struct drm_clip_rect *pbox) 60{ 61 uint32_t drawctrl0 = dev_priv->state.s4.new_drawctrl0; 62 uint32_t drawctrl1 = dev_priv->state.s4.new_drawctrl1; 63 drawctrl0 = (drawctrl0 & ~SAVAGE_SCISSOR_MASK_S4) | 64 ((uint32_t)pbox->x1 & 0x000007ff) | 65 (((uint32_t)pbox->y1 << 12) & 0x00fff000); 66 drawctrl1 = (drawctrl1 & ~SAVAGE_SCISSOR_MASK_S4) | 67 (((uint32_t)pbox->x2 - 1) & 0x000007ff) | 68 ((((uint32_t)pbox->y2 - 1) << 12) & 0x00fff000); 69 if (drawctrl0 != dev_priv->state.s4.drawctrl0 || 70 drawctrl1 != dev_priv->state.s4.drawctrl1) { 71 DMA_LOCALS; 72 BEGIN_DMA(4); 73 DMA_WRITE(BCI_CMD_WAIT | BCI_CMD_WAIT_3D); 74 DMA_SET_REGISTERS(SAVAGE_DRAWCTRL0_S4, 2); 75 DMA_WRITE(drawctrl0); 76 DMA_WRITE(drawctrl1); 77 dev_priv->state.s4.drawctrl0 = drawctrl0; 78 dev_priv->state.s4.drawctrl1 = drawctrl1; 79 dev_priv->waiting = 1; 80 DMA_COMMIT(); 81 } 82} 83 84static int savage_verify_texaddr(drm_savage_private_t *dev_priv, int unit, 85 uint32_t addr) 86{ 87 if ((addr & 6) != 2) { /* reserved bits */ 88 DRM_ERROR("bad texAddr%d %08x (reserved bits)\n", unit, addr); 89 return -EINVAL; 90 } 91 if (!(addr & 1)) { /* local */ 92 addr &= ~7; 93 if (addr < dev_priv->texture_offset || 94 addr >= dev_priv->texture_offset + dev_priv->texture_size) { 95 DRM_ERROR 96 ("bad texAddr%d %08x (local addr out of range)\n", 97 unit, addr); 98 return -EINVAL; 99 } 100 } else { /* AGP */ 101 if (!dev_priv->agp_textures) { 102 DRM_ERROR("bad texAddr%d %08x (AGP not available)\n", 103 unit, addr); 104 return -EINVAL; 105 } 106 addr &= ~7; 107 if (addr < dev_priv->agp_textures->offset || 108 addr >= (dev_priv->agp_textures->offset + 109 dev_priv->agp_textures->size)) { 110 DRM_ERROR 111 ("bad texAddr%d %08x (AGP addr out of range)\n", 112 unit, addr); 113 return -EINVAL; 114 } 115 } 116 return 0; 117} 118 119#define SAVE_STATE(reg,where) \ 120 if(start <= reg && start + count > reg) \ 121 dev_priv->state.where = regs[reg - start] 122#define SAVE_STATE_MASK(reg,where,mask) do { \ 123 if(start <= reg && start + count > reg) { \ 124 uint32_t tmp; \ 125 tmp = regs[reg - start]; \ 126 dev_priv->state.where = (tmp & (mask)) | \ 127 (dev_priv->state.where & ~(mask)); \ 128 } \ 129} while (0) 130static int savage_verify_state_s3d(drm_savage_private_t *dev_priv, 131 unsigned int start, unsigned int count, 132 const uint32_t *regs) 133{ 134 if (start < SAVAGE_TEXPALADDR_S3D || 135 start + count - 1 > SAVAGE_DESTTEXRWWATERMARK_S3D) { 136 DRM_ERROR("invalid register range (0x%04x-0x%04x)\n", 137 start, start + count - 1); 138 return -EINVAL; 139 } 140 141 SAVE_STATE_MASK(SAVAGE_SCSTART_S3D, s3d.new_scstart, 142 ~SAVAGE_SCISSOR_MASK_S3D); 143 SAVE_STATE_MASK(SAVAGE_SCEND_S3D, s3d.new_scend, 144 ~SAVAGE_SCISSOR_MASK_S3D); 145 146 /* if any texture regs were changed ... */ 147 if (start <= SAVAGE_TEXCTRL_S3D && 148 start + count > SAVAGE_TEXPALADDR_S3D) { 149 /* ... check texture state */ 150 SAVE_STATE(SAVAGE_TEXCTRL_S3D, s3d.texctrl); 151 SAVE_STATE(SAVAGE_TEXADDR_S3D, s3d.texaddr); 152 if (dev_priv->state.s3d.texctrl & SAVAGE_TEXCTRL_TEXEN_MASK) 153 return savage_verify_texaddr(dev_priv, 0, 154 dev_priv->state.s3d.texaddr); 155 } 156 157 return 0; 158} 159 160static int savage_verify_state_s4(drm_savage_private_t *dev_priv, 161 unsigned int start, unsigned int count, 162 const uint32_t *regs) 163{ 164 int ret = 0; 165 166 if (start < SAVAGE_DRAWLOCALCTRL_S4 || 167 start + count - 1 > SAVAGE_TEXBLENDCOLOR_S4) { 168 DRM_ERROR("invalid register range (0x%04x-0x%04x)\n", 169 start, start + count - 1); 170 return -EINVAL; 171 } 172 173 SAVE_STATE_MASK(SAVAGE_DRAWCTRL0_S4, s4.new_drawctrl0, 174 ~SAVAGE_SCISSOR_MASK_S4); 175 SAVE_STATE_MASK(SAVAGE_DRAWCTRL1_S4, s4.new_drawctrl1, 176 ~SAVAGE_SCISSOR_MASK_S4); 177 178 /* if any texture regs were changed ... */ 179 if (start <= SAVAGE_TEXDESCR_S4 && 180 start + count > SAVAGE_TEXPALADDR_S4) { 181 /* ... check texture state */ 182 SAVE_STATE(SAVAGE_TEXDESCR_S4, s4.texdescr); 183 SAVE_STATE(SAVAGE_TEXADDR0_S4, s4.texaddr0); 184 SAVE_STATE(SAVAGE_TEXADDR1_S4, s4.texaddr1); 185 if (dev_priv->state.s4.texdescr & SAVAGE_TEXDESCR_TEX0EN_MASK) 186 ret |= savage_verify_texaddr(dev_priv, 0, 187 dev_priv->state.s4.texaddr0); 188 if (dev_priv->state.s4.texdescr & SAVAGE_TEXDESCR_TEX1EN_MASK) 189 ret |= savage_verify_texaddr(dev_priv, 1, 190 dev_priv->state.s4.texaddr1); 191 } 192 193 return ret; 194} 195#undef SAVE_STATE 196#undef SAVE_STATE_MASK 197 198static int savage_dispatch_state(drm_savage_private_t *dev_priv, 199 const drm_savage_cmd_header_t *cmd_header, 200 const uint32_t *regs) 201{ 202 unsigned int count = cmd_header->state.count; 203 unsigned int start = cmd_header->state.start; 204 unsigned int count2 = 0; 205 unsigned int bci_size; 206 int ret; 207 DMA_LOCALS; 208 209 if (!count) 210 return 0; 211 212 if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) { 213 ret = savage_verify_state_s3d(dev_priv, start, count, regs); 214 if (ret != 0) 215 return ret; 216 /* scissor regs are emitted in savage_dispatch_draw */ 217 if (start < SAVAGE_SCSTART_S3D) { 218 if (start + count > SAVAGE_SCEND_S3D + 1) 219 count2 = count - (SAVAGE_SCEND_S3D + 1 - start); 220 if (start + count > SAVAGE_SCSTART_S3D) 221 count = SAVAGE_SCSTART_S3D - start; 222 } else if (start <= SAVAGE_SCEND_S3D) { 223 if (start + count > SAVAGE_SCEND_S3D + 1) { 224 count -= SAVAGE_SCEND_S3D + 1 - start; 225 start = SAVAGE_SCEND_S3D + 1; 226 } else 227 return 0; 228 } 229 } else { 230 ret = savage_verify_state_s4(dev_priv, start, count, regs); 231 if (ret != 0) 232 return ret; 233 /* scissor regs are emitted in savage_dispatch_draw */ 234 if (start < SAVAGE_DRAWCTRL0_S4) { 235 if (start + count > SAVAGE_DRAWCTRL1_S4 + 1) 236 count2 = count - 237 (SAVAGE_DRAWCTRL1_S4 + 1 - start); 238 if (start + count > SAVAGE_DRAWCTRL0_S4) 239 count = SAVAGE_DRAWCTRL0_S4 - start; 240 } else if (start <= SAVAGE_DRAWCTRL1_S4) { 241 if (start + count > SAVAGE_DRAWCTRL1_S4 + 1) { 242 count -= SAVAGE_DRAWCTRL1_S4 + 1 - start; 243 start = SAVAGE_DRAWCTRL1_S4 + 1; 244 } else 245 return 0; 246 } 247 } 248 249 bci_size = count + (count + 254) / 255 + count2 + (count2 + 254) / 255; 250 251 if (cmd_header->state.global) { 252 BEGIN_DMA(bci_size + 1); 253 DMA_WRITE(BCI_CMD_WAIT | BCI_CMD_WAIT_3D); 254 dev_priv->waiting = 1; 255 } else { 256 BEGIN_DMA(bci_size); 257 } 258 259 do { 260 while (count > 0) { 261 unsigned int n = count < 255 ? count : 255; 262 DMA_SET_REGISTERS(start, n); 263 DMA_COPY(regs, n); 264 count -= n; 265 start += n; 266 regs += n; 267 } 268 start += 2; 269 regs += 2; 270 count = count2; 271 count2 = 0; 272 } while (count); 273 274 DMA_COMMIT(); 275 276 return 0; 277} 278 279static int savage_dispatch_dma_prim(drm_savage_private_t *dev_priv, 280 const drm_savage_cmd_header_t *cmd_header, 281 const struct drm_buf *dmabuf) 282{ 283 unsigned char reorder = 0; 284 unsigned int prim = cmd_header->prim.prim; 285 unsigned int skip = cmd_header->prim.skip; 286 unsigned int n = cmd_header->prim.count; 287 unsigned int start = cmd_header->prim.start; 288 unsigned int i; 289 BCI_LOCALS; 290 291 if (!dmabuf) { 292 DRM_ERROR("called without dma buffers!\n"); 293 return -EINVAL; 294 } 295 296 if (!n) 297 return 0; 298 299 switch (prim) { 300 case SAVAGE_PRIM_TRILIST_201: 301 reorder = 1; 302 prim = SAVAGE_PRIM_TRILIST; 303 case SAVAGE_PRIM_TRILIST: 304 if (n % 3 != 0) { 305 DRM_ERROR("wrong number of vertices %u in TRILIST\n", 306 n); 307 return -EINVAL; 308 } 309 break; 310 case SAVAGE_PRIM_TRISTRIP: 311 case SAVAGE_PRIM_TRIFAN: 312 if (n < 3) { 313 DRM_ERROR 314 ("wrong number of vertices %u in TRIFAN/STRIP\n", 315 n); 316 return -EINVAL; 317 } 318 break; 319 default: 320 DRM_ERROR("invalid primitive type %u\n", prim); 321 return -EINVAL; 322 } 323 324 if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) { 325 if (skip != 0) { 326 DRM_ERROR("invalid skip flags 0x%04x for DMA\n", skip); 327 return -EINVAL; 328 } 329 } else { 330 unsigned int size = 10 - (skip & 1) - (skip >> 1 & 1) - 331 (skip >> 2 & 1) - (skip >> 3 & 1) - (skip >> 4 & 1) - 332 (skip >> 5 & 1) - (skip >> 6 & 1) - (skip >> 7 & 1); 333 if (skip > SAVAGE_SKIP_ALL_S4 || size != 8) { 334 DRM_ERROR("invalid skip flags 0x%04x for DMA\n", skip); 335 return -EINVAL; 336 } 337 if (reorder) { 338 DRM_ERROR("TRILIST_201 used on Savage4 hardware\n"); 339 return -EINVAL; 340 } 341 } 342 343 if (start + n > dmabuf->total / 32) { 344 DRM_ERROR("vertex indices (%u-%u) out of range (0-%u)\n", 345 start, start + n - 1, dmabuf->total / 32); 346 return -EINVAL; 347 } 348 349 /* Vertex DMA doesn't work with command DMA at the same time, 350 * so we use BCI_... to submit commands here. Flush buffered 351 * faked DMA first. */ 352 DMA_FLUSH(); 353 354 if (dmabuf->bus_address != dev_priv->state.common.vbaddr) { 355 BEGIN_BCI(2); 356 BCI_SET_REGISTERS(SAVAGE_VERTBUFADDR, 1); 357 BCI_WRITE(dmabuf->bus_address | dev_priv->dma_type); 358 dev_priv->state.common.vbaddr = dmabuf->bus_address; 359 } 360 if (S3_SAVAGE3D_SERIES(dev_priv->chipset) && dev_priv->waiting) { 361 /* Workaround for what looks like a hardware bug. If a 362 * WAIT_3D_IDLE was emitted some time before the 363 * indexed drawing command then the engine will lock 364 * up. There are two known workarounds: 365 * WAIT_IDLE_EMPTY or emit at least 63 NOPs. */ 366 BEGIN_BCI(63); 367 for (i = 0; i < 63; ++i) 368 BCI_WRITE(BCI_CMD_WAIT); 369 dev_priv->waiting = 0; 370 } 371 372 prim <<= 25; 373 while (n != 0) { 374 /* Can emit up to 255 indices (85 triangles) at once. */ 375 unsigned int count = n > 255 ? 255 : n; 376 if (reorder) { 377 /* Need to reorder indices for correct flat 378 * shading while preserving the clock sense 379 * for correct culling. Only on Savage3D. */ 380 int reorder[3] = { -1, -1, -1 }; 381 reorder[start % 3] = 2; 382 383 BEGIN_BCI((count + 1 + 1) / 2); 384 BCI_DRAW_INDICES_S3D(count, prim, start + 2); 385 386 for (i = start + 1; i + 1 < start + count; i += 2) 387 BCI_WRITE((i + reorder[i % 3]) | 388 ((i + 1 + 389 reorder[(i + 1) % 3]) << 16)); 390 if (i < start + count) 391 BCI_WRITE(i + reorder[i % 3]); 392 } else if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) { 393 BEGIN_BCI((count + 1 + 1) / 2); 394 BCI_DRAW_INDICES_S3D(count, prim, start); 395 396 for (i = start + 1; i + 1 < start + count; i += 2) 397 BCI_WRITE(i | ((i + 1) << 16)); 398 if (i < start + count) 399 BCI_WRITE(i); 400 } else { 401 BEGIN_BCI((count + 2 + 1) / 2); 402 BCI_DRAW_INDICES_S4(count, prim, skip); 403 404 for (i = start; i + 1 < start + count; i += 2) 405 BCI_WRITE(i | ((i + 1) << 16)); 406 if (i < start + count) 407 BCI_WRITE(i); 408 } 409 410 start += count; 411 n -= count; 412 413 prim |= BCI_CMD_DRAW_CONT; 414 } 415 416 return 0; 417} 418 419static int savage_dispatch_vb_prim(drm_savage_private_t *dev_priv, 420 const drm_savage_cmd_header_t *cmd_header, 421 const uint32_t *vtxbuf, unsigned int vb_size, 422 unsigned int vb_stride) 423{ 424 unsigned char reorder = 0; 425 unsigned int prim = cmd_header->prim.prim; 426 unsigned int skip = cmd_header->prim.skip; 427 unsigned int n = cmd_header->prim.count; 428 unsigned int start = cmd_header->prim.start; 429 unsigned int vtx_size; 430 unsigned int i; 431 DMA_LOCALS; 432 433 if (!n) 434 return 0; 435 436 switch (prim) { 437 case SAVAGE_PRIM_TRILIST_201: 438 reorder = 1; 439 prim = SAVAGE_PRIM_TRILIST; 440 case SAVAGE_PRIM_TRILIST: 441 if (n % 3 != 0) { 442 DRM_ERROR("wrong number of vertices %u in TRILIST\n", 443 n); 444 return -EINVAL; 445 } 446 break; 447 case SAVAGE_PRIM_TRISTRIP: 448 case SAVAGE_PRIM_TRIFAN: 449 if (n < 3) { 450 DRM_ERROR 451 ("wrong number of vertices %u in TRIFAN/STRIP\n", 452 n); 453 return -EINVAL; 454 } 455 break; 456 default: 457 DRM_ERROR("invalid primitive type %u\n", prim); 458 return -EINVAL; 459 } 460 461 if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) { 462 if (skip > SAVAGE_SKIP_ALL_S3D) { 463 DRM_ERROR("invalid skip flags 0x%04x\n", skip); 464 return -EINVAL; 465 } 466 vtx_size = 8; /* full vertex */ 467 } else { 468 if (skip > SAVAGE_SKIP_ALL_S4) { 469 DRM_ERROR("invalid skip flags 0x%04x\n", skip); 470 return -EINVAL; 471 } 472 vtx_size = 10; /* full vertex */ 473 } 474 475 vtx_size -= (skip & 1) + (skip >> 1 & 1) + 476 (skip >> 2 & 1) + (skip >> 3 & 1) + (skip >> 4 & 1) + 477 (skip >> 5 & 1) + (skip >> 6 & 1) + (skip >> 7 & 1); 478 479 if (vtx_size > vb_stride) { 480 DRM_ERROR("vertex size greater than vb stride (%u > %u)\n", 481 vtx_size, vb_stride); 482 return -EINVAL; 483 } 484 485 if (start + n > vb_size / (vb_stride * 4)) { 486 DRM_ERROR("vertex indices (%u-%u) out of range (0-%u)\n", 487 start, start + n - 1, vb_size / (vb_stride * 4)); 488 return -EINVAL; 489 } 490 491 prim <<= 25; 492 while (n != 0) { 493 /* Can emit up to 255 vertices (85 triangles) at once. */ 494 unsigned int count = n > 255 ? 255 : n; 495 if (reorder) { 496 /* Need to reorder vertices for correct flat 497 * shading while preserving the clock sense 498 * for correct culling. Only on Savage3D. */ 499 int reorder[3] = { -1, -1, -1 }; 500 reorder[start % 3] = 2; 501 502 BEGIN_DMA(count * vtx_size + 1); 503 DMA_DRAW_PRIMITIVE(count, prim, skip); 504 505 for (i = start; i < start + count; ++i) { 506 unsigned int j = i + reorder[i % 3]; 507 DMA_COPY(&vtxbuf[vb_stride * j], vtx_size); 508 } 509 510 DMA_COMMIT(); 511 } else { 512 BEGIN_DMA(count * vtx_size + 1); 513 DMA_DRAW_PRIMITIVE(count, prim, skip); 514 515 if (vb_stride == vtx_size) { 516 DMA_COPY(&vtxbuf[vb_stride * start], 517 vtx_size * count); 518 } else { 519 for (i = start; i < start + count; ++i) { 520 DMA_COPY(&vtxbuf[vb_stride * i], 521 vtx_size); 522 } 523 } 524 525 DMA_COMMIT(); 526 } 527 528 start += count; 529 n -= count; 530 531 prim |= BCI_CMD_DRAW_CONT; 532 } 533 534 return 0; 535} 536 537static int savage_dispatch_dma_idx(drm_savage_private_t *dev_priv, 538 const drm_savage_cmd_header_t *cmd_header, 539 const uint16_t *idx, 540 const struct drm_buf *dmabuf) 541{ 542 unsigned char reorder = 0; 543 unsigned int prim = cmd_header->idx.prim; 544 unsigned int skip = cmd_header->idx.skip; 545 unsigned int n = cmd_header->idx.count; 546 unsigned int i; 547 BCI_LOCALS; 548 549 if (!dmabuf) { 550 DRM_ERROR("called without dma buffers!\n"); 551 return -EINVAL; 552 } 553 554 if (!n) 555 return 0; 556 557 switch (prim) { 558 case SAVAGE_PRIM_TRILIST_201: 559 reorder = 1; 560 prim = SAVAGE_PRIM_TRILIST; 561 case SAVAGE_PRIM_TRILIST: 562 if (n % 3 != 0) { 563 DRM_ERROR("wrong number of indices %u in TRILIST\n", n); 564 return -EINVAL; 565 } 566 break; 567 case SAVAGE_PRIM_TRISTRIP: 568 case SAVAGE_PRIM_TRIFAN: 569 if (n < 3) { 570 DRM_ERROR 571 ("wrong number of indices %u in TRIFAN/STRIP\n", n); 572 return -EINVAL; 573 } 574 break; 575 default: 576 DRM_ERROR("invalid primitive type %u\n", prim); 577 return -EINVAL; 578 } 579 580 if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) { 581 if (skip != 0) { 582 DRM_ERROR("invalid skip flags 0x%04x for DMA\n", skip); 583 return -EINVAL; 584 } 585 } else { 586 unsigned int size = 10 - (skip & 1) - (skip >> 1 & 1) - 587 (skip >> 2 & 1) - (skip >> 3 & 1) - (skip >> 4 & 1) - 588 (skip >> 5 & 1) - (skip >> 6 & 1) - (skip >> 7 & 1); 589 if (skip > SAVAGE_SKIP_ALL_S4 || size != 8) { 590 DRM_ERROR("invalid skip flags 0x%04x for DMA\n", skip); 591 return -EINVAL; 592 } 593 if (reorder) { 594 DRM_ERROR("TRILIST_201 used on Savage4 hardware\n"); 595 return -EINVAL; 596 } 597 } 598 599 /* Vertex DMA doesn't work with command DMA at the same time, 600 * so we use BCI_... to submit commands here. Flush buffered 601 * faked DMA first. */ 602 DMA_FLUSH(); 603 604 if (dmabuf->bus_address != dev_priv->state.common.vbaddr) { 605 BEGIN_BCI(2); 606 BCI_SET_REGISTERS(SAVAGE_VERTBUFADDR, 1); 607 BCI_WRITE(dmabuf->bus_address | dev_priv->dma_type); 608 dev_priv->state.common.vbaddr = dmabuf->bus_address; 609 } 610 if (S3_SAVAGE3D_SERIES(dev_priv->chipset) && dev_priv->waiting) { 611 /* Workaround for what looks like a hardware bug. If a 612 * WAIT_3D_IDLE was emitted some time before the 613 * indexed drawing command then the engine will lock 614 * up. There are two known workarounds: 615 * WAIT_IDLE_EMPTY or emit at least 63 NOPs. */ 616 BEGIN_BCI(63); 617 for (i = 0; i < 63; ++i) 618 BCI_WRITE(BCI_CMD_WAIT); 619 dev_priv->waiting = 0; 620 } 621 622 prim <<= 25; 623 while (n != 0) { 624 /* Can emit up to 255 indices (85 triangles) at once. */ 625 unsigned int count = n > 255 ? 255 : n; 626 627 /* check indices */ 628 for (i = 0; i < count; ++i) { 629 if (idx[i] > dmabuf->total / 32) { 630 DRM_ERROR("idx[%u]=%u out of range (0-%u)\n", 631 i, idx[i], dmabuf->total / 32); 632 return -EINVAL; 633 } 634 } 635 636 if (reorder) { 637 /* Need to reorder indices for correct flat 638 * shading while preserving the clock sense 639 * for correct culling. Only on Savage3D. */ 640 int reorder[3] = { 2, -1, -1 }; 641 642 BEGIN_BCI((count + 1 + 1) / 2); 643 BCI_DRAW_INDICES_S3D(count, prim, idx[2]); 644 645 for (i = 1; i + 1 < count; i += 2) 646 BCI_WRITE(idx[i + reorder[i % 3]] | 647 (idx[i + 1 + 648 reorder[(i + 1) % 3]] << 16)); 649 if (i < count) 650 BCI_WRITE(idx[i + reorder[i % 3]]); 651 } else if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) { 652 BEGIN_BCI((count + 1 + 1) / 2); 653 BCI_DRAW_INDICES_S3D(count, prim, idx[0]); 654 655 for (i = 1; i + 1 < count; i += 2) 656 BCI_WRITE(idx[i] | (idx[i + 1] << 16)); 657 if (i < count) 658 BCI_WRITE(idx[i]); 659 } else { 660 BEGIN_BCI((count + 2 + 1) / 2); 661 BCI_DRAW_INDICES_S4(count, prim, skip); 662 663 for (i = 0; i + 1 < count; i += 2) 664 BCI_WRITE(idx[i] | (idx[i + 1] << 16)); 665 if (i < count) 666 BCI_WRITE(idx[i]); 667 } 668 669 idx += count; 670 n -= count; 671 672 prim |= BCI_CMD_DRAW_CONT; 673 } 674 675 return 0; 676} 677 678static int savage_dispatch_vb_idx(drm_savage_private_t *dev_priv, 679 const drm_savage_cmd_header_t *cmd_header, 680 const uint16_t *idx, 681 const uint32_t *vtxbuf, 682 unsigned int vb_size, unsigned int vb_stride) 683{ 684 unsigned char reorder = 0; 685 unsigned int prim = cmd_header->idx.prim; 686 unsigned int skip = cmd_header->idx.skip; 687 unsigned int n = cmd_header->idx.count; 688 unsigned int vtx_size; 689 unsigned int i; 690 DMA_LOCALS; 691 692 if (!n) 693 return 0; 694 695 switch (prim) { 696 case SAVAGE_PRIM_TRILIST_201: 697 reorder = 1; 698 prim = SAVAGE_PRIM_TRILIST; 699 case SAVAGE_PRIM_TRILIST: 700 if (n % 3 != 0) { 701 DRM_ERROR("wrong number of indices %u in TRILIST\n", n); 702 return -EINVAL; 703 } 704 break; 705 case SAVAGE_PRIM_TRISTRIP: 706 case SAVAGE_PRIM_TRIFAN: 707 if (n < 3) { 708 DRM_ERROR 709 ("wrong number of indices %u in TRIFAN/STRIP\n", n); 710 return -EINVAL; 711 } 712 break; 713 default: 714 DRM_ERROR("invalid primitive type %u\n", prim); 715 return -EINVAL; 716 } 717 718 if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) { 719 if (skip > SAVAGE_SKIP_ALL_S3D) { 720 DRM_ERROR("invalid skip flags 0x%04x\n", skip); 721 return -EINVAL; 722 } 723 vtx_size = 8; /* full vertex */ 724 } else { 725 if (skip > SAVAGE_SKIP_ALL_S4) { 726 DRM_ERROR("invalid skip flags 0x%04x\n", skip); 727 return -EINVAL; 728 } 729 vtx_size = 10; /* full vertex */ 730 } 731 732 vtx_size -= (skip & 1) + (skip >> 1 & 1) + 733 (skip >> 2 & 1) + (skip >> 3 & 1) + (skip >> 4 & 1) + 734 (skip >> 5 & 1) + (skip >> 6 & 1) + (skip >> 7 & 1); 735 736 if (vtx_size > vb_stride) { 737 DRM_ERROR("vertex size greater than vb stride (%u > %u)\n", 738 vtx_size, vb_stride); 739 return -EINVAL; 740 } 741 742 prim <<= 25; 743 while (n != 0) { 744 /* Can emit up to 255 vertices (85 triangles) at once. */ 745 unsigned int count = n > 255 ? 255 : n; 746 747 /* Check indices */ 748 for (i = 0; i < count; ++i) { 749 if (idx[i] > vb_size / (vb_stride * 4)) { 750 DRM_ERROR("idx[%u]=%u out of range (0-%u)\n", 751 i, idx[i], vb_size / (vb_stride * 4)); 752 return -EINVAL; 753 } 754 } 755 756 if (reorder) { 757 /* Need to reorder vertices for correct flat 758 * shading while preserving the clock sense 759 * for correct culling. Only on Savage3D. */ 760 int reorder[3] = { 2, -1, -1 }; 761 762 BEGIN_DMA(count * vtx_size + 1); 763 DMA_DRAW_PRIMITIVE(count, prim, skip); 764 765 for (i = 0; i < count; ++i) { 766 unsigned int j = idx[i + reorder[i % 3]]; 767 DMA_COPY(&vtxbuf[vb_stride * j], vtx_size); 768 } 769 770 DMA_COMMIT(); 771 } else { 772 BEGIN_DMA(count * vtx_size + 1); 773 DMA_DRAW_PRIMITIVE(count, prim, skip); 774 775 for (i = 0; i < count; ++i) { 776 unsigned int j = idx[i]; 777 DMA_COPY(&vtxbuf[vb_stride * j], vtx_size); 778 } 779 780 DMA_COMMIT(); 781 } 782 783 idx += count; 784 n -= count; 785 786 prim |= BCI_CMD_DRAW_CONT; 787 } 788 789 return 0; 790} 791 792static int savage_dispatch_clear(drm_savage_private_t *dev_priv, 793 const drm_savage_cmd_header_t *cmd_header, 794 const drm_savage_cmd_header_t *data, 795 unsigned int nbox, 796 const struct drm_clip_rect *boxes) 797{ 798 unsigned int flags = cmd_header->clear0.flags; 799 unsigned int clear_cmd; 800 unsigned int i, nbufs; 801 DMA_LOCALS; 802 803 if (nbox == 0) 804 return 0; 805 806 clear_cmd = BCI_CMD_RECT | BCI_CMD_RECT_XP | BCI_CMD_RECT_YP | 807 BCI_CMD_SEND_COLOR | BCI_CMD_DEST_PBD_NEW; 808 BCI_CMD_SET_ROP(clear_cmd,0xCC); 809 810 nbufs = ((flags & SAVAGE_FRONT) ? 1 : 0) + 811 ((flags & SAVAGE_BACK) ? 1 : 0) + ((flags & SAVAGE_DEPTH) ? 1 : 0); 812 if (nbufs == 0) 813 return 0; 814 815 if (data->clear1.mask != 0xffffffff) { 816 /* set mask */ 817 BEGIN_DMA(2); 818 DMA_SET_REGISTERS(SAVAGE_BITPLANEWTMASK, 1); 819 DMA_WRITE(data->clear1.mask); 820 DMA_COMMIT(); 821 } 822 for (i = 0; i < nbox; ++i) { 823 unsigned int x, y, w, h; 824 unsigned int buf; 825 826 x = boxes[i].x1, y = boxes[i].y1; 827 w = boxes[i].x2 - boxes[i].x1; 828 h = boxes[i].y2 - boxes[i].y1; 829 BEGIN_DMA(nbufs * 6); 830 for (buf = SAVAGE_FRONT; buf <= SAVAGE_DEPTH; buf <<= 1) { 831 if (!(flags & buf)) 832 continue; 833 DMA_WRITE(clear_cmd); 834 switch (buf) { 835 case SAVAGE_FRONT: 836 DMA_WRITE(dev_priv->front_offset); 837 DMA_WRITE(dev_priv->front_bd); 838 break; 839 case SAVAGE_BACK: 840 DMA_WRITE(dev_priv->back_offset); 841 DMA_WRITE(dev_priv->back_bd); 842 break; 843 case SAVAGE_DEPTH: 844 DMA_WRITE(dev_priv->depth_offset); 845 DMA_WRITE(dev_priv->depth_bd); 846 break; 847 } 848 DMA_WRITE(data->clear1.value); 849 DMA_WRITE(BCI_X_Y(x, y)); 850 DMA_WRITE(BCI_W_H(w, h)); 851 } 852 DMA_COMMIT(); 853 } 854 if (data->clear1.mask != 0xffffffff) { 855 /* reset mask */ 856 BEGIN_DMA(2); 857 DMA_SET_REGISTERS(SAVAGE_BITPLANEWTMASK, 1); 858 DMA_WRITE(0xffffffff); 859 DMA_COMMIT(); 860 } 861 862 return 0; 863} 864 865static int savage_dispatch_swap(drm_savage_private_t *dev_priv, 866 unsigned int nbox, const struct drm_clip_rect *boxes) 867{ 868 unsigned int swap_cmd; 869 unsigned int i; 870 DMA_LOCALS; 871 872 if (nbox == 0) 873 return 0; 874 875 swap_cmd = BCI_CMD_RECT | BCI_CMD_RECT_XP | BCI_CMD_RECT_YP | 876 BCI_CMD_SRC_PBD_COLOR_NEW | BCI_CMD_DEST_GBD; 877 BCI_CMD_SET_ROP(swap_cmd,0xCC); 878 879 for (i = 0; i < nbox; ++i) { 880 BEGIN_DMA(6); 881 DMA_WRITE(swap_cmd); 882 DMA_WRITE(dev_priv->back_offset); 883 DMA_WRITE(dev_priv->back_bd); 884 DMA_WRITE(BCI_X_Y(boxes[i].x1, boxes[i].y1)); 885 DMA_WRITE(BCI_X_Y(boxes[i].x1, boxes[i].y1)); 886 DMA_WRITE(BCI_W_H(boxes[i].x2 - boxes[i].x1, 887 boxes[i].y2 - boxes[i].y1)); 888 DMA_COMMIT(); 889 } 890 891 return 0; 892} 893 894static int savage_dispatch_draw(drm_savage_private_t *dev_priv, 895 const drm_savage_cmd_header_t *start, 896 const drm_savage_cmd_header_t *end, 897 const struct drm_buf *dmabuf, 898 const unsigned int *vtxbuf, 899 unsigned int vb_size, unsigned int vb_stride, 900 unsigned int nbox, 901 const struct drm_clip_rect *boxes) 902{ 903 unsigned int i, j; 904 int ret; 905 906 for (i = 0; i < nbox; ++i) { 907 const drm_savage_cmd_header_t *cmdbuf; 908 dev_priv->emit_clip_rect(dev_priv, &boxes[i]); 909 910 cmdbuf = start; 911 while (cmdbuf < end) { 912 drm_savage_cmd_header_t cmd_header; 913 cmd_header = *cmdbuf; 914 cmdbuf++; 915 switch (cmd_header.cmd.cmd) { 916 case SAVAGE_CMD_DMA_PRIM: 917 ret = savage_dispatch_dma_prim( 918 dev_priv, &cmd_header, dmabuf); 919 break; 920 case SAVAGE_CMD_VB_PRIM: 921 ret = savage_dispatch_vb_prim( 922 dev_priv, &cmd_header, 923 vtxbuf, vb_size, vb_stride); 924 break; 925 case SAVAGE_CMD_DMA_IDX: 926 j = (cmd_header.idx.count + 3) / 4; 927 /* j was check in savage_bci_cmdbuf */ 928 ret = savage_dispatch_dma_idx(dev_priv, 929 &cmd_header, (const uint16_t *)cmdbuf, 930 dmabuf); 931 cmdbuf += j; 932 break; 933 case SAVAGE_CMD_VB_IDX: 934 j = (cmd_header.idx.count + 3) / 4; 935 /* j was check in savage_bci_cmdbuf */ 936 ret = savage_dispatch_vb_idx(dev_priv, 937 &cmd_header, (const uint16_t *)cmdbuf, 938 (const uint32_t *)vtxbuf, vb_size, 939 vb_stride); 940 cmdbuf += j; 941 break; 942 default: 943 /* What's the best return code? EFAULT? */ 944 DRM_ERROR("IMPLEMENTATION ERROR: " 945 "non-drawing-command %d\n", 946 cmd_header.cmd.cmd); 947 return -EINVAL; 948 } 949 950 if (ret != 0) 951 return ret; 952 } 953 } 954 955 return 0; 956} 957 958int savage_bci_cmdbuf(struct drm_device *dev, void *data, struct drm_file *file_priv) 959{ 960 drm_savage_private_t *dev_priv = dev->dev_private; 961 struct drm_device_dma *dma = dev->dma; 962 struct drm_buf *dmabuf; 963 drm_savage_cmdbuf_t *cmdbuf = data; 964 drm_savage_cmd_header_t *kcmd_addr = NULL; 965 drm_savage_cmd_header_t *first_draw_cmd; 966 unsigned int *kvb_addr = NULL; 967 struct drm_clip_rect *kbox_addr = NULL; 968 unsigned int i, j; 969 int ret = 0; 970 971 DRM_DEBUG("\n"); 972 973 LOCK_TEST_WITH_RETURN(dev, file_priv); 974 975 if (dma && dma->buflist) { 976 if (cmdbuf->dma_idx > dma->buf_count) { 977 DRM_ERROR 978 ("vertex buffer index %u out of range (0-%u)\n", 979 cmdbuf->dma_idx, dma->buf_count - 1); 980 return -EINVAL; 981 } 982 dmabuf = dma->buflist[cmdbuf->dma_idx]; 983 } else { 984 dmabuf = NULL; 985 } 986 987 /* Copy the user buffers into kernel temporary areas. This hasn't been 988 * a performance loss compared to VERIFYAREA_READ/ 989 * COPY_FROM_USER_UNCHECKED when done in other drivers, and is correct 990 * for locking on FreeBSD. 991 */ 992 if (cmdbuf->size) { 993 kcmd_addr = drm_alloc(cmdbuf->size * 8, DRM_MEM_DRIVER); 994 if (kcmd_addr == NULL) 995 return -ENOMEM; 996 997 if (DRM_COPY_FROM_USER(kcmd_addr, cmdbuf->cmd_addr, 998 cmdbuf->size * 8)) 999 { 1000 drm_free(kcmd_addr, cmdbuf->size * 8, DRM_MEM_DRIVER); 1001 return -EFAULT; 1002 } 1003 cmdbuf->cmd_addr = kcmd_addr; 1004 } 1005 if (cmdbuf->vb_size) { 1006 kvb_addr = drm_alloc(cmdbuf->vb_size, DRM_MEM_DRIVER); 1007 if (kvb_addr == NULL) { 1008 ret = -ENOMEM; 1009 goto done; 1010 } 1011 1012 if (DRM_COPY_FROM_USER(kvb_addr, cmdbuf->vb_addr, 1013 cmdbuf->vb_size)) { 1014 ret = -EFAULT; 1015 goto done; 1016 } 1017 cmdbuf->vb_addr = kvb_addr; 1018 } 1019 if (cmdbuf->nbox) { 1020 kbox_addr = drm_alloc(cmdbuf->nbox * 1021 sizeof(struct drm_clip_rect), 1022 DRM_MEM_DRIVER); 1023 if (kbox_addr == NULL) { 1024 ret = -ENOMEM; 1025 goto done; 1026 } 1027 1028 if (DRM_COPY_FROM_USER(kbox_addr, cmdbuf->box_addr, 1029 cmdbuf->nbox * 1030 sizeof(struct drm_clip_rect))) { 1031 ret = -EFAULT; 1032 goto done; 1033 } 1034 cmdbuf->box_addr = kbox_addr; 1035 } 1036 1037 /* Make sure writes to DMA buffers are finished before sending 1038 * DMA commands to the graphics hardware. */ 1039 DRM_MEMORYBARRIER(); 1040 1041 /* Coming from user space. Don't know if the Xserver has 1042 * emitted wait commands. Assuming the worst. */ 1043 dev_priv->waiting = 1; 1044 1045 i = 0; 1046 first_draw_cmd = NULL; 1047 while (i < cmdbuf->size) { 1048 drm_savage_cmd_header_t cmd_header; 1049 cmd_header = *(drm_savage_cmd_header_t *)cmdbuf->cmd_addr; 1050 cmdbuf->cmd_addr++; 1051 i++; 1052 1053 /* Group drawing commands with same state to minimize 1054 * iterations over clip rects. */ 1055 j = 0; 1056 switch (cmd_header.cmd.cmd) { 1057 case SAVAGE_CMD_DMA_IDX: 1058 case SAVAGE_CMD_VB_IDX: 1059 j = (cmd_header.idx.count + 3) / 4; 1060 if (i + j > cmdbuf->size) { 1061 DRM_ERROR("indexed drawing command extends " 1062 "beyond end of command buffer\n"); 1063 DMA_FLUSH(); 1064 return -EINVAL; 1065 } 1066 /* fall through */ 1067 case SAVAGE_CMD_DMA_PRIM: 1068 case SAVAGE_CMD_VB_PRIM: 1069 if (!first_draw_cmd) 1070 first_draw_cmd = cmdbuf->cmd_addr - 1; 1071 cmdbuf->cmd_addr += j; 1072 i += j; 1073 break; 1074 default: 1075 if (first_draw_cmd) { 1076 ret = savage_dispatch_draw( 1077 dev_priv, first_draw_cmd, 1078 cmdbuf->cmd_addr - 1, 1079 dmabuf, cmdbuf->vb_addr, 1080 cmdbuf->vb_size, 1081 cmdbuf->vb_stride, 1082 cmdbuf->nbox, cmdbuf->box_addr); 1083 if (ret != 0) 1084 return ret; 1085 first_draw_cmd = NULL; 1086 } 1087 } 1088 if (first_draw_cmd) 1089 continue; 1090 1091 switch (cmd_header.cmd.cmd) { 1092 case SAVAGE_CMD_STATE: 1093 j = (cmd_header.state.count + 1) / 2; 1094 if (i + j > cmdbuf->size) { 1095 DRM_ERROR("command SAVAGE_CMD_STATE extends " 1096 "beyond end of command buffer\n"); 1097 DMA_FLUSH(); 1098 ret = -EINVAL; 1099 goto done; 1100 } 1101 ret = savage_dispatch_state(dev_priv, &cmd_header, 1102 (const uint32_t *)cmdbuf->cmd_addr); 1103 cmdbuf->cmd_addr += j; 1104 i += j; 1105 break; 1106 case SAVAGE_CMD_CLEAR: 1107 if (i + 1 > cmdbuf->size) { 1108 DRM_ERROR("command SAVAGE_CMD_CLEAR extends " 1109 "beyond end of command buffer\n"); 1110 DMA_FLUSH(); 1111 ret = -EINVAL; 1112 goto done; 1113 } 1114 ret = savage_dispatch_clear(dev_priv, &cmd_header, 1115 cmdbuf->cmd_addr, 1116 cmdbuf->nbox, 1117 cmdbuf->box_addr); 1118 cmdbuf->cmd_addr++; 1119 i++; 1120 break; 1121 case SAVAGE_CMD_SWAP: 1122 ret = savage_dispatch_swap(dev_priv, cmdbuf->nbox, 1123 cmdbuf->box_addr); 1124 break; 1125 default: 1126 DRM_ERROR("invalid command 0x%x\n", 1127 cmd_header.cmd.cmd); 1128 DMA_FLUSH(); 1129 ret = -EINVAL; 1130 goto done; 1131 } 1132 1133 if (ret != 0) { 1134 DMA_FLUSH(); 1135 goto done; 1136 } 1137 } 1138 1139 if (first_draw_cmd) { 1140 ret = savage_dispatch_draw( 1141 dev_priv, first_draw_cmd, cmdbuf->cmd_addr, dmabuf, 1142 cmdbuf->vb_addr, cmdbuf->vb_size, cmdbuf->vb_stride, 1143 cmdbuf->nbox, cmdbuf->box_addr); 1144 if (ret != 0) { 1145 DMA_FLUSH(); 1146 goto done; 1147 } 1148 } 1149 1150 DMA_FLUSH(); 1151 1152 if (dmabuf && cmdbuf->discard) { 1153 drm_savage_buf_priv_t *buf_priv = dmabuf->dev_private; 1154 uint16_t event; 1155 event = savage_bci_emit_event(dev_priv, SAVAGE_WAIT_3D); 1156 SET_AGE(&buf_priv->age, event, dev_priv->event_wrap); 1157 savage_freelist_put(dev, dmabuf); 1158 } 1159 1160done: 1161 /* If we didn't need to allocate them, these'll be NULL */ 1162 drm_free(kcmd_addr, cmdbuf->size * 8, DRM_MEM_DRIVER); 1163 drm_free(kvb_addr, cmdbuf->vb_size, DRM_MEM_DRIVER); 1164 drm_free(kbox_addr, cmdbuf->nbox * sizeof(struct drm_clip_rect), 1165 DRM_MEM_DRIVER); 1166 1167 return ret; 1168} 1169