1 2/* 3 * ATI Mach64 Hardware Acceleration 4 */ 5 6#include <linux/delay.h> 7#include <linux/fb.h> 8#include <video/mach64.h> 9#include "atyfb.h" 10 11 /* 12 * Generic Mach64 routines 13 */ 14 15/* this is for DMA GUI engine! work in progress */ 16typedef struct { 17 u32 frame_buf_offset; 18 u32 system_mem_addr; 19 u32 command; 20 u32 reserved; 21} BM_DESCRIPTOR_ENTRY; 22 23#define LAST_DESCRIPTOR (1 << 31) 24#define SYSTEM_TO_FRAME_BUFFER 0 25 26static u32 rotation24bpp(u32 dx, u32 direction) 27{ 28 u32 rotation; 29 if (direction & DST_X_LEFT_TO_RIGHT) { 30 rotation = (dx / 4) % 6; 31 } else { 32 rotation = ((dx + 2) / 4) % 6; 33 } 34 35 return ((rotation << 8) | DST_24_ROTATION_ENABLE); 36} 37 38void aty_reset_engine(const struct atyfb_par *par) 39{ 40 /* reset engine */ 41 aty_st_le32(GEN_TEST_CNTL, 42 aty_ld_le32(GEN_TEST_CNTL, par) & 43 ~(GUI_ENGINE_ENABLE | HWCURSOR_ENABLE), par); 44 /* enable engine */ 45 aty_st_le32(GEN_TEST_CNTL, 46 aty_ld_le32(GEN_TEST_CNTL, par) | GUI_ENGINE_ENABLE, par); 47 /* ensure engine is not locked up by clearing any FIFO or */ 48 /* HOST errors */ 49 aty_st_le32(BUS_CNTL, 50 aty_ld_le32(BUS_CNTL, par) | BUS_HOST_ERR_ACK | BUS_FIFO_ERR_ACK, par); 51} 52 53static void reset_GTC_3D_engine(const struct atyfb_par *par) 54{ 55 aty_st_le32(SCALE_3D_CNTL, 0xc0, par); 56 mdelay(GTC_3D_RESET_DELAY); 57 aty_st_le32(SETUP_CNTL, 0x00, par); 58 mdelay(GTC_3D_RESET_DELAY); 59 aty_st_le32(SCALE_3D_CNTL, 0x00, par); 60 mdelay(GTC_3D_RESET_DELAY); 61} 62 63void aty_init_engine(struct atyfb_par *par, struct fb_info *info) 64{ 65 u32 pitch_value; 66 u32 vxres; 67 68 /* determine modal information from global mode structure */ 69 pitch_value = info->fix.line_length / (info->var.bits_per_pixel / 8); 70 vxres = info->var.xres_virtual; 71 72 if (info->var.bits_per_pixel == 24) { 73 /* In 24 bpp, the engine is in 8 bpp - this requires that all */ 74 /* horizontal coordinates and widths must be adjusted */ 75 pitch_value *= 3; 76 vxres *= 3; 77 } 78 79 /* On GTC (RagePro), we need to reset the 3D engine before */ 80 if (M64_HAS(RESET_3D)) 81 reset_GTC_3D_engine(par); 82 83 /* Reset engine, enable, and clear any engine errors */ 84 aty_reset_engine(par); 85 /* Ensure that vga page pointers are set to zero - the upper */ 86 /* page pointers are set to 1 to handle overflows in the */ 87 /* lower page */ 88 aty_st_le32(MEM_VGA_WP_SEL, 0x00010000, par); 89 aty_st_le32(MEM_VGA_RP_SEL, 0x00010000, par); 90 91 /* ---- Setup standard engine context ---- */ 92 93 /* All GUI registers here are FIFOed - therefore, wait for */ 94 /* the appropriate number of empty FIFO entries */ 95 wait_for_fifo(14, par); 96 97 /* enable all registers to be loaded for context loads */ 98 aty_st_le32(CONTEXT_MASK, 0xFFFFFFFF, par); 99 100 /* set destination pitch to modal pitch, set offset to zero */ 101 aty_st_le32(DST_OFF_PITCH, (pitch_value / 8) << 22, par); 102 103 /* zero these registers (set them to a known state) */ 104 aty_st_le32(DST_Y_X, 0, par); 105 aty_st_le32(DST_HEIGHT, 0, par); 106 aty_st_le32(DST_BRES_ERR, 0, par); 107 aty_st_le32(DST_BRES_INC, 0, par); 108 aty_st_le32(DST_BRES_DEC, 0, par); 109 110 /* set destination drawing attributes */ 111 aty_st_le32(DST_CNTL, DST_LAST_PEL | DST_Y_TOP_TO_BOTTOM | 112 DST_X_LEFT_TO_RIGHT, par); 113 114 /* set source pitch to modal pitch, set offset to zero */ 115 aty_st_le32(SRC_OFF_PITCH, (pitch_value / 8) << 22, par); 116 117 /* set these registers to a known state */ 118 aty_st_le32(SRC_Y_X, 0, par); 119 aty_st_le32(SRC_HEIGHT1_WIDTH1, 1, par); 120 aty_st_le32(SRC_Y_X_START, 0, par); 121 aty_st_le32(SRC_HEIGHT2_WIDTH2, 1, par); 122 123 /* set source pixel retrieving attributes */ 124 aty_st_le32(SRC_CNTL, SRC_LINE_X_LEFT_TO_RIGHT, par); 125 126 /* set host attributes */ 127 wait_for_fifo(13, par); 128 aty_st_le32(HOST_CNTL, 0, par); 129 130 /* set pattern attributes */ 131 aty_st_le32(PAT_REG0, 0, par); 132 aty_st_le32(PAT_REG1, 0, par); 133 aty_st_le32(PAT_CNTL, 0, par); 134 135 /* set scissors to modal size */ 136 aty_st_le32(SC_LEFT, 0, par); 137 aty_st_le32(SC_TOP, 0, par); 138 aty_st_le32(SC_BOTTOM, par->crtc.vyres - 1, par); 139 aty_st_le32(SC_RIGHT, vxres - 1, par); 140 141 /* set background color to minimum value (usually BLACK) */ 142 aty_st_le32(DP_BKGD_CLR, 0, par); 143 144 /* set foreground color to maximum value (usually WHITE) */ 145 aty_st_le32(DP_FRGD_CLR, 0xFFFFFFFF, par); 146 147 /* set write mask to effect all pixel bits */ 148 aty_st_le32(DP_WRITE_MASK, 0xFFFFFFFF, par); 149 150 /* set foreground mix to overpaint and background mix to */ 151 /* no-effect */ 152 aty_st_le32(DP_MIX, FRGD_MIX_S | BKGD_MIX_D, par); 153 154 /* set primary source pixel channel to foreground color */ 155 /* register */ 156 aty_st_le32(DP_SRC, FRGD_SRC_FRGD_CLR, par); 157 158 /* set compare functionality to false (no-effect on */ 159 /* destination) */ 160 wait_for_fifo(3, par); 161 aty_st_le32(CLR_CMP_CLR, 0, par); 162 aty_st_le32(CLR_CMP_MASK, 0xFFFFFFFF, par); 163 aty_st_le32(CLR_CMP_CNTL, 0, par); 164 165 /* set pixel depth */ 166 wait_for_fifo(2, par); 167 aty_st_le32(DP_PIX_WIDTH, par->crtc.dp_pix_width, par); 168 aty_st_le32(DP_CHAIN_MASK, par->crtc.dp_chain_mask, par); 169 170 wait_for_fifo(5, par); 171 aty_st_le32(SCALE_3D_CNTL, 0, par); 172 aty_st_le32(Z_CNTL, 0, par); 173 aty_st_le32(CRTC_INT_CNTL, aty_ld_le32(CRTC_INT_CNTL, par) & ~0x20, 174 par); 175 aty_st_le32(GUI_TRAJ_CNTL, 0x100023, par); 176 177 /* insure engine is idle before leaving */ 178 wait_for_idle(par); 179} 180 181 /* 182 * Accelerated functions 183 */ 184 185static inline void draw_rect(s16 x, s16 y, u16 width, u16 height, 186 struct atyfb_par *par) 187{ 188 /* perform rectangle fill */ 189 wait_for_fifo(2, par); 190 aty_st_le32(DST_Y_X, (x << 16) | y, par); 191 aty_st_le32(DST_HEIGHT_WIDTH, (width << 16) | height, par); 192 par->blitter_may_be_busy = 1; 193} 194 195void atyfb_copyarea(struct fb_info *info, const struct fb_copyarea *area) 196{ 197 struct atyfb_par *par = (struct atyfb_par *) info->par; 198 u32 dy = area->dy, sy = area->sy, direction = DST_LAST_PEL; 199 u32 sx = area->sx, dx = area->dx, width = area->width, rotation = 0; 200 201 if (par->asleep) 202 return; 203 if (!area->width || !area->height) 204 return; 205 if (!par->accel_flags) { 206 cfb_copyarea(info, area); 207 return; 208 } 209 210 if (info->var.bits_per_pixel == 24) { 211 /* In 24 bpp, the engine is in 8 bpp - this requires that all */ 212 /* horizontal coordinates and widths must be adjusted */ 213 sx *= 3; 214 dx *= 3; 215 width *= 3; 216 } 217 218 if (area->sy < area->dy) { 219 dy += area->height - 1; 220 sy += area->height - 1; 221 } else 222 direction |= DST_Y_TOP_TO_BOTTOM; 223 224 if (sx < dx) { 225 dx += width - 1; 226 sx += width - 1; 227 } else 228 direction |= DST_X_LEFT_TO_RIGHT; 229 230 if (info->var.bits_per_pixel == 24) { 231 rotation = rotation24bpp(dx, direction); 232 } 233 234 wait_for_fifo(4, par); 235 aty_st_le32(DP_SRC, FRGD_SRC_BLIT, par); 236 aty_st_le32(SRC_Y_X, (sx << 16) | sy, par); 237 aty_st_le32(SRC_HEIGHT1_WIDTH1, (width << 16) | area->height, par); 238 aty_st_le32(DST_CNTL, direction | rotation, par); 239 draw_rect(dx, dy, width, area->height, par); 240} 241 242void atyfb_fillrect(struct fb_info *info, const struct fb_fillrect *rect) 243{ 244 struct atyfb_par *par = (struct atyfb_par *) info->par; 245 u32 color, dx = rect->dx, width = rect->width, rotation = 0; 246 247 if (par->asleep) 248 return; 249 if (!rect->width || !rect->height) 250 return; 251 if (!par->accel_flags) { 252 cfb_fillrect(info, rect); 253 return; 254 } 255 256 if (info->fix.visual == FB_VISUAL_TRUECOLOR || 257 info->fix.visual == FB_VISUAL_DIRECTCOLOR) 258 color = ((u32 *)(info->pseudo_palette))[rect->color]; 259 else 260 color = rect->color; 261 262 if (info->var.bits_per_pixel == 24) { 263 /* In 24 bpp, the engine is in 8 bpp - this requires that all */ 264 /* horizontal coordinates and widths must be adjusted */ 265 dx *= 3; 266 width *= 3; 267 rotation = rotation24bpp(dx, DST_X_LEFT_TO_RIGHT); 268 } 269 270 wait_for_fifo(3, par); 271 aty_st_le32(DP_FRGD_CLR, color, par); 272 aty_st_le32(DP_SRC, 273 BKGD_SRC_BKGD_CLR | FRGD_SRC_FRGD_CLR | MONO_SRC_ONE, 274 par); 275 aty_st_le32(DST_CNTL, 276 DST_LAST_PEL | DST_Y_TOP_TO_BOTTOM | 277 DST_X_LEFT_TO_RIGHT | rotation, par); 278 draw_rect(dx, rect->dy, width, rect->height, par); 279} 280 281void atyfb_imageblit(struct fb_info *info, const struct fb_image *image) 282{ 283 struct atyfb_par *par = (struct atyfb_par *) info->par; 284 u32 src_bytes, dx = image->dx, dy = image->dy, width = image->width; 285 u32 pix_width_save, pix_width, host_cntl, rotation = 0, src, mix; 286 287 if (par->asleep) 288 return; 289 if (!image->width || !image->height) 290 return; 291 if (!par->accel_flags || 292 (image->depth != 1 && info->var.bits_per_pixel != image->depth)) { 293 cfb_imageblit(info, image); 294 return; 295 } 296 297 pix_width = pix_width_save = aty_ld_le32(DP_PIX_WIDTH, par); 298 host_cntl = aty_ld_le32(HOST_CNTL, par) | HOST_BYTE_ALIGN; 299 300 switch (image->depth) { 301 case 1: 302 pix_width &= ~(BYTE_ORDER_MASK | HOST_MASK); 303 pix_width |= (BYTE_ORDER_MSB_TO_LSB | HOST_1BPP); 304 break; 305 case 4: 306 pix_width &= ~(BYTE_ORDER_MASK | HOST_MASK); 307 pix_width |= (BYTE_ORDER_MSB_TO_LSB | HOST_4BPP); 308 break; 309 case 8: 310 pix_width &= ~HOST_MASK; 311 pix_width |= HOST_8BPP; 312 break; 313 case 15: 314 pix_width &= ~HOST_MASK; 315 pix_width |= HOST_15BPP; 316 break; 317 case 16: 318 pix_width &= ~HOST_MASK; 319 pix_width |= HOST_16BPP; 320 break; 321 case 24: 322 pix_width &= ~HOST_MASK; 323 pix_width |= HOST_24BPP; 324 break; 325 case 32: 326 pix_width &= ~HOST_MASK; 327 pix_width |= HOST_32BPP; 328 break; 329 } 330 331 if (info->var.bits_per_pixel == 24) { 332 /* In 24 bpp, the engine is in 8 bpp - this requires that all */ 333 /* horizontal coordinates and widths must be adjusted */ 334 dx *= 3; 335 width *= 3; 336 337 rotation = rotation24bpp(dx, DST_X_LEFT_TO_RIGHT); 338 339 pix_width &= ~DST_MASK; 340 pix_width |= DST_8BPP; 341 342 /* 343 * since Rage 3D IIc we have DP_HOST_TRIPLE_EN bit 344 * this hwaccelerated triple has an issue with not aligned data 345 */ 346 if (M64_HAS(HW_TRIPLE) && image->width % 8 == 0) 347 pix_width |= DP_HOST_TRIPLE_EN; 348 } 349 350 if (image->depth == 1) { 351 u32 fg, bg; 352 if (info->fix.visual == FB_VISUAL_TRUECOLOR || 353 info->fix.visual == FB_VISUAL_DIRECTCOLOR) { 354 fg = ((u32*)(info->pseudo_palette))[image->fg_color]; 355 bg = ((u32*)(info->pseudo_palette))[image->bg_color]; 356 } else { 357 fg = image->fg_color; 358 bg = image->bg_color; 359 } 360 361 wait_for_fifo(2, par); 362 aty_st_le32(DP_BKGD_CLR, bg, par); 363 aty_st_le32(DP_FRGD_CLR, fg, par); 364 src = MONO_SRC_HOST | FRGD_SRC_FRGD_CLR | BKGD_SRC_BKGD_CLR; 365 mix = FRGD_MIX_S | BKGD_MIX_S; 366 } else { 367 src = MONO_SRC_ONE | FRGD_SRC_HOST; 368 mix = FRGD_MIX_D_XOR_S | BKGD_MIX_D; 369 } 370 371 wait_for_fifo(6, par); 372 aty_st_le32(DP_WRITE_MASK, 0xFFFFFFFF, par); 373 aty_st_le32(DP_PIX_WIDTH, pix_width, par); 374 aty_st_le32(DP_MIX, mix, par); 375 aty_st_le32(DP_SRC, src, par); 376 aty_st_le32(HOST_CNTL, host_cntl, par); 377 aty_st_le32(DST_CNTL, DST_Y_TOP_TO_BOTTOM | DST_X_LEFT_TO_RIGHT | rotation, par); 378 379 draw_rect(dx, dy, width, image->height, par); 380 src_bytes = (((image->width * image->depth) + 7) / 8) * image->height; 381 382 /* manual triple each pixel */ 383 if (info->var.bits_per_pixel == 24 && !(pix_width & DP_HOST_TRIPLE_EN)) { 384 int inbit, outbit, mult24, byte_id_in_dword, width; 385 u8 *pbitmapin = (u8*)image->data, *pbitmapout; 386 u32 hostdword; 387 388 for (width = image->width, inbit = 7, mult24 = 0; src_bytes; ) { 389 for (hostdword = 0, pbitmapout = (u8*)&hostdword, byte_id_in_dword = 0; 390 byte_id_in_dword < 4 && src_bytes; 391 byte_id_in_dword++, pbitmapout++) { 392 for (outbit = 7; outbit >= 0; outbit--) { 393 *pbitmapout |= (((*pbitmapin >> inbit) & 1) << outbit); 394 mult24++; 395 /* next bit */ 396 if (mult24 == 3) { 397 mult24 = 0; 398 inbit--; 399 width--; 400 } 401 402 /* next byte */ 403 if (inbit < 0 || width == 0) { 404 src_bytes--; 405 pbitmapin++; 406 inbit = 7; 407 408 if (width == 0) { 409 width = image->width; 410 outbit = 0; 411 } 412 } 413 } 414 } 415 wait_for_fifo(1, par); 416 aty_st_le32(HOST_DATA0, hostdword, par); 417 } 418 } else { 419 u32 *pbitmap, dwords = (src_bytes + 3) / 4; 420 for (pbitmap = (u32*)(image->data); dwords; dwords--, pbitmap++) { 421 wait_for_fifo(1, par); 422 aty_st_le32(HOST_DATA0, le32_to_cpup(pbitmap), par); 423 } 424 } 425 426 /* restore pix_width */ 427 wait_for_fifo(1, par); 428 aty_st_le32(DP_PIX_WIDTH, pix_width_save, par); 429} 430