/*
 * Copyright © 2008 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */

#include "linux/string.h"
#include "linux/bitops.h"
#include "drmP.h"
#include "drm.h"
#include "i915_drm.h"
#include "i915_drv.h"

/** @file i915_gem_tiling.c
 *
 * Support for managing the tiling state of buffer objects.
 *
 * The idea behind tiling is to increase cache hit rates by rearranging
 * pixel data so that a group of pixel accesses are in the same cacheline.
 * Performance improvements from doing this on the back/depth buffer are on
 * the order of 30%.
 *
 * Intel architectures make this somewhat more complicated, though, by
 * adjustments made to addressing of data when the memory is in interleaved
 * mode (matched pairs of DIMMs) to improve memory bandwidth.
 * For interleaved memory, the CPU sends every sequential 64 bytes
 * to an alternate memory channel so it can get the bandwidth from both.
 *
 * The GPU also rearranges its accesses for increased bandwidth to interleaved
 * memory, and it matches what the CPU does for non-tiled.  However, when tiled
 * it does it a little differently, since tiled access walks addresses not just
 * in the X direction but also in Y.  So, along with alternating channels when
 * bit 6 of the address flips, it also alternates when other bits flip -- bits
 * 9 (every 512 bytes, an X tile scanline) and 10 (every two X tile scanlines)
 * are common to both the 915 and 965-class hardware.
 *
 * The CPU also sometimes XORs in higher bits as well, to improve
 * bandwidth doing strided access like we do so frequently in graphics.  This
 * is called "Channel XOR Randomization" in the MCH documentation.  The result
 * is that the CPU is XORing in either bit 11 or bit 17 to bit 6 of its address
 * decode.
 *
 * All of this bit 6 XORing has an effect on our memory management,
 * as we need to make sure that the 3d driver can correctly address object
 * contents.
 *
 * If we don't have interleaved memory, all tiling is safe and no swizzling is
 * required.
 *
 * When bit 17 is XORed in, we simply refuse to tile at all.  Bit 17 is not
 * just a page offset, so as we page an object out and back in, individual
 * pages in it will have different bit 17 addresses, resulting in each 64
 * bytes being swapped with its neighbor!
 *
 * Otherwise, if interleaved, we have to tell the 3d driver what the address
 * swizzling it needs to do is, since it's writing with the CPU to the pages
 * (bit 6 and potentially bit 11 XORed in), and the GPU is reading from the
 * pages (bits 6, 9, and 10 XORed in), resulting in a cumulative bit swizzling
 * required by the CPU of XORing in bits 6, 9, 10, and potentially 11, in order
 * to match what the GPU expects.
 */
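
/*
 * Illustrative sketch, not driver code: the cumulative CPU-side swizzle
 * described above for the interleaved, bit-11 case.  Bit 6 of a linear
 * offset is XORed with bits 9, 10, and 11 so that CPU writes land where the
 * GPU's tiled reads expect them.  The helper name is hypothetical.
 */
static inline unsigned long
i915_swizzle_offset_9_10_11(unsigned long offset)
{
	unsigned long bit6 = ((offset >> 9) ^ (offset >> 10) ^
			      (offset >> 11)) & 1;

	/* Flipping bit 6 swaps a 64-byte block with its neighbor. */
	return offset ^ (bit6 << 6);
}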
/**
 * Detects bit 6 swizzling of address lookup between IGD access and CPU
 * access through main memory.
 */
void
i915_gem_detect_bit_6_swizzle(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	uint32_t swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
	uint32_t swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;

	if (IS_IRONLAKE(dev) || IS_GEN6(dev)) {
		/* On Ironlake and Sandy Bridge, the GPU uses the same
		 * swizzling setup regardless of the DRAM configuration.
		 */
		swizzle_x = I915_BIT_6_SWIZZLE_9_10;
		swizzle_y = I915_BIT_6_SWIZZLE_9;
	} else if (!IS_I9XX(dev)) {
		/* As far as we know, the 865 doesn't have these bit 6
		 * swizzling issues.
		 */
		swizzle_x = I915_BIT_6_SWIZZLE_NONE;
		swizzle_y = I915_BIT_6_SWIZZLE_NONE;
	} else if (IS_MOBILE(dev)) {
		uint32_t dcc;

		/* On mobile 9xx chipsets, channel interleave by the CPU is
		 * determined by DCC.  For single-channel, neither the CPU
		 * nor the GPU do swizzling.  For dual channel interleaved,
		 * the GPU's interleave is bits 9 and 10 for X tiled, and bit
		 * 9 for Y tiled.  The CPU's interleave is independent, and
		 * can be based on either bit 11 (haven't seen this yet) or
		 * bit 17 (common).
		 */
		dcc = I915_READ(DCC);
		switch (dcc & DCC_ADDRESSING_MODE_MASK) {
		case DCC_ADDRESSING_MODE_SINGLE_CHANNEL:
		case DCC_ADDRESSING_MODE_DUAL_CHANNEL_ASYMMETRIC:
			swizzle_x = I915_BIT_6_SWIZZLE_NONE;
			swizzle_y = I915_BIT_6_SWIZZLE_NONE;
			break;
		case DCC_ADDRESSING_MODE_DUAL_CHANNEL_INTERLEAVED:
			if (dcc & DCC_CHANNEL_XOR_DISABLE) {
				/* This is the base swizzling by the GPU for
				 * tiled buffers.
				 */
				swizzle_x = I915_BIT_6_SWIZZLE_9_10;
				swizzle_y = I915_BIT_6_SWIZZLE_9;
			} else if ((dcc & DCC_CHANNEL_XOR_BIT_17) == 0) {
				/* Bit 11 swizzling by the CPU in addition. */
				swizzle_x = I915_BIT_6_SWIZZLE_9_10_11;
				swizzle_y = I915_BIT_6_SWIZZLE_9_11;
			} else {
				/* Bit 17 swizzling by the CPU in addition. */
				swizzle_x = I915_BIT_6_SWIZZLE_9_10_17;
				swizzle_y = I915_BIT_6_SWIZZLE_9_17;
			}
			break;
		}
		if (dcc == 0xffffffff) {
			DRM_ERROR("Couldn't read from MCHBAR.  "
				  "Disabling tiling.\n");
			swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
			swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
		}
	} else {
		/* The 965, G33, and newer have a very flexible memory
		 * configuration.  It will enable dual-channel mode
		 * (interleaving) on as much memory as it can, and the GPU
		 * will additionally sometimes enable different bit 6
		 * swizzling for tiled objects from the CPU.
		 *
		 * Here's what I found on the G965:
		 *
		 *    slot fill          memory size   swizzling
		 *    0A   0B   1A   1B  1-ch   2-ch
		 *    512  0    0    0   512    0      O
		 *    512  0    512  0   16     1008   X
		 *    512  0    0    512 16     1008   X
		 *    0    512  0    512 16     1008   X
		 *    1024 1024 1024 0   2048   1024   O
		 *
		 * We could probably detect this based on either the DRB
		 * matching, which was the case for the swizzling required in
		 * the table above, or from the 1-ch value being less than
		 * the minimum size of a rank.
		 */
		if (I915_READ16(C0DRB3) != I915_READ16(C1DRB3)) {
			swizzle_x = I915_BIT_6_SWIZZLE_NONE;
			swizzle_y = I915_BIT_6_SWIZZLE_NONE;
		} else {
			swizzle_x = I915_BIT_6_SWIZZLE_9_10;
			swizzle_y = I915_BIT_6_SWIZZLE_9;
		}
	}

	dev_priv->mm.bit_6_swizzle_x = swizzle_x;
	dev_priv->mm.bit_6_swizzle_y = swizzle_y;
}
/* Check pitch constraints for all chips & tiling formats */
bool
i915_tiling_ok(struct drm_device *dev, int stride, int size, int tiling_mode)
{
	int tile_width;

	/* Linear is always fine */
	if (tiling_mode == I915_TILING_NONE)
		return true;

	if (!IS_I9XX(dev) ||
	    (tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev)))
		tile_width = 128;
	else
		tile_width = 512;

	/* check maximum stride & object size */
	if (IS_I965G(dev)) {
		/* i965 stores the end address of the gtt mapping in the fence
		 * reg, so don't bother to check the size.
		 */
		if (stride / 128 > I965_FENCE_MAX_PITCH_VAL)
			return false;
	} else if (IS_GEN3(dev) || IS_GEN2(dev)) {
		if (stride > 8192)
			return false;

		if (IS_GEN3(dev)) {
			if (size > I830_FENCE_MAX_SIZE_VAL << 20)
				return false;
		} else {
			if (size > I830_FENCE_MAX_SIZE_VAL << 19)
				return false;
		}
	}

	/* 965+ just needs multiples of tile width */
	if (IS_I965G(dev)) {
		if (stride & (tile_width - 1))
			return false;
		return true;
	}

	/* Pre-965 needs power of two tile widths */
	if (stride < tile_width)
		return false;

	if (stride & (stride - 1))
		return false;

	return true;
}
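
/*
 * Illustrative sketch, not driver code: picking the smallest stride that
 * satisfies the constraints i915_tiling_ok enforces above -- a multiple of
 * the tile width on 965+, a power of two no smaller than the tile width
 * before that.  The helper name is hypothetical.
 */
static inline int
i915_min_tiled_stride(struct drm_device *dev, int row_bytes, int tile_width)
{
	int stride;

	if (IS_I965G(dev))
		return ALIGN(row_bytes, tile_width);

	/* Pre-965: round up to the next power of two >= tile_width. */
	for (stride = tile_width; stride < row_bytes; stride <<= 1)
		;
	return stride;
}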
bool
i915_gem_object_fence_offset_ok(struct drm_gem_object *obj, int tiling_mode)
{
	struct drm_device *dev = obj->dev;
	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);

	if (obj_priv->gtt_space == NULL)
		return true;

	if (tiling_mode == I915_TILING_NONE)
		return true;

	if (!IS_I965G(dev)) {
		if (obj_priv->gtt_offset & (obj->size - 1))
			return false;
		if (IS_I9XX(dev)) {
			if (obj_priv->gtt_offset & ~I915_FENCE_START_MASK)
				return false;
		} else {
			if (obj_priv->gtt_offset & ~I830_FENCE_START_MASK)
				return false;
		}
	}

	return true;
}

/**
 * Sets the tiling mode of an object, returning the required swizzling of
 * bit 6 of addresses in the object.
 */
int
i915_gem_set_tiling(struct drm_device *dev, void *data,
		    struct drm_file *file_priv)
{
	struct drm_i915_gem_set_tiling *args = data;
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_gem_object *obj;
	struct drm_i915_gem_object *obj_priv;
	int ret = 0;

	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
	if (obj == NULL)
		return -ENOENT;
	obj_priv = to_intel_bo(obj);

	if (!i915_tiling_ok(dev, args->stride, obj->size, args->tiling_mode)) {
		drm_gem_object_unreference_unlocked(obj);
		return -EINVAL;
	}

	if (obj_priv->pin_count) {
		drm_gem_object_unreference_unlocked(obj);
		return -EBUSY;
	}

	if (args->tiling_mode == I915_TILING_NONE) {
		args->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
		args->stride = 0;
	} else {
		if (args->tiling_mode == I915_TILING_X)
			args->swizzle_mode = dev_priv->mm.bit_6_swizzle_x;
		else
			args->swizzle_mode = dev_priv->mm.bit_6_swizzle_y;

		/* Hide bit 17 swizzling from the user.  This prevents old Mesa
		 * from aborting the application on sw fallbacks to bit 17,
		 * and we use the pread/pwrite bit17 paths to swizzle for it.
		 * If there was a user that was relying on the swizzle
		 * information for drm_intel_bo_map()ed reads/writes this would
		 * break it, but we don't have any of those.
		 */
		if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_17)
			args->swizzle_mode = I915_BIT_6_SWIZZLE_9;
		if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_10_17)
			args->swizzle_mode = I915_BIT_6_SWIZZLE_9_10;

		/* If we can't handle the swizzling, make it untiled. */
		if (args->swizzle_mode == I915_BIT_6_SWIZZLE_UNKNOWN) {
			args->tiling_mode = I915_TILING_NONE;
			args->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
			args->stride = 0;
		}
	}

	mutex_lock(&dev->struct_mutex);
	if (args->tiling_mode != obj_priv->tiling_mode ||
	    args->stride != obj_priv->stride) {
		/* We need to rebind the object if its current allocation
		 * no longer meets the alignment restrictions for its new
		 * tiling mode.  Otherwise we can just leave it alone, but
		 * need to ensure that any fence register is cleared.
		 */
		if (!i915_gem_object_fence_offset_ok(obj, args->tiling_mode))
			ret = i915_gem_object_unbind(obj);
		else if (obj_priv->fence_reg != I915_FENCE_REG_NONE)
			ret = i915_gem_object_put_fence_reg(obj);
		else
			i915_gem_release_mmap(obj);

		if (ret != 0) {
			args->tiling_mode = obj_priv->tiling_mode;
			args->stride = obj_priv->stride;
			goto err;
		}

		obj_priv->tiling_mode = args->tiling_mode;
		obj_priv->stride = args->stride;
	}
err:
	drm_gem_object_unreference(obj);
	mutex_unlock(&dev->struct_mutex);

	return ret;
}

/**
 * Returns the current tiling mode and required bit 6 swizzling for the object.
 */
int
i915_gem_get_tiling(struct drm_device *dev, void *data,
		    struct drm_file *file_priv)
{
	struct drm_i915_gem_get_tiling *args = data;
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_gem_object *obj;
	struct drm_i915_gem_object *obj_priv;

	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
	if (obj == NULL)
		return -ENOENT;
	obj_priv = to_intel_bo(obj);

	mutex_lock(&dev->struct_mutex);

	args->tiling_mode = obj_priv->tiling_mode;
	switch (obj_priv->tiling_mode) {
	case I915_TILING_X:
		args->swizzle_mode = dev_priv->mm.bit_6_swizzle_x;
		break;
	case I915_TILING_Y:
		args->swizzle_mode = dev_priv->mm.bit_6_swizzle_y;
		break;
	case I915_TILING_NONE:
		args->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
		break;
	default:
		DRM_ERROR("unknown tiling mode\n");
	}

	/* Hide bit 17 from the user -- see comment in i915_gem_set_tiling */
	if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_17)
		args->swizzle_mode = I915_BIT_6_SWIZZLE_9;
	if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_10_17)
		args->swizzle_mode = I915_BIT_6_SWIZZLE_9_10;

	drm_gem_object_unreference(obj);
	mutex_unlock(&dev->struct_mutex);

	return 0;
}
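
/*
 * Illustrative userspace sketch, not driver code, of how the two ioctls
 * above are driven from libdrm.  Assumes an open DRM fd and an existing GEM
 * handle; the function and variable names are hypothetical.  Kept under
 * "#if 0" since it does not belong in a kernel build.
 */
#if 0	/* userspace example */
#include <errno.h>
#include <stdint.h>
#include <xf86drm.h>
#include "i915_drm.h"

static int set_x_tiling(int fd, uint32_t handle, uint32_t stride)
{
	struct drm_i915_gem_set_tiling set = {
		.handle = handle,
		.tiling_mode = I915_TILING_X,
		.stride = stride,
	};

	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_TILING, &set))
		return -errno;

	/* On return, set.swizzle_mode reports the bit 6 swizzling (with
	 * bit 17 hidden) that CPU access to the object must apply.
	 */
	return set.swizzle_mode;
}
#endif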
/**
 * Swap every 64 bytes of this page around, to account for it having a new
 * bit 17 of its physical address and therefore being interpreted differently
 * by the GPU.
 */
static int
i915_gem_swizzle_page(struct page *page)
{
	char *vaddr;
	int i;
	char temp[64];

	vaddr = kmap(page);
	if (vaddr == NULL)
		return -ENOMEM;

	/* Exchange each 64-byte block with its neighbor. */
	for (i = 0; i < PAGE_SIZE; i += 128) {
		memcpy(temp, &vaddr[i], 64);
		memcpy(&vaddr[i], &vaddr[i + 64], 64);
		memcpy(&vaddr[i + 64], temp, 64);
	}

	kunmap(page);

	return 0;
}

void
i915_gem_object_do_bit_17_swizzle(struct drm_gem_object *obj)
{
	struct drm_device *dev = obj->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
	int page_count = obj->size >> PAGE_SHIFT;
	int i;

	if (dev_priv->mm.bit_6_swizzle_x != I915_BIT_6_SWIZZLE_9_10_17)
		return;

	if (obj_priv->bit_17 == NULL)
		return;

	/* Only swizzle pages whose bit 17 changed since the last save. */
	for (i = 0; i < page_count; i++) {
		char new_bit_17 = page_to_phys(obj_priv->pages[i]) >> 17;
		if ((new_bit_17 & 0x1) !=
		    (test_bit(i, obj_priv->bit_17) != 0)) {
			int ret = i915_gem_swizzle_page(obj_priv->pages[i]);
			if (ret != 0) {
				DRM_ERROR("Failed to swizzle page\n");
				return;
			}
			set_page_dirty(obj_priv->pages[i]);
		}
	}
}

void
i915_gem_object_save_bit_17_swizzle(struct drm_gem_object *obj)
{
	struct drm_device *dev = obj->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
	int page_count = obj->size >> PAGE_SHIFT;
	int i;

	if (dev_priv->mm.bit_6_swizzle_x != I915_BIT_6_SWIZZLE_9_10_17)
		return;

	if (obj_priv->bit_17 == NULL) {
		obj_priv->bit_17 = kmalloc(BITS_TO_LONGS(page_count) *
					   sizeof(long), GFP_KERNEL);
		if (obj_priv->bit_17 == NULL) {
			DRM_ERROR("Failed to allocate memory for bit 17 "
				  "record\n");
			return;
		}
	}

	/* Record bit 17 of each page's current physical address. */
	for (i = 0; i < page_count; i++) {
		if (page_to_phys(obj_priv->pages[i]) & (1 << 17))
			__set_bit(i, obj_priv->bit_17);
		else
			__clear_bit(i, obj_priv->bit_17);
	}
}
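
/*
 * Illustrative sketch, not driver code: why a bit 17 flip swaps 64-byte
 * neighbors.  With bit 17 XORed into bit 6 of the channel decode, toggling
 * bit 17 of a page's physical address toggles bit 6 for every offset within
 * the page, so offset 0 trades places with offset 64, 128 with 192, and so
 * on -- exactly the exchange i915_gem_swizzle_page performs.  The helper
 * name is hypothetical.
 */
static inline unsigned long
i915_bit17_swizzled_offset(unsigned long phys_addr, unsigned long offset)
{
	unsigned long bit17 = (phys_addr >> 17) & 1;

	return offset ^ (bit17 << 6);	/* flip bit 6 when bit 17 is set */
}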