/*
 * Copyright (C) 2007 Ben Skeggs.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include "drmP.h"
#include "drm.h"
#include "nouveau_drv.h"
#include "nouveau_dma.h"

void
nouveau_dma_pre_init(struct nouveau_channel *chan)
{
	struct drm_nouveau_private *dev_priv = chan->dev->dev_private;
	struct nouveau_bo *pushbuf = chan->pushbuf_bo;

	if (dev_priv->card_type == NV_50) {
		const int ib_size = pushbuf->bo.mem.size / 2;

		chan->dma.ib_base = (pushbuf->bo.mem.size - ib_size) >> 2;
		chan->dma.ib_max = (ib_size / 8) - 1;
		chan->dma.ib_put = 0;
		chan->dma.ib_free = chan->dma.ib_max - chan->dma.ib_put;

		chan->dma.max = (pushbuf->bo.mem.size - ib_size) >> 2;
	} else {
		chan->dma.max = (pushbuf->bo.mem.size >> 2) - 2;
	}

	chan->dma.put = 0;
	chan->dma.cur = chan->dma.put;
	chan->dma.free = chan->dma.max - chan->dma.cur;
}
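/*
 * On NV_50 the push buffer is split in half: the lower half holds the
 * command words themselves, the upper half holds the indirect buffer
 * (IB) of { address, length } entries that point the GPU at them.  A
 * worked example of the arithmetic above, assuming a hypothetical
 * 64KiB push buffer:
 *
 *	ib_size = 65536 / 2            = 32768 bytes
 *	ib_base = (65536 - 32768) >> 2 = 8192  (dword offset of the IB)
 *	ib_max  = (32768 / 8) - 1      = 4095  (IB entries are 8 bytes)
 */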
int
nouveau_dma_init(struct nouveau_channel *chan)
{
	struct drm_device *dev = chan->dev;
	struct drm_nouveau_private *dev_priv = dev->dev_private;
	struct nouveau_gpuobj *m2mf = NULL;
	struct nouveau_gpuobj *nvsw = NULL;
	int ret, i;

	/* Create NV_MEMORY_TO_MEMORY_FORMAT for buffer moves */
	ret = nouveau_gpuobj_gr_new(chan, dev_priv->card_type < NV_50 ?
				    0x0039 : 0x5039, &m2mf);
	if (ret)
		return ret;

	ret = nouveau_gpuobj_ref_add(dev, chan, NvM2MF, m2mf, NULL);
	if (ret)
		return ret;

	/* Create an NV_SW object for various sync purposes */
	ret = nouveau_gpuobj_sw_new(chan, NV_SW, &nvsw);
	if (ret)
		return ret;

	ret = nouveau_gpuobj_ref_add(dev, chan, NvSw, nvsw, NULL);
	if (ret)
		return ret;

	/* NV_MEMORY_TO_MEMORY_FORMAT requires a notifier object */
	ret = nouveau_notifier_alloc(chan, NvNotify0, 32, &chan->m2mf_ntfy);
	if (ret)
		return ret;

	/* Map push buffer */
	ret = nouveau_bo_map(chan->pushbuf_bo);
	if (ret)
		return ret;

	/* Map M2MF notifier object - fbcon. */
	ret = nouveau_bo_map(chan->notifier_bo);
	if (ret)
		return ret;

	/* Insert NOPs for NOUVEAU_DMA_SKIPS */
	ret = RING_SPACE(chan, NOUVEAU_DMA_SKIPS);
	if (ret)
		return ret;

	for (i = 0; i < NOUVEAU_DMA_SKIPS; i++)
		OUT_RING(chan, 0);

	/* Initialise NV_MEMORY_TO_MEMORY_FORMAT */
	ret = RING_SPACE(chan, 4);
	if (ret)
		return ret;
	BEGIN_RING(chan, NvSubM2MF, NV_MEMORY_TO_MEMORY_FORMAT_NAME, 1);
	OUT_RING(chan, NvM2MF);
	BEGIN_RING(chan, NvSubM2MF, NV_MEMORY_TO_MEMORY_FORMAT_DMA_NOTIFY, 1);
	OUT_RING(chan, NvNotify0);

	/* Initialise NV_SW */
	ret = RING_SPACE(chan, 2);
	if (ret)
		return ret;
	BEGIN_RING(chan, NvSubSw, 0, 1);
	OUT_RING(chan, NvSw);

	/* Sit back and pray the channel works.. */
	FIRE_RING(chan);

	return 0;
}
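/*
 * Any later submission follows the same reserve/emit/kick pattern used
 * above.  A minimal sketch (the method offset 0x0100 and its argument
 * are illustrative assumptions, not values taken from this file):
 *
 *	ret = RING_SPACE(chan, 2);		reserve two dwords
 *	if (ret)
 *		return ret;
 *	BEGIN_RING(chan, NvSubM2MF, 0x0100, 1);	header for one data word
 *	OUT_RING(chan, 0);			the method argument
 *	FIRE_RING(chan);			kick PFIFO at the new PUT
 */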
void
OUT_RINGp(struct nouveau_channel *chan, const void *data, unsigned nr_dwords)
{
	bool is_iomem;
	u32 *mem = ttm_kmap_obj_virtual(&chan->pushbuf_bo->kmap, &is_iomem);
	mem = &mem[chan->dma.cur];
	if (is_iomem)
		memcpy_toio((void __force __iomem *)mem, data, nr_dwords * 4);
	else
		memcpy(mem, data, nr_dwords * 4);
	chan->dma.cur += nr_dwords;
}

/* Fetch and adjust GPU GET pointer
 *
 * Returns:
 *  value >= 0, the adjusted GET pointer
 *  -EINVAL if GET pointer currently outside main push buffer
 *  -EBUSY if timeout exceeded
 */
static inline int
READ_GET(struct nouveau_channel *chan, uint32_t *prev_get, uint32_t *timeout)
{
	uint32_t val;

	val = nvchan_rd32(chan, chan->user_get);

	/* reset counter as long as GET is still advancing, this is
	 * to avoid misdetecting a GPU lockup if the GPU happens to
	 * just be processing an operation that takes a long time
	 */
	if (val != *prev_get) {
		*prev_get = val;
		*timeout = 0;
	}

	if ((++*timeout & 0xff) == 0) {
		DRM_UDELAY(1);
		if (*timeout > 100000)
			return -EBUSY;
	}

	if (val < chan->pushbuf_base ||
	    val > chan->pushbuf_base + (chan->dma.max << 2))
		return -EINVAL;

	return (val - chan->pushbuf_base) >> 2;
}

void
nv50_dma_push(struct nouveau_channel *chan, struct nouveau_bo *bo,
	      int delta, int length)
{
	struct nouveau_bo *pb = chan->pushbuf_bo;
	uint64_t offset = bo->bo.offset + delta;
	int ip = (chan->dma.ib_put * 2) + chan->dma.ib_base;

	BUG_ON(chan->dma.ib_free < 1);
	nouveau_bo_wr32(pb, ip++, lower_32_bits(offset));
	nouveau_bo_wr32(pb, ip++, upper_32_bits(offset) | length << 8);

	chan->dma.ib_put = (chan->dma.ib_put + 1) & chan->dma.ib_max;

	DRM_MEMORYBARRIER();
	/* Flush writes. */
	nouveau_bo_rd32(pb, 0);

	nvchan_wr32(chan, 0x8c, chan->dma.ib_put);
	chan->dma.ib_free--;
}

static int
nv50_dma_push_wait(struct nouveau_channel *chan, int count)
{
	uint32_t cnt = 0, prev_get = 0;

	while (chan->dma.ib_free < count) {
		uint32_t get = nvchan_rd32(chan, 0x88);
		if (get != prev_get) {
			prev_get = get;
			cnt = 0;
		}

		if ((++cnt & 0xff) == 0) {
			DRM_UDELAY(1);
			if (cnt > 100000)
				return -EBUSY;
		}

		/* one IB slot is always left unused, so a full ring can
		 * never wrap ib_put back onto GET (which would look like
		 * an empty ring to the hardware)
		 */
		chan->dma.ib_free = get - chan->dma.ib_put;
		if (chan->dma.ib_free <= 0)
			chan->dma.ib_free += chan->dma.ib_max;
	}

	return 0;
}

static int
nv50_dma_wait(struct nouveau_channel *chan, int slots, int count)
{
	uint32_t cnt = 0, prev_get = 0;
	int ret;

	ret = nv50_dma_push_wait(chan, slots + 1);
	if (unlikely(ret))
		return ret;

	while (chan->dma.free < count) {
		int get = READ_GET(chan, &prev_get, &cnt);
		if (unlikely(get < 0)) {
			if (get == -EINVAL)
				continue;

			return get;
		}

		if (get <= chan->dma.cur) {
			chan->dma.free = chan->dma.max - chan->dma.cur;
			if (chan->dma.free >= count)
				break;

			FIRE_RING(chan);
			do {
				get = READ_GET(chan, &prev_get, &cnt);
				if (unlikely(get < 0)) {
					if (get == -EINVAL)
						continue;

					return get;
				}
			} while (get == 0);
			chan->dma.cur = 0;
			chan->dma.put = 0;
		}

		chan->dma.free = get - chan->dma.cur - 1;
	}

	return 0;
}
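/*
 * Worked example of the ib_free arithmetic in nv50_dma_push_wait(),
 * assuming (hypothetically) ib_max = 4095, i.e. 4096 IB entries:
 *
 *	get == ib_put           -> ib_free = 0  -> += 4095 -> 4095
 *	get == 10, ib_put == 12 -> ib_free = -2 -> += 4095 -> 4093
 *
 * In both cases one entry stays in reserve: filling every slot would
 * wrap ib_put back onto GET, which the hardware would read as an empty
 * ring rather than a full one.
 */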
int
nouveau_dma_wait(struct nouveau_channel *chan, int slots, int size)
{
	uint32_t prev_get = 0, cnt = 0;
	int get;

	if (chan->dma.ib_max)
		return nv50_dma_wait(chan, slots, size);

	while (chan->dma.free < size) {
		get = READ_GET(chan, &prev_get, &cnt);
		if (unlikely(get == -EBUSY))
			return -EBUSY;

		/* loop until we have a usable GET pointer.  the value
		 * we read from the GPU may be outside the main ring if
		 * PFIFO is processing a buffer called from the main ring,
		 * discard these values until something sensible is seen.
		 *
		 * the other case we discard GET is while the GPU is fetching
		 * from the SKIPS area, so the code below doesn't have to deal
		 * with some unwanted corner cases.
		 */
		if (unlikely(get == -EINVAL) || get < NOUVEAU_DMA_SKIPS)
			continue;

		if (get <= chan->dma.cur) {
			/* engine is fetching behind us, or is completely
			 * idle (GET == PUT) so we have free space up until
			 * the end of the push buffer
			 *
			 * we can only hit that path once per call due to
			 * looping back to the beginning of the push buffer,
			 * we'll hit the fetching-ahead-of-us path from that
			 * point on.
			 *
			 * the *one* exception to that rule is if we read
			 * GET==PUT, in which case the below conditional will
			 * always succeed and break us out of the wait loop.
			 */
			chan->dma.free = chan->dma.max - chan->dma.cur;
			if (chan->dma.free >= size)
				break;

			/* not enough space left at the end of the push buffer,
			 * instruct the GPU to jump back to the start right
			 * after processing the currently pending commands.
			 */
			OUT_RING(chan, chan->pushbuf_base | 0x20000000);

			/* wait for GET to depart from the skips area.
			 * prevents writing GET==PUT and causing a race
			 * condition that causes us to think the GPU is
			 * idle when it's not.
			 */
			do {
				get = READ_GET(chan, &prev_get, &cnt);
				if (unlikely(get == -EBUSY))
					return -EBUSY;
				if (unlikely(get == -EINVAL))
					continue;
			} while (get <= NOUVEAU_DMA_SKIPS);
			WRITE_PUT(NOUVEAU_DMA_SKIPS);

			/* we're now submitting commands at the start of
			 * the push buffer.
			 */
			chan->dma.cur =
			chan->dma.put = NOUVEAU_DMA_SKIPS;
		}

		/* engine fetching ahead of us, we have space up until the
		 * current GET pointer.  the "- 1" is to ensure there's
		 * space left to emit a jump back to the beginning of the
		 * push buffer if we require it.  we can never get GET == PUT
		 * here, so this is safe.
		 */
		chan->dma.free = get - chan->dma.cur - 1;
	}

	return 0;
}
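/*
 * nouveau_dma_wait() backs the RING_SPACE() macro used throughout this
 * file.  A sketch of the expected wrapper, assuming the definition in
 * nouveau_dma.h follows this shape:
 *
 *	static inline int
 *	RING_SPACE(struct nouveau_channel *chan, int size)
 *	{
 *		int ret = nouveau_dma_wait(chan, 1, size);
 *		if (ret)
 *			return ret;
 *
 *		chan->dma.free -= size;
 *		return 0;
 *	}
 */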