intel_ringbuffer.c revision 253709
1/*
2 * Copyright © 2008-2010 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 *    Eric Anholt <eric@anholt.net>
25 *    Zou Nan hai <nanhai.zou@intel.com>
26 *    Xiang Hai hao <haihao.xiang@intel.com>
27 *
28 */
29
30#include <sys/cdefs.h>
31__FBSDID("$FreeBSD: head/sys/dev/drm2/i915/intel_ringbuffer.c 253709 2013-07-27 16:42:29Z kib $");
32
33#include <dev/drm2/drmP.h>
34#include <dev/drm2/drm.h>
35#include <dev/drm2/i915/i915_drm.h>
36#include <dev/drm2/i915/i915_drv.h>
37#include <dev/drm2/i915/intel_drv.h>
38#include <dev/drm2/i915/intel_ringbuffer.h>
39#include <sys/sched.h>
40#include <sys/sf_buf.h>
41
42/*
43 * 965+ support PIPE_CONTROL commands, which provide finer grained control
44 * over cache flushing.
45 */
46struct pipe_control {
47	struct drm_i915_gem_object *obj;
48	volatile u32 *cpu_page;
49	u32 gtt_offset;
50};
51
52void
53i915_trace_irq_get(struct intel_ring_buffer *ring, uint32_t seqno)
54{
55
56	if (ring->trace_irq_seqno == 0) {
57		mtx_lock(&ring->irq_lock);
58		if (ring->irq_get(ring))
59			ring->trace_irq_seqno = seqno;
60		mtx_unlock(&ring->irq_lock);
61	}
62}
63
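/*
 * Free space between the CPU tail and the GPU head.  The result is 8
 * bytes less than the raw head-tail distance so the tail is never
 * advanced all the way up to the head, which would make a full ring
 * indistinguishable from an empty one.
 */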
64static inline int ring_space(struct intel_ring_buffer *ring)
65{
66	int space = (ring->head & HEAD_ADDR) - (ring->tail + 8);
67	if (space < 0)
68		space += ring->size;
69	return space;
70}
71
72static int
73render_ring_flush(struct intel_ring_buffer *ring,
74		  uint32_t	invalidate_domains,
75		  uint32_t	flush_domains)
76{
77	struct drm_device *dev = ring->dev;
78	uint32_t cmd;
79	int ret;
80
81	/*
82	 * read/write caches:
83	 *
84	 * I915_GEM_DOMAIN_RENDER is always invalidated, but is
85	 * only flushed if MI_NO_WRITE_FLUSH is unset.  On 965, it is
86	 * also flushed at 2d versus 3d pipeline switches.
87	 *
88	 * read-only caches:
89	 *
90	 * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
91	 * MI_READ_FLUSH is set, and is always flushed on 965.
92	 *
93	 * I915_GEM_DOMAIN_COMMAND may not exist?
94	 *
95	 * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
96	 * invalidated when MI_EXE_FLUSH is set.
97	 *
98	 * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
99	 * invalidated with every MI_FLUSH.
100	 *
101	 * TLBs:
102	 *
103	 * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
104	 * and I915_GEM_DOMAIN_CPU are invalidated at PTE write and
105	 * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
106	 * are flushed at any MI_FLUSH.
107	 */
108
109	cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
110	if ((invalidate_domains|flush_domains) &
111	    I915_GEM_DOMAIN_RENDER)
112		cmd &= ~MI_NO_WRITE_FLUSH;
113	if (INTEL_INFO(dev)->gen < 4) {
114		/*
115		 * On the 965, the sampler cache always gets flushed
116		 * and this bit is reserved.
117		 */
118		if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER)
119			cmd |= MI_READ_FLUSH;
120	}
121	if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION)
122		cmd |= MI_EXE_FLUSH;
123
124	if (invalidate_domains & I915_GEM_DOMAIN_COMMAND &&
125	    (IS_G4X(dev) || IS_GEN5(dev)))
126		cmd |= MI_INVALIDATE_ISP;
127
128	ret = intel_ring_begin(ring, 2);
129	if (ret)
130		return ret;
131
132	intel_ring_emit(ring, cmd);
133	intel_ring_emit(ring, MI_NOOP);
134	intel_ring_advance(ring);
135
136	return 0;
137}
138
139/**
140 * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
141 * implementing two workarounds on gen6.  From section 1.4.7.1
142 * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
143 *
144 * [DevSNB-C+{W/A}] Before any depth stall flush (including those
145 * produced by non-pipelined state commands), software needs to first
146 * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
147 * 0.
148 *
149 * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
150 * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
151 *
152 * And the workaround for these two requires this workaround first:
153 *
154 * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
155 * BEFORE the pipe-control with a post-sync op and no write-cache
156 * flushes.
157 *
158 * And this last workaround is tricky because of the requirements on
159 * that bit.  From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
160 * volume 2 part 1:
161 *
162 *     "1 of the following must also be set:
163 *      - Render Target Cache Flush Enable ([12] of DW1)
164 *      - Depth Cache Flush Enable ([0] of DW1)
165 *      - Stall at Pixel Scoreboard ([1] of DW1)
166 *      - Depth Stall ([13] of DW1)
167 *      - Post-Sync Operation ([13] of DW1)
168 *      - Notify Enable ([8] of DW1)"
169 *
170 * The cache flushes require the workaround flush that triggered this
171 * one, so we can't use it.  Depth stall would trigger the same.
172 * Post-sync nonzero is what triggered this second workaround, so we
173 * can't use that one either.  Notify enable is IRQs, which aren't
174 * really our business.  That leaves only stall at scoreboard.
175 */
176static int
177intel_emit_post_sync_nonzero_flush(struct intel_ring_buffer *ring)
178{
179	struct pipe_control *pc = ring->private;
180	u32 scratch_addr = pc->gtt_offset + 128;
181	int ret;
182
183
184	ret = intel_ring_begin(ring, 6);
185	if (ret)
186		return ret;
187
188	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
189	intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
190			PIPE_CONTROL_STALL_AT_SCOREBOARD);
191	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
192	intel_ring_emit(ring, 0); /* low dword */
193	intel_ring_emit(ring, 0); /* high dword */
194	intel_ring_emit(ring, MI_NOOP);
195	intel_ring_advance(ring);
196
197	ret = intel_ring_begin(ring, 6);
198	if (ret)
199		return ret;
200
201	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
202	intel_ring_emit(ring, PIPE_CONTROL_QW_WRITE);
203	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
204	intel_ring_emit(ring, 0);
205	intel_ring_emit(ring, 0);
206	intel_ring_emit(ring, MI_NOOP);
207	intel_ring_advance(ring);
208
209	return 0;
210}
211
212static int
213gen6_render_ring_flush(struct intel_ring_buffer *ring,
214                         u32 invalidate_domains, u32 flush_domains)
215{
216	u32 flags = 0;
217	struct pipe_control *pc = ring->private;
218	u32 scratch_addr = pc->gtt_offset + 128;
219	int ret;
220
221	/* Force SNB workarounds for PIPE_CONTROL flushes */
222	intel_emit_post_sync_nonzero_flush(ring);
223
224	/* Just flush everything.  Experiments have shown that reducing the
225	 * number of bits based on the write domains has little performance
226	 * impact.
227	 */
228	flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
229	flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
230	flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
231	flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
232	flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
233	flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
234	flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
235
236	ret = intel_ring_begin(ring, 6);
237	if (ret)
238		return ret;
239
240	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
241	intel_ring_emit(ring, flags);
242	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
243	intel_ring_emit(ring, 0); /* lower dword */
244	intel_ring_emit(ring, 0); /* upper dword */
245	intel_ring_emit(ring, MI_NOOP);
246	intel_ring_advance(ring);
247
248	return 0;
249}
250
251static void ring_write_tail(struct intel_ring_buffer *ring,
252			    uint32_t value)
253{
254	drm_i915_private_t *dev_priv = ring->dev->dev_private;
255	I915_WRITE_TAIL(ring, value);
256}
257
258u32 intel_ring_get_active_head(struct intel_ring_buffer *ring)
259{
260	drm_i915_private_t *dev_priv = ring->dev->dev_private;
261	uint32_t acthd_reg = INTEL_INFO(ring->dev)->gen >= 4 ?
262			RING_ACTHD(ring->mmio_base) : ACTHD;
263
264	return I915_READ(acthd_reg);
265}
266
267static int init_ring_common(struct intel_ring_buffer *ring)
268{
269	drm_i915_private_t *dev_priv = ring->dev->dev_private;
270	struct drm_i915_gem_object *obj = ring->obj;
271	uint32_t head;
272
273	/* Stop the ring if it's running. */
274	I915_WRITE_CTL(ring, 0);
275	I915_WRITE_HEAD(ring, 0);
276	ring->write_tail(ring, 0);
277
278	/* Initialize the ring. */
279	I915_WRITE_START(ring, obj->gtt_offset);
280	head = I915_READ_HEAD(ring) & HEAD_ADDR;
281
282	/* G45 ring initialization fails to reset head to zero */
283	if (head != 0) {
284		DRM_DEBUG("%s head not reset to zero "
285			      "ctl %08x head %08x tail %08x start %08x\n",
286			      ring->name,
287			      I915_READ_CTL(ring),
288			      I915_READ_HEAD(ring),
289			      I915_READ_TAIL(ring),
290			      I915_READ_START(ring));
291
292		I915_WRITE_HEAD(ring, 0);
293
294		if (I915_READ_HEAD(ring) & HEAD_ADDR) {
295			DRM_ERROR("failed to set %s head to zero "
296				  "ctl %08x head %08x tail %08x start %08x\n",
297				  ring->name,
298				  I915_READ_CTL(ring),
299				  I915_READ_HEAD(ring),
300				  I915_READ_TAIL(ring),
301				  I915_READ_START(ring));
302		}
303	}
304
305	I915_WRITE_CTL(ring,
306			((ring->size - PAGE_SIZE) & RING_NR_PAGES)
307			| RING_VALID);
308
309	/* If the head is still not zero, the ring is dead */
310	if (_intel_wait_for(ring->dev,
311	    (I915_READ_CTL(ring) & RING_VALID) != 0 &&
312	     I915_READ_START(ring) == obj->gtt_offset &&
313	     (I915_READ_HEAD(ring) & HEAD_ADDR) == 0,
314	    50, 1, "915rii")) {
315		DRM_ERROR("%s initialization failed "
316				"ctl %08x head %08x tail %08x start %08x\n",
317				ring->name,
318				I915_READ_CTL(ring),
319				I915_READ_HEAD(ring),
320				I915_READ_TAIL(ring),
321				I915_READ_START(ring));
322		return -EIO;
323	}
324
325	if (!drm_core_check_feature(ring->dev, DRIVER_MODESET))
326		i915_kernel_lost_context(ring->dev);
327	else {
328		ring->head = I915_READ_HEAD(ring);
329		ring->tail = I915_READ_TAIL(ring) & TAIL_ADDR;
330		ring->space = ring_space(ring);
331	}
332
333	return 0;
334}
335
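/*
 * Allocate a 4 KiB scratch object for PIPE_CONTROL writes, pin it into
 * the GTT and map its page into the kernel address space so the CPU can
 * read back values the GPU writes there (e.g. pc_render_get_seqno()).
 */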
336static int
337init_pipe_control(struct intel_ring_buffer *ring)
338{
339	struct pipe_control *pc;
340	struct drm_i915_gem_object *obj;
341	int ret;
342
343	if (ring->private)
344		return 0;
345
346	pc = malloc(sizeof(*pc), DRM_I915_GEM, M_WAITOK);
347	if (!pc)
348		return -ENOMEM;
349
350	obj = i915_gem_alloc_object(ring->dev, 4096);
351	if (obj == NULL) {
352		DRM_ERROR("Failed to allocate seqno page\n");
353		ret = -ENOMEM;
354		goto err;
355	}
356
357	i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
358
359	ret = i915_gem_object_pin(obj, 4096, true);
360	if (ret)
361		goto err_unref;
362
363	pc->gtt_offset = obj->gtt_offset;
364	pc->cpu_page = (uint32_t *)kmem_alloc_nofault(kernel_map, PAGE_SIZE);
365	if (pc->cpu_page == NULL)
366		goto err_unpin;
367	pmap_qenter((uintptr_t)pc->cpu_page, &obj->pages[0], 1);
368	pmap_invalidate_cache_range((vm_offset_t)pc->cpu_page,
369	    (vm_offset_t)pc->cpu_page + PAGE_SIZE);
370
371	pc->obj = obj;
372	ring->private = pc;
373	return 0;
374
375err_unpin:
376	i915_gem_object_unpin(obj);
377err_unref:
378	drm_gem_object_unreference(&obj->base);
379err:
380	free(pc, DRM_I915_GEM);
381	return ret;
382}
383
384static void
385cleanup_pipe_control(struct intel_ring_buffer *ring)
386{
387	struct pipe_control *pc = ring->private;
388	struct drm_i915_gem_object *obj;
389
390	if (!ring->private)
391		return;
392
393	obj = pc->obj;
394	pmap_qremove((vm_offset_t)pc->cpu_page, 1);
395	kmem_free(kernel_map, (uintptr_t)pc->cpu_page, PAGE_SIZE);
396	i915_gem_object_unpin(obj);
397	drm_gem_object_unreference(&obj->base);
398
399	free(pc, DRM_I915_GEM);
400	ring->private = NULL;
401}
402
403static int init_render_ring(struct intel_ring_buffer *ring)
404{
405	struct drm_device *dev = ring->dev;
406	struct drm_i915_private *dev_priv = dev->dev_private;
407	int ret = init_ring_common(ring);
408
409	if (INTEL_INFO(dev)->gen > 3) {
410		int mode = VS_TIMER_DISPATCH << 16 | VS_TIMER_DISPATCH;
411		I915_WRITE(MI_MODE, mode);
412		if (IS_GEN7(dev))
413			I915_WRITE(GFX_MODE_GEN7,
414				   GFX_MODE_DISABLE(GFX_TLB_INVALIDATE_ALWAYS) |
415				   GFX_MODE_ENABLE(GFX_REPLAY_MODE));
416	}
417
418	if (INTEL_INFO(dev)->gen >= 5) {
419		ret = init_pipe_control(ring);
420		if (ret)
421			return ret;
422	}
423
424
425	if (IS_GEN6(dev)) {
426		/* From the Sandybridge PRM, volume 1 part 3, page 24:
427		 * "If this bit is set, STCunit will have LRA as replacement
428		 *  policy. [...] This bit must be reset.  LRA replacement
429		 *  policy is not supported."
430		 */
431		I915_WRITE(CACHE_MODE_0,
432			   CM0_STC_EVICT_DISABLE_LRA_SNB << CM0_MASK_SHIFT);
433	}
434
435	if (INTEL_INFO(dev)->gen >= 6) {
436		I915_WRITE(INSTPM,
437			   INSTPM_FORCE_ORDERING << 16 | INSTPM_FORCE_ORDERING);
438	}
439
440	return ret;
441}
442
443static void render_ring_cleanup(struct intel_ring_buffer *ring)
444{
445	if (!ring->private)
446		return;
447
448	cleanup_pipe_control(ring);
449}
450
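/*
 * Write the given seqno into another ring's semaphore mailbox register;
 * the waiting ring later compares against this value using
 * MI_SEMAPHORE_COMPARE in intel_ring_sync().
 */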
451static void
452update_mboxes(struct intel_ring_buffer *ring,
453	    u32 seqno,
454	    u32 mmio_offset)
455{
456	intel_ring_emit(ring, MI_SEMAPHORE_MBOX |
457			      MI_SEMAPHORE_GLOBAL_GTT |
458			      MI_SEMAPHORE_REGISTER |
459			      MI_SEMAPHORE_UPDATE);
460	intel_ring_emit(ring, seqno);
461	intel_ring_emit(ring, mmio_offset);
462}
463
464/**
465 * gen6_add_request - Update the semaphore mailbox registers
466 *
467 * @ring - ring that is adding a request
468 * @seqno - return seqno stuck into the ring
469 *
470 * Update the mailbox registers in the *other* rings with the current seqno.
471 * This acts like a signal in the canonical semaphore.
472 */
473static int
474gen6_add_request(struct intel_ring_buffer *ring,
475		 u32 *seqno)
476{
477	u32 mbox1_reg;
478	u32 mbox2_reg;
479	int ret;
480
481	ret = intel_ring_begin(ring, 10);
482	if (ret)
483		return ret;
484
485	mbox1_reg = ring->signal_mbox[0];
486	mbox2_reg = ring->signal_mbox[1];
487
488	*seqno = i915_gem_next_request_seqno(ring);
489
490	update_mboxes(ring, *seqno, mbox1_reg);
491	update_mboxes(ring, *seqno, mbox2_reg);
492	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
493	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
494	intel_ring_emit(ring, *seqno);
495	intel_ring_emit(ring, MI_USER_INTERRUPT);
496	intel_ring_advance(ring);
497
498	return 0;
499}
500
501/**
502 * intel_ring_sync - sync the waiter to the signaller on seqno
503 *
504 * @waiter - ring that is waiting
505 * @signaller - ring which has, or will signal
506 * @seqno - seqno which the waiter will block on
507 */
508static int
509intel_ring_sync(struct intel_ring_buffer *waiter,
510		struct intel_ring_buffer *signaller,
511		int ring,
512		u32 seqno)
513{
514	int ret;
515	u32 dw1 = MI_SEMAPHORE_MBOX |
516		  MI_SEMAPHORE_COMPARE |
517		  MI_SEMAPHORE_REGISTER;
518
519	ret = intel_ring_begin(waiter, 4);
520	if (ret)
521		return ret;
522
523	intel_ring_emit(waiter, dw1 | signaller->semaphore_register[ring]);
524	intel_ring_emit(waiter, seqno);
525	intel_ring_emit(waiter, 0);
526	intel_ring_emit(waiter, MI_NOOP);
527	intel_ring_advance(waiter);
528
529	return 0;
530}
531
532int render_ring_sync_to(struct intel_ring_buffer *waiter,
533    struct intel_ring_buffer *signaller, u32 seqno);
534int gen6_bsd_ring_sync_to(struct intel_ring_buffer *waiter,
535    struct intel_ring_buffer *signaller, u32 seqno);
536int gen6_blt_ring_sync_to(struct intel_ring_buffer *waiter,
537    struct intel_ring_buffer *signaller, u32 seqno);
538
539/* VCS->RCS (RVSYNC) or BCS->RCS (RBSYNC) */
540int
541render_ring_sync_to(struct intel_ring_buffer *waiter,
542		    struct intel_ring_buffer *signaller,
543		    u32 seqno)
544{
545	KASSERT(signaller->semaphore_register[RCS] != MI_SEMAPHORE_SYNC_INVALID,
546	    ("valid RCS semaphore"));
547	return intel_ring_sync(waiter,
548			       signaller,
549			       RCS,
550			       seqno);
551}
552
553/* RCS->VCS (VRSYNC) or BCS->VCS (VBSYNC) */
554int
555gen6_bsd_ring_sync_to(struct intel_ring_buffer *waiter,
556		      struct intel_ring_buffer *signaller,
557		      u32 seqno)
558{
559	KASSERT(signaller->semaphore_register[VCS] != MI_SEMAPHORE_SYNC_INVALID,
560	    ("Valid VCS semaphore"));
561	return intel_ring_sync(waiter,
562			       signaller,
563			       VCS,
564			       seqno);
565}
566
567/* RCS->BCS (BRSYNC) or VCS->BCS (BVSYNC) */
568int
569gen6_blt_ring_sync_to(struct intel_ring_buffer *waiter,
570		      struct intel_ring_buffer *signaller,
571		      u32 seqno)
572{
573	KASSERT(signaller->semaphore_register[BCS] != MI_SEMAPHORE_SYNC_INVALID,
574	    ("Valid BCS semaphore"));
575	return intel_ring_sync(waiter,
576			       signaller,
577			       BCS,
578			       seqno);
579}
580
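/*
 * Emit a depth-stalling PIPE_CONTROL that performs a qword write to the
 * given scratch address.  pc_render_add_request() uses a series of these
 * to flush its earlier seqno write out to memory before raising the
 * PIPE_NOTIFY interrupt.
 */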
581#define PIPE_CONTROL_FLUSH(ring__, addr__)					\
582do {									\
583	intel_ring_emit(ring__, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |		\
584		 PIPE_CONTROL_DEPTH_STALL);				\
585	intel_ring_emit(ring__, (addr__) | PIPE_CONTROL_GLOBAL_GTT);			\
586	intel_ring_emit(ring__, 0);							\
587	intel_ring_emit(ring__, 0);							\
588} while (0)
589
590static int
591pc_render_add_request(struct intel_ring_buffer *ring,
592		      uint32_t *result)
593{
594	u32 seqno = i915_gem_next_request_seqno(ring);
595	struct pipe_control *pc = ring->private;
596	u32 scratch_addr = pc->gtt_offset + 128;
597	int ret;
598
599	/* For Ironlake, MI_USER_INTERRUPT was deprecated and apparently
600	 * incoherent with writes to memory, i.e. completely fubar,
601	 * so we need to use PIPE_NOTIFY instead.
602	 *
603	 * However, we also need to workaround the qword write
604	 * incoherence by flushing the 6 PIPE_NOTIFY buffers out to
605	 * memory before requesting an interrupt.
606	 */
607	ret = intel_ring_begin(ring, 32);
608	if (ret)
609		return ret;
610
611	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
612			PIPE_CONTROL_WRITE_FLUSH |
613			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
614	intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
615	intel_ring_emit(ring, seqno);
616	intel_ring_emit(ring, 0);
617	PIPE_CONTROL_FLUSH(ring, scratch_addr);
618	scratch_addr += 128; /* write to separate cachelines */
619	PIPE_CONTROL_FLUSH(ring, scratch_addr);
620	scratch_addr += 128;
621	PIPE_CONTROL_FLUSH(ring, scratch_addr);
622	scratch_addr += 128;
623	PIPE_CONTROL_FLUSH(ring, scratch_addr);
624	scratch_addr += 128;
625	PIPE_CONTROL_FLUSH(ring, scratch_addr);
626	scratch_addr += 128;
627	PIPE_CONTROL_FLUSH(ring, scratch_addr);
628	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
629			PIPE_CONTROL_WRITE_FLUSH |
630			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
631			PIPE_CONTROL_NOTIFY);
632	intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
633	intel_ring_emit(ring, seqno);
634	intel_ring_emit(ring, 0);
635	intel_ring_advance(ring);
636
637	*result = seqno;
638	return 0;
639}
640
641static int
642render_ring_add_request(struct intel_ring_buffer *ring,
643			uint32_t *result)
644{
645	u32 seqno = i915_gem_next_request_seqno(ring);
646	int ret;
647
648	ret = intel_ring_begin(ring, 4);
649	if (ret)
650		return ret;
651
652	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
653	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
654	intel_ring_emit(ring, seqno);
655	intel_ring_emit(ring, MI_USER_INTERRUPT);
656	intel_ring_advance(ring);
657
658	*result = seqno;
659	return 0;
660}
661
662static u32
663gen6_ring_get_seqno(struct intel_ring_buffer *ring)
664{
665	struct drm_device *dev = ring->dev;
666
667	/* Workaround to force correct ordering between irq and seqno writes on
668	 * ivb (and maybe also on snb) by reading from a CS register (like
669	 * ACTHD) before reading the status page. */
670	if (/* IS_GEN6(dev) || */IS_GEN7(dev))
671		intel_ring_get_active_head(ring);
672	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
673}
674
675static uint32_t
676ring_get_seqno(struct intel_ring_buffer *ring)
677{
678	if (ring->status_page.page_addr == NULL)
679		return (-1);
680	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
681}
682
683static uint32_t
684pc_render_get_seqno(struct intel_ring_buffer *ring)
685{
686	struct pipe_control *pc = ring->private;
687	if (pc != NULL)
688		return pc->cpu_page[0];
689	else
690		return (-1);
691}
692
693static void
694ironlake_enable_irq(drm_i915_private_t *dev_priv, uint32_t mask)
695{
696	dev_priv->gt_irq_mask &= ~mask;
697	I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
698	POSTING_READ(GTIMR);
699}
700
701static void
702ironlake_disable_irq(drm_i915_private_t *dev_priv, uint32_t mask)
703{
704	dev_priv->gt_irq_mask |= mask;
705	I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
706	POSTING_READ(GTIMR);
707}
708
709static void
710i915_enable_irq(drm_i915_private_t *dev_priv, uint32_t mask)
711{
712	dev_priv->irq_mask &= ~mask;
713	I915_WRITE(IMR, dev_priv->irq_mask);
714	POSTING_READ(IMR);
715}
716
717static void
718i915_disable_irq(drm_i915_private_t *dev_priv, uint32_t mask)
719{
720	dev_priv->irq_mask |= mask;
721	I915_WRITE(IMR, dev_priv->irq_mask);
722	POSTING_READ(IMR);
723}
724
725static bool
726render_ring_get_irq(struct intel_ring_buffer *ring)
727{
728	struct drm_device *dev = ring->dev;
729	drm_i915_private_t *dev_priv = dev->dev_private;
730
731	if (!dev->irq_enabled)
732		return false;
733
734	mtx_assert(&ring->irq_lock, MA_OWNED);
735	if (ring->irq_refcount++ == 0) {
736		if (HAS_PCH_SPLIT(dev))
737			ironlake_enable_irq(dev_priv,
738					    GT_PIPE_NOTIFY | GT_USER_INTERRUPT);
739		else
740			i915_enable_irq(dev_priv, I915_USER_INTERRUPT);
741	}
742
743	return true;
744}
745
746static void
747render_ring_put_irq(struct intel_ring_buffer *ring)
748{
749	struct drm_device *dev = ring->dev;
750	drm_i915_private_t *dev_priv = dev->dev_private;
751
752	mtx_assert(&ring->irq_lock, MA_OWNED);
753	if (--ring->irq_refcount == 0) {
754		if (HAS_PCH_SPLIT(dev))
755			ironlake_disable_irq(dev_priv,
756					     GT_USER_INTERRUPT |
757					     GT_PIPE_NOTIFY);
758		else
759			i915_disable_irq(dev_priv, I915_USER_INTERRUPT);
760	}
761}
762
763void intel_ring_setup_status_page(struct intel_ring_buffer *ring)
764{
765	struct drm_device *dev = ring->dev;
766	drm_i915_private_t *dev_priv = dev->dev_private;
767	uint32_t mmio = 0;
768
769	/* The ring status page addresses are no longer next to the rest of
770	 * the ring registers as of gen7.
771	 */
772	if (IS_GEN7(dev)) {
773		switch (ring->id) {
774		case RCS:
775			mmio = RENDER_HWS_PGA_GEN7;
776			break;
777		case BCS:
778			mmio = BLT_HWS_PGA_GEN7;
779			break;
780		case VCS:
781			mmio = BSD_HWS_PGA_GEN7;
782			break;
783		}
784	} else if (IS_GEN6(dev)) {
785		mmio = RING_HWS_PGA_GEN6(ring->mmio_base);
786	} else {
787		mmio = RING_HWS_PGA(ring->mmio_base);
788	}
789
790	I915_WRITE(mmio, (u32)ring->status_page.gfx_addr);
791	POSTING_READ(mmio);
792}
793
794static int
795bsd_ring_flush(struct intel_ring_buffer *ring,
796	       uint32_t     invalidate_domains,
797	       uint32_t     flush_domains)
798{
799	int ret;
800
801	ret = intel_ring_begin(ring, 2);
802	if (ret)
803		return ret;
804
805	intel_ring_emit(ring, MI_FLUSH);
806	intel_ring_emit(ring, MI_NOOP);
807	intel_ring_advance(ring);
808	return 0;
809}
810
811static int
812ring_add_request(struct intel_ring_buffer *ring,
813		 uint32_t *result)
814{
815	uint32_t seqno;
816	int ret;
817
818	ret = intel_ring_begin(ring, 4);
819	if (ret)
820		return ret;
821
822	seqno = i915_gem_next_request_seqno(ring);
823
824	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
825	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
826	intel_ring_emit(ring, seqno);
827	intel_ring_emit(ring, MI_USER_INTERRUPT);
828	intel_ring_advance(ring);
829
830	*result = seqno;
831	return 0;
832}
833
834static bool
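/*
 * Shared gen6+ irq enable helper: hold a forcewake reference and, on the
 * first reference, unmask the ring's interrupt in both the per-ring IMR
 * and the GT interrupt mask register.
 */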
835gen6_ring_get_irq(struct intel_ring_buffer *ring, uint32_t gflag, uint32_t rflag)
836{
837	struct drm_device *dev = ring->dev;
838	drm_i915_private_t *dev_priv = dev->dev_private;
839
840	if (!dev->irq_enabled)
841		return false;
842
843	gen6_gt_force_wake_get(dev_priv);
844
845	mtx_assert(&ring->irq_lock, MA_OWNED);
846	if (ring->irq_refcount++ == 0) {
847		ring->irq_mask &= ~rflag;
848		I915_WRITE_IMR(ring, ring->irq_mask);
849		ironlake_enable_irq(dev_priv, gflag);
850	}
851
852	return true;
853}
854
855static void
856gen6_ring_put_irq(struct intel_ring_buffer *ring, uint32_t gflag, uint32_t rflag)
857{
858	struct drm_device *dev = ring->dev;
859	drm_i915_private_t *dev_priv = dev->dev_private;
860
861	mtx_assert(&ring->irq_lock, MA_OWNED);
862	if (--ring->irq_refcount == 0) {
863		ring->irq_mask |= rflag;
864		I915_WRITE_IMR(ring, ring->irq_mask);
865		ironlake_disable_irq(dev_priv, gflag);
866	}
867
868	gen6_gt_force_wake_put(dev_priv);
869}
870
871static bool
872bsd_ring_get_irq(struct intel_ring_buffer *ring)
873{
874	struct drm_device *dev = ring->dev;
875	drm_i915_private_t *dev_priv = dev->dev_private;
876
877	if (!dev->irq_enabled)
878		return false;
879
880	mtx_assert(&ring->irq_lock, MA_OWNED);
881	if (ring->irq_refcount++ == 0) {
882		if (IS_G4X(dev))
883			i915_enable_irq(dev_priv, I915_BSD_USER_INTERRUPT);
884		else
885			ironlake_enable_irq(dev_priv, GT_BSD_USER_INTERRUPT);
886	}
887
888	return true;
889}
890static void
891bsd_ring_put_irq(struct intel_ring_buffer *ring)
892{
893	struct drm_device *dev = ring->dev;
894	drm_i915_private_t *dev_priv = dev->dev_private;
895
896	mtx_assert(&ring->irq_lock, MA_OWNED);
897	if (--ring->irq_refcount == 0) {
898		if (IS_G4X(dev))
899			i915_disable_irq(dev_priv, I915_BSD_USER_INTERRUPT);
900		else
901			ironlake_disable_irq(dev_priv, GT_BSD_USER_INTERRUPT);
902	}
903}
904
905static int
906ring_dispatch_execbuffer(struct intel_ring_buffer *ring, uint32_t offset,
907    uint32_t length)
908{
909	int ret;
910
911	ret = intel_ring_begin(ring, 2);
912	if (ret)
913		return ret;
914
915	intel_ring_emit(ring,
916			MI_BATCH_BUFFER_START | (2 << 6) |
917			MI_BATCH_NON_SECURE_I965);
918	intel_ring_emit(ring, offset);
919	intel_ring_advance(ring);
920
921	return 0;
922}
923
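/*
 * i830/845G batches are dispatched with the older MI_BATCH_BUFFER
 * command, which takes explicit start and end addresses; later
 * generations jump to the batch with MI_BATCH_BUFFER_START.
 */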
924static int
925render_ring_dispatch_execbuffer(struct intel_ring_buffer *ring,
926				uint32_t offset, uint32_t len)
927{
928	struct drm_device *dev = ring->dev;
929	int ret;
930
931	if (IS_I830(dev) || IS_845G(dev)) {
932		ret = intel_ring_begin(ring, 4);
933		if (ret)
934			return ret;
935
936		intel_ring_emit(ring, MI_BATCH_BUFFER);
937		intel_ring_emit(ring, offset | MI_BATCH_NON_SECURE);
938		intel_ring_emit(ring, offset + len - 8);
939		intel_ring_emit(ring, 0);
940	} else {
941		ret = intel_ring_begin(ring, 2);
942		if (ret)
943			return ret;
944
945		if (INTEL_INFO(dev)->gen >= 4) {
946			intel_ring_emit(ring,
947					MI_BATCH_BUFFER_START | (2 << 6) |
948					MI_BATCH_NON_SECURE_I965);
949			intel_ring_emit(ring, offset);
950		} else {
951			intel_ring_emit(ring,
952					MI_BATCH_BUFFER_START | (2 << 6));
953			intel_ring_emit(ring, offset | MI_BATCH_NON_SECURE);
954		}
955	}
956	intel_ring_advance(ring);
957
958	return 0;
959}
960
961static void cleanup_status_page(struct intel_ring_buffer *ring)
962{
963	drm_i915_private_t *dev_priv = ring->dev->dev_private;
964	struct drm_i915_gem_object *obj;
965
966	obj = ring->status_page.obj;
967	if (obj == NULL)
968		return;
969
970	pmap_qremove((vm_offset_t)ring->status_page.page_addr, 1);
971	kmem_free(kernel_map, (vm_offset_t)ring->status_page.page_addr,
972	    PAGE_SIZE);
973	i915_gem_object_unpin(obj);
974	drm_gem_object_unreference(&obj->base);
975	ring->status_page.obj = NULL;
976
977	memset(&dev_priv->hws_map, 0, sizeof(dev_priv->hws_map));
978}
979
980static int init_status_page(struct intel_ring_buffer *ring)
981{
982	struct drm_device *dev = ring->dev;
983	drm_i915_private_t *dev_priv = dev->dev_private;
984	struct drm_i915_gem_object *obj;
985	int ret;
986
987	obj = i915_gem_alloc_object(dev, 4096);
988	if (obj == NULL) {
989		DRM_ERROR("Failed to allocate status page\n");
990		ret = -ENOMEM;
991		goto err;
992	}
993
994	i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
995
996	ret = i915_gem_object_pin(obj, 4096, true);
997	if (ret != 0) {
998		goto err_unref;
999	}
1000
1001	ring->status_page.gfx_addr = obj->gtt_offset;
1002	ring->status_page.page_addr = (void *)kmem_alloc_nofault(kernel_map,
1003	    PAGE_SIZE);
1004	if (ring->status_page.page_addr == NULL) {
1005		memset(&dev_priv->hws_map, 0, sizeof(dev_priv->hws_map));
1006		goto err_unpin;
1007	}
1008	pmap_qenter((vm_offset_t)ring->status_page.page_addr, &obj->pages[0],
1009	    1);
1010	pmap_invalidate_cache_range((vm_offset_t)ring->status_page.page_addr,
1011	    (vm_offset_t)ring->status_page.page_addr + PAGE_SIZE);
1012	ring->status_page.obj = obj;
1013	memset(ring->status_page.page_addr, 0, PAGE_SIZE);
1014
1015	intel_ring_setup_status_page(ring);
1016	DRM_DEBUG("i915: init_status_page %s hws offset: 0x%08x\n",
1017			ring->name, ring->status_page.gfx_addr);
1018
1019	return 0;
1020
1021err_unpin:
1022	i915_gem_object_unpin(obj);
1023err_unref:
1024	drm_gem_object_unreference(&obj->base);
1025err:
1026	return ret;
1027}
1028
1029static
1030int intel_init_ring_buffer(struct drm_device *dev,
1031			   struct intel_ring_buffer *ring)
1032{
1033	struct drm_i915_gem_object *obj;
1034	int ret;
1035
1036	ring->dev = dev;
1037	INIT_LIST_HEAD(&ring->active_list);
1038	INIT_LIST_HEAD(&ring->request_list);
1039	INIT_LIST_HEAD(&ring->gpu_write_list);
1040
1041	mtx_init(&ring->irq_lock, "ringb", NULL, MTX_DEF);
1042	ring->irq_mask = ~0;
1043
1044	if (I915_NEED_GFX_HWS(dev)) {
1045		ret = init_status_page(ring);
1046		if (ret)
1047			return ret;
1048	}
1049
1050	obj = i915_gem_alloc_object(dev, ring->size);
1051	if (obj == NULL) {
1052		DRM_ERROR("Failed to allocate ringbuffer\n");
1053		ret = -ENOMEM;
1054		goto err_hws;
1055	}
1056
1057	ring->obj = obj;
1058
1059	ret = i915_gem_object_pin(obj, PAGE_SIZE, true);
1060	if (ret)
1061		goto err_unref;
1062
1063	ring->map.size = ring->size;
1064	ring->map.offset = dev->agp->base + obj->gtt_offset;
1065	ring->map.type = 0;
1066	ring->map.flags = 0;
1067	ring->map.mtrr = 0;
1068
1069	drm_core_ioremap_wc(&ring->map, dev);
1070	if (ring->map.virtual == NULL) {
1071		DRM_ERROR("Failed to map ringbuffer.\n");
1072		ret = -EINVAL;
1073		goto err_unpin;
1074	}
1075
1076	ring->virtual_start = ring->map.virtual;
1077	ret = ring->init(ring);
1078	if (ret)
1079		goto err_unmap;
1080
1081	/* Workaround an erratum on the i830 which causes a hang if
1082	 * the TAIL pointer points to within the last 2 cachelines
1083	 * of the buffer.
1084	 */
1085	ring->effective_size = ring->size;
1086	if (IS_I830(ring->dev) || IS_845G(ring->dev))
1087		ring->effective_size -= 128;
1088
1089	return 0;
1090
1091err_unmap:
1092	drm_core_ioremapfree(&ring->map, dev);
1093err_unpin:
1094	i915_gem_object_unpin(obj);
1095err_unref:
1096	drm_gem_object_unreference(&obj->base);
1097	ring->obj = NULL;
1098err_hws:
1099	cleanup_status_page(ring);
1100	return ret;
1101}
1102
1103void intel_cleanup_ring_buffer(struct intel_ring_buffer *ring)
1104{
1105	struct drm_i915_private *dev_priv;
1106	int ret;
1107
1108	if (ring->obj == NULL)
1109		return;
1110
1111	/* Disable the ring buffer. The ring must be idle at this point */
1112	dev_priv = ring->dev->dev_private;
1113	ret = intel_wait_ring_idle(ring);
1114	I915_WRITE_CTL(ring, 0);
1115
1116	drm_core_ioremapfree(&ring->map, ring->dev);
1117
1118	i915_gem_object_unpin(ring->obj);
1119	drm_gem_object_unreference(&ring->obj->base);
1120	ring->obj = NULL;
1121
1122	if (ring->cleanup)
1123		ring->cleanup(ring);
1124
1125	cleanup_status_page(ring);
1126}
1127
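/*
 * Fill the space between the current tail and the end of the ring with
 * MI_NOOPs (two dwords per loop iteration) and reset the tail to zero so
 * the next emit starts at the beginning of the buffer.
 */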
1128static int intel_wrap_ring_buffer(struct intel_ring_buffer *ring)
1129{
1130	unsigned int *virt;
1131	int rem = ring->size - ring->tail;
1132
1133	if (ring->space < rem) {
1134		int ret = intel_wait_ring_buffer(ring, rem);
1135		if (ret)
1136			return ret;
1137	}
1138
1139	virt = (unsigned int *)((char *)ring->virtual_start + ring->tail);
1140	rem /= 8;
1141	while (rem--) {
1142		*virt++ = MI_NOOP;
1143		*virt++ = MI_NOOP;
1144	}
1145
1146	ring->tail = 0;
1147	ring->space = ring_space(ring);
1148
1149	return 0;
1150}
1151
1152static int intel_ring_wait_seqno(struct intel_ring_buffer *ring, u32 seqno)
1153{
1154	struct drm_i915_private *dev_priv = ring->dev->dev_private;
1155	bool was_interruptible;
1156	int ret;
1157
1158	/* XXX As we have not yet audited all the paths to check that
1159	 * they are ready for ERESTARTSYS from intel_ring_begin, do not
1160	 * allow us to be interruptible by a signal.
1161	 */
1162	was_interruptible = dev_priv->mm.interruptible;
1163	dev_priv->mm.interruptible = false;
1164
1165	ret = i915_wait_request(ring, seqno, true);
1166
1167	dev_priv->mm.interruptible = was_interruptible;
1168
1169	return ret;
1170}
1171
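/*
 * Try to free ring space by retiring requests: reuse last_retired_head
 * if retiring already advanced it, otherwise pick the oldest outstanding
 * request whose completion would free at least n bytes and wait for its
 * seqno.
 */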
1172static int intel_ring_wait_request(struct intel_ring_buffer *ring, int n)
1173{
1174	struct drm_i915_gem_request *request;
1175	u32 seqno = 0;
1176	int ret;
1177
1178	i915_gem_retire_requests_ring(ring);
1179
1180	if (ring->last_retired_head != -1) {
1181		ring->head = ring->last_retired_head;
1182		ring->last_retired_head = -1;
1183		ring->space = ring_space(ring);
1184		if (ring->space >= n)
1185			return 0;
1186	}
1187
1188	list_for_each_entry(request, &ring->request_list, list) {
1189		int space;
1190
1191		if (request->tail == -1)
1192			continue;
1193
1194		space = request->tail - (ring->tail + 8);
1195		if (space < 0)
1196			space += ring->size;
1197		if (space >= n) {
1198			seqno = request->seqno;
1199			break;
1200		}
1201
1202		/* Consume this request in case we need more space than
1203		 * is available and so need to prevent a race between
1204		 * updating last_retired_head and direct reads of
1205		 * I915_RING_HEAD. It also provides a nice sanity check.
1206		 */
1207		request->tail = -1;
1208	}
1209
1210	if (seqno == 0)
1211		return -ENOSPC;
1212
1213	ret = intel_ring_wait_seqno(ring, seqno);
1214	if (ret)
1215		return ret;
1216
1217	if (ring->last_retired_head == -1)
1218		return -ENOSPC;
1219
1220	ring->head = ring->last_retired_head;
1221	ring->last_retired_head = -1;
1222	ring->space = ring_space(ring);
1223	if (ring->space < n)
1224		return -ENOSPC;
1225
1226	return 0;
1227}
1228
1229int intel_wait_ring_buffer(struct intel_ring_buffer *ring, int n)
1230{
1231	struct drm_device *dev = ring->dev;
1232	struct drm_i915_private *dev_priv = dev->dev_private;
1233	int end;
1234	int ret;
1235
1236	ret = intel_ring_wait_request(ring, n);
1237	if (ret != -ENOSPC)
1238		return ret;
1239
1240	CTR1(KTR_DRM, "ring_wait_begin %s", ring->name);
1241	if (drm_core_check_feature(dev, DRIVER_GEM))
1242		/* With GEM the hangcheck timer should kick us out of the loop,
1243		 * leaving it early runs the risk of corrupting GEM state (due
1244		 * to running on almost untested codepaths). But on resume
1245		 * timers don't work yet, so prevent a complete hang in that
1246		 * case by choosing an insanely large timeout. */
1247		end = ticks + hz * 60;
1248	else
1249		end = ticks + hz * 3;
1250	do {
1251		ring->head = I915_READ_HEAD(ring);
1252		ring->space = ring_space(ring);
1253		if (ring->space >= n) {
1254			CTR1(KTR_DRM, "ring_wait_end %s", ring->name);
1255			return 0;
1256		}
1257
1258#if 0
1259		if (dev->primary->master) {
1260			struct drm_i915_master_private *master_priv = dev->primary->master->driver_priv;
1261			if (master_priv->sarea_priv)
1262				master_priv->sarea_priv->perf_boxes |= I915_BOX_WAIT;
1263		}
1264#else
1265		if (dev_priv->sarea_priv)
1266			dev_priv->sarea_priv->perf_boxes |= I915_BOX_WAIT;
1267#endif
1268
1269		pause("915rng", 1);
1270		if (atomic_load_acq_32(&dev_priv->mm.wedged) != 0) {
1271			CTR1(KTR_DRM, "ring_wait_end %s wedged", ring->name);
1272			return -EAGAIN;
1273		}
1274	} while (!time_after(ticks, end));
1275	CTR1(KTR_DRM, "ring_wait_end %s busy", ring->name);
1276	return -EBUSY;
1277}
1278
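/*
 * Reserve space for num_dwords emits (4 bytes per dword): bail out if
 * the GPU is wedged, wrap to the start of the ring when the request
 * would run past the effective end, and wait for enough space to be
 * freed before claiming it.
 */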
1279int intel_ring_begin(struct intel_ring_buffer *ring,
1280		     int num_dwords)
1281{
1282	struct drm_i915_private *dev_priv = ring->dev->dev_private;
1283	int n = 4*num_dwords;
1284	int ret;
1285
1286	if (atomic_load_acq_int(&dev_priv->mm.wedged))
1287		return -EIO;
1288
1289	if (ring->tail + n > ring->effective_size) {
1290		ret = intel_wrap_ring_buffer(ring);
1291		if (ret != 0)
1292			return ret;
1293	}
1294
1295	if (ring->space < n) {
1296		ret = intel_wait_ring_buffer(ring, n);
1297		if (ret != 0)
1298			return ret;
1299	}
1300
1301	ring->space -= n;
1302	return 0;
1303}
1304
1305void intel_ring_advance(struct intel_ring_buffer *ring)
1306{
1307	ring->tail &= ring->size - 1;
1308	ring->write_tail(ring, ring->tail);
1309}
1310
1311static const struct intel_ring_buffer render_ring = {
1312	.name			= "render ring",
1313	.id			= RCS,
1314	.mmio_base		= RENDER_RING_BASE,
1315	.size			= 32 * PAGE_SIZE,
1316	.init			= init_render_ring,
1317	.write_tail		= ring_write_tail,
1318	.flush			= render_ring_flush,
1319	.add_request		= render_ring_add_request,
1320	.get_seqno		= ring_get_seqno,
1321	.irq_get		= render_ring_get_irq,
1322	.irq_put		= render_ring_put_irq,
1323	.dispatch_execbuffer	= render_ring_dispatch_execbuffer,
1324	.cleanup		= render_ring_cleanup,
1325	.sync_to		= render_ring_sync_to,
1326	.semaphore_register	= {MI_SEMAPHORE_SYNC_INVALID,
1327				   MI_SEMAPHORE_SYNC_RV,
1328				   MI_SEMAPHORE_SYNC_RB},
1329	.signal_mbox		= {GEN6_VRSYNC, GEN6_BRSYNC},
1330};
1331
1332/* ring buffer for bit-stream decoder */
1333
1334static const struct intel_ring_buffer bsd_ring = {
1335	.name                   = "bsd ring",
1336	.id			= VCS,
1337	.mmio_base		= BSD_RING_BASE,
1338	.size			= 32 * PAGE_SIZE,
1339	.init			= init_ring_common,
1340	.write_tail		= ring_write_tail,
1341	.flush			= bsd_ring_flush,
1342	.add_request		= ring_add_request,
1343	.get_seqno		= ring_get_seqno,
1344	.irq_get		= bsd_ring_get_irq,
1345	.irq_put		= bsd_ring_put_irq,
1346	.dispatch_execbuffer	= ring_dispatch_execbuffer,
1347};
1348
1349
1350static void gen6_bsd_ring_write_tail(struct intel_ring_buffer *ring,
1351				     uint32_t value)
1352{
1353	drm_i915_private_t *dev_priv = ring->dev->dev_private;
1354
1355	/* Every tail move must follow the sequence below */
1356	I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
1357	    GEN6_BSD_SLEEP_PSMI_CONTROL_RC_ILDL_MESSAGE_MODIFY_MASK |
1358	    GEN6_BSD_SLEEP_PSMI_CONTROL_RC_ILDL_MESSAGE_DISABLE);
1359	I915_WRITE(GEN6_BSD_RNCID, 0x0);
1360
1361	if (_intel_wait_for(ring->dev,
1362	    (I915_READ(GEN6_BSD_SLEEP_PSMI_CONTROL) &
1363	     GEN6_BSD_SLEEP_PSMI_CONTROL_IDLE_INDICATOR) == 0, 50,
1364	    true, "915g6i") != 0)
1365		DRM_ERROR("timed out waiting for IDLE Indicator\n");
1366
1367	I915_WRITE_TAIL(ring, value);
1368	I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
1369	    GEN6_BSD_SLEEP_PSMI_CONTROL_RC_ILDL_MESSAGE_MODIFY_MASK |
1370	    GEN6_BSD_SLEEP_PSMI_CONTROL_RC_ILDL_MESSAGE_ENABLE);
1371}
1372
1373static int gen6_ring_flush(struct intel_ring_buffer *ring,
1374			   uint32_t invalidate, uint32_t flush)
1375{
1376	uint32_t cmd;
1377	int ret;
1378
1379	ret = intel_ring_begin(ring, 4);
1380	if (ret)
1381		return ret;
1382
1383	cmd = MI_FLUSH_DW;
1384	if (invalidate & I915_GEM_GPU_DOMAINS)
1385		cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD;
1386	intel_ring_emit(ring, cmd);
1387	intel_ring_emit(ring, 0);
1388	intel_ring_emit(ring, 0);
1389	intel_ring_emit(ring, MI_NOOP);
1390	intel_ring_advance(ring);
1391	return 0;
1392}
1393
1394static int
1395gen6_ring_dispatch_execbuffer(struct intel_ring_buffer *ring,
1396			      uint32_t offset, uint32_t len)
1397{
1398	int ret;
1399
1400	ret = intel_ring_begin(ring, 2);
1401	if (ret)
1402		return ret;
1403
1404	intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_NON_SECURE_I965);
1405	/* bit0-7 is the length on GEN6+ */
1406	intel_ring_emit(ring, offset);
1407	intel_ring_advance(ring);
1408
1409	return 0;
1410}
1411
1412static bool
1413gen6_render_ring_get_irq(struct intel_ring_buffer *ring)
1414{
1415	return gen6_ring_get_irq(ring,
1416				 GT_USER_INTERRUPT,
1417				 GEN6_RENDER_USER_INTERRUPT);
1418}
1419
1420static void
1421gen6_render_ring_put_irq(struct intel_ring_buffer *ring)
1422{
1423	return gen6_ring_put_irq(ring,
1424				 GT_USER_INTERRUPT,
1425				 GEN6_RENDER_USER_INTERRUPT);
1426}
1427
1428static bool
1429gen6_bsd_ring_get_irq(struct intel_ring_buffer *ring)
1430{
1431	return gen6_ring_get_irq(ring,
1432				 GT_GEN6_BSD_USER_INTERRUPT,
1433				 GEN6_BSD_USER_INTERRUPT);
1434}
1435
1436static void
1437gen6_bsd_ring_put_irq(struct intel_ring_buffer *ring)
1438{
1439	return gen6_ring_put_irq(ring,
1440				 GT_GEN6_BSD_USER_INTERRUPT,
1441				 GEN6_BSD_USER_INTERRUPT);
1442}
1443
1444/* ring buffer for Video Codec for Gen6+ */
1445static const struct intel_ring_buffer gen6_bsd_ring = {
1446	.name			= "gen6 bsd ring",
1447	.id			= VCS,
1448	.mmio_base		= GEN6_BSD_RING_BASE,
1449	.size			= 32 * PAGE_SIZE,
1450	.init			= init_ring_common,
1451	.write_tail		= gen6_bsd_ring_write_tail,
1452	.flush			= gen6_ring_flush,
1453	.add_request		= gen6_add_request,
1454	.get_seqno		= gen6_ring_get_seqno,
1455	.irq_get		= gen6_bsd_ring_get_irq,
1456	.irq_put		= gen6_bsd_ring_put_irq,
1457	.dispatch_execbuffer	= gen6_ring_dispatch_execbuffer,
1458	.sync_to		= gen6_bsd_ring_sync_to,
1459	.semaphore_register	= {MI_SEMAPHORE_SYNC_VR,
1460				   MI_SEMAPHORE_SYNC_INVALID,
1461				   MI_SEMAPHORE_SYNC_VB},
1462	.signal_mbox		= {GEN6_RVSYNC, GEN6_BVSYNC},
1463};
1464
1465/* Blitter support (SandyBridge+) */
1466
1467static bool
1468blt_ring_get_irq(struct intel_ring_buffer *ring)
1469{
1470	return gen6_ring_get_irq(ring,
1471				 GT_BLT_USER_INTERRUPT,
1472				 GEN6_BLITTER_USER_INTERRUPT);
1473}
1474
1475static void
1476blt_ring_put_irq(struct intel_ring_buffer *ring)
1477{
1478	gen6_ring_put_irq(ring,
1479			  GT_BLT_USER_INTERRUPT,
1480			  GEN6_BLITTER_USER_INTERRUPT);
1481}
1482
1483static int blt_ring_flush(struct intel_ring_buffer *ring,
1484			  uint32_t invalidate, uint32_t flush)
1485{
1486	uint32_t cmd;
1487	int ret;
1488
1489	ret = intel_ring_begin(ring, 4);
1490	if (ret)
1491		return ret;
1492
1493	cmd = MI_FLUSH_DW;
1494	if (invalidate & I915_GEM_DOMAIN_RENDER)
1495		cmd |= MI_INVALIDATE_TLB;
1496	intel_ring_emit(ring, cmd);
1497	intel_ring_emit(ring, 0);
1498	intel_ring_emit(ring, 0);
1499	intel_ring_emit(ring, MI_NOOP);
1500	intel_ring_advance(ring);
1501	return 0;
1502}
1503
1504static const struct intel_ring_buffer gen6_blt_ring = {
1505	.name			= "blt ring",
1506	.id			= BCS,
1507	.mmio_base		= BLT_RING_BASE,
1508	.size			= 32 * PAGE_SIZE,
1509	.init			= init_ring_common,
1510	.write_tail		= ring_write_tail,
1511	.flush			= blt_ring_flush,
1512	.add_request		= gen6_add_request,
1513	.get_seqno		= gen6_ring_get_seqno,
1514	.irq_get		= blt_ring_get_irq,
1515	.irq_put		= blt_ring_put_irq,
1516	.dispatch_execbuffer	= gen6_ring_dispatch_execbuffer,
1517	.sync_to		= gen6_blt_ring_sync_to,
1518	.semaphore_register	= {MI_SEMAPHORE_SYNC_BR,
1519				   MI_SEMAPHORE_SYNC_BV,
1520				   MI_SEMAPHORE_SYNC_INVALID},
1521	.signal_mbox		= {GEN6_RBSYNC, GEN6_VBSYNC},
1522};
1523
1524int intel_init_render_ring_buffer(struct drm_device *dev)
1525{
1526	drm_i915_private_t *dev_priv = dev->dev_private;
1527	struct intel_ring_buffer *ring = &dev_priv->rings[RCS];
1528
1529	*ring = render_ring;
1530	if (INTEL_INFO(dev)->gen >= 6) {
1531		ring->add_request = gen6_add_request;
1532		ring->flush = gen6_render_ring_flush;
1533		ring->irq_get = gen6_render_ring_get_irq;
1534		ring->irq_put = gen6_render_ring_put_irq;
1535		ring->get_seqno = gen6_ring_get_seqno;
1536	} else if (IS_GEN5(dev)) {
1537		ring->add_request = pc_render_add_request;
1538		ring->get_seqno = pc_render_get_seqno;
1539	}
1540
1541	if (!I915_NEED_GFX_HWS(dev)) {
1542		ring->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
1543		memset(ring->status_page.page_addr, 0, PAGE_SIZE);
1544	}
1545
1546	return intel_init_ring_buffer(dev, ring);
1547}
1548
1549int intel_render_ring_init_dri(struct drm_device *dev, uint64_t start,
1550    uint32_t size)
1551{
1552	drm_i915_private_t *dev_priv = dev->dev_private;
1553	struct intel_ring_buffer *ring = &dev_priv->rings[RCS];
1554
1555	*ring = render_ring;
1556	if (INTEL_INFO(dev)->gen >= 6) {
1557		ring->add_request = gen6_add_request;
1558		ring->irq_get = gen6_render_ring_get_irq;
1559		ring->irq_put = gen6_render_ring_put_irq;
1560	} else if (IS_GEN5(dev)) {
1561		ring->add_request = pc_render_add_request;
1562		ring->get_seqno = pc_render_get_seqno;
1563	}
1564
1565	ring->dev = dev;
1566	INIT_LIST_HEAD(&ring->active_list);
1567	INIT_LIST_HEAD(&ring->request_list);
1568	INIT_LIST_HEAD(&ring->gpu_write_list);
1569
1570	ring->size = size;
1571	ring->effective_size = ring->size;
1572	if (IS_I830(ring->dev))
1573		ring->effective_size -= 128;
1574
1575	ring->map.offset = start;
1576	ring->map.size = size;
1577	ring->map.type = 0;
1578	ring->map.flags = 0;
1579	ring->map.mtrr = 0;
1580
1581	drm_core_ioremap_wc(&ring->map, dev);
1582	if (ring->map.virtual == NULL) {
1583		DRM_ERROR("can not ioremap virtual address for"
1584			  " ring buffer\n");
1585		return -ENOMEM;
1586	}
1587
1588	ring->virtual_start = (void *)ring->map.virtual;
1589	return 0;
1590}
1591
1592int intel_init_bsd_ring_buffer(struct drm_device *dev)
1593{
1594	drm_i915_private_t *dev_priv = dev->dev_private;
1595	struct intel_ring_buffer *ring = &dev_priv->rings[VCS];
1596
1597	if (IS_GEN6(dev) || IS_GEN7(dev))
1598		*ring = gen6_bsd_ring;
1599	else
1600		*ring = bsd_ring;
1601
1602	return intel_init_ring_buffer(dev, ring);
1603}
1604
1605int intel_init_blt_ring_buffer(struct drm_device *dev)
1606{
1607	drm_i915_private_t *dev_priv = dev->dev_private;
1608	struct intel_ring_buffer *ring = &dev_priv->rings[BCS];
1609
1610	*ring = gen6_blt_ring;
1611
1612	return intel_init_ring_buffer(dev, ring);
1613}
1614