intel_ringbuffer.c revision 254025
1/*
2 * Copyright © 2008-2010 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 *    Eric Anholt <eric@anholt.net>
25 *    Zou Nan hai <nanhai.zou@intel.com>
26 *    Xiang Hai hao <haihao.xiang@intel.com>
27 *
28 */
29
30#include <sys/cdefs.h>
31__FBSDID("$FreeBSD: head/sys/dev/drm2/i915/intel_ringbuffer.c 254025 2013-08-07 06:21:20Z jeff $");
32
33#include <dev/drm2/drmP.h>
34#include <dev/drm2/drm.h>
35#include <dev/drm2/i915/i915_drm.h>
36#include <dev/drm2/i915/i915_drv.h>
37#include <dev/drm2/i915/intel_drv.h>
38#include <dev/drm2/i915/intel_ringbuffer.h>
39#include <sys/sched.h>
40#include <sys/sf_buf.h>
41
42/*
43 * 965+ support PIPE_CONTROL commands, which provide finer grained control
44 * over cache flushing.
45 */
46struct pipe_control {
47	struct drm_i915_gem_object *obj;
48	volatile u32 *cpu_page;
49	u32 gtt_offset;
50};
51
52void
53i915_trace_irq_get(struct intel_ring_buffer *ring, uint32_t seqno)
54{
55
56	if (ring->trace_irq_seqno == 0) {
57		mtx_lock(&ring->irq_lock);
58		if (ring->irq_get(ring))
59			ring->trace_irq_seqno = seqno;
60		mtx_unlock(&ring->irq_lock);
61	}
62}
63
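/*
 * Free space between the CPU-owned tail and the GPU-owned head, less
 * eight bytes of slack so the tail can never advance to equal the head
 * (an exactly-full ring would otherwise be indistinguishable from an
 * empty one).
 */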
64static inline int ring_space(struct intel_ring_buffer *ring)
65{
66	int space = (ring->head & HEAD_ADDR) - (ring->tail + 8);
67	if (space < 0)
68		space += ring->size;
69	return space;
70}
71
72static int
73render_ring_flush(struct intel_ring_buffer *ring,
74		  uint32_t	invalidate_domains,
75		  uint32_t	flush_domains)
76{
77	struct drm_device *dev = ring->dev;
78	uint32_t cmd;
79	int ret;
80
81	/*
82	 * read/write caches:
83	 *
84	 * I915_GEM_DOMAIN_RENDER is always invalidated, but is
85	 * only flushed if MI_NO_WRITE_FLUSH is unset.  On 965, it is
86	 * also flushed at 2d versus 3d pipeline switches.
87	 *
88	 * read-only caches:
89	 *
90	 * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
91	 * MI_READ_FLUSH is set, and is always flushed on 965.
92	 *
93	 * I915_GEM_DOMAIN_COMMAND may not exist?
94	 *
95	 * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
96	 * invalidated when MI_EXE_FLUSH is set.
97	 *
98	 * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
99	 * invalidated with every MI_FLUSH.
100	 *
101	 * TLBs:
102	 *
103	 * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
104	 * and I915_GEM_DOMAIN_CPU are invalidated at PTE write, and
105	 * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
106	 * are flushed at any MI_FLUSH.
107	 */
108
109	cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
110	if ((invalidate_domains|flush_domains) &
111	    I915_GEM_DOMAIN_RENDER)
112		cmd &= ~MI_NO_WRITE_FLUSH;
113	if (INTEL_INFO(dev)->gen < 4) {
114		/*
115		 * On the 965, the sampler cache always gets flushed
116		 * and this bit is reserved.
117		 */
118		if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER)
119			cmd |= MI_READ_FLUSH;
120	}
121	if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION)
122		cmd |= MI_EXE_FLUSH;
123
124	if (invalidate_domains & I915_GEM_DOMAIN_COMMAND &&
125	    (IS_G4X(dev) || IS_GEN5(dev)))
126		cmd |= MI_INVALIDATE_ISP;
127
128	ret = intel_ring_begin(ring, 2);
129	if (ret)
130		return ret;
131
132	intel_ring_emit(ring, cmd);
133	intel_ring_emit(ring, MI_NOOP);
134	intel_ring_advance(ring);
135
136	return 0;
137}
138
139/**
140 * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
141 * implementing two workarounds on gen6.  From section 1.4.7.1
142 * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
143 *
144 * [DevSNB-C+{W/A}] Before any depth stall flush (including those
145 * produced by non-pipelined state commands), software needs to first
146 * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
147 * 0.
148 *
149 * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
150 * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
151 *
152 * And the workaround for these two requires this workaround first:
153 *
154 * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
155 * BEFORE the pipe-control with a post-sync op and no write-cache
156 * flushes.
157 *
158 * And this last workaround is tricky because of the requirements on
159 * that bit.  From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
160 * volume 2 part 1:
161 *
162 *     "1 of the following must also be set:
163 *      - Render Target Cache Flush Enable ([12] of DW1)
164 *      - Depth Cache Flush Enable ([0] of DW1)
165 *      - Stall at Pixel Scoreboard ([1] of DW1)
166 *      - Depth Stall ([13] of DW1)
167 *      - Post-Sync Operation ([13] of DW1)
168 *      - Notify Enable ([8] of DW1)"
169 *
170 * The cache flushes require the workaround flush that triggered this
171 * one, so we can't use it.  Depth stall would trigger the same.
172 * Post-sync nonzero is what triggered this second workaround, so we
173 * can't use that one either.  Notify enable is IRQs, which aren't
174 * really our business.  That leaves only stall at scoreboard.
175 */
176static int
177intel_emit_post_sync_nonzero_flush(struct intel_ring_buffer *ring)
178{
179	struct pipe_control *pc = ring->private;
180	u32 scratch_addr = pc->gtt_offset + 128;
181	int ret;
182
183
184	ret = intel_ring_begin(ring, 6);
185	if (ret)
186		return ret;
187
188	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
189	intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
190			PIPE_CONTROL_STALL_AT_SCOREBOARD);
191	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
192	intel_ring_emit(ring, 0); /* low dword */
193	intel_ring_emit(ring, 0); /* high dword */
194	intel_ring_emit(ring, MI_NOOP);
195	intel_ring_advance(ring);
196
197	ret = intel_ring_begin(ring, 6);
198	if (ret)
199		return ret;
200
201	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
202	intel_ring_emit(ring, PIPE_CONTROL_QW_WRITE);
203	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
204	intel_ring_emit(ring, 0);
205	intel_ring_emit(ring, 0);
206	intel_ring_emit(ring, MI_NOOP);
207	intel_ring_advance(ring);
208
209	return 0;
210}
211
212static int
213gen6_render_ring_flush(struct intel_ring_buffer *ring,
214                         u32 invalidate_domains, u32 flush_domains)
215{
216	u32 flags = 0;
217	struct pipe_control *pc = ring->private;
218	u32 scratch_addr = pc->gtt_offset + 128;
219	int ret;
220
221	/* Force SNB workarounds for PIPE_CONTROL flushes */
222	ret = intel_emit_post_sync_nonzero_flush(ring);
	if (ret)
		return ret;
223
224	/* Just flush everything.  Experiments have shown that reducing the
225	 * number of bits based on the write domains has little performance
226	 * impact.
227	 */
228	flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
229	flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
230	flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
231	flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
232	flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
233	flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
234	flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
235
236	ret = intel_ring_begin(ring, 6);
237	if (ret)
238		return ret;
239
240	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
241	intel_ring_emit(ring, flags);
242	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
243	intel_ring_emit(ring, 0); /* lower dword */
244	intel_ring_emit(ring, 0); /* upper dword */
245	intel_ring_emit(ring, MI_NOOP);
246	intel_ring_advance(ring);
247
248	return 0;
249}
250
251static void ring_write_tail(struct intel_ring_buffer *ring,
252			    uint32_t value)
253{
254	drm_i915_private_t *dev_priv = ring->dev->dev_private;
255	I915_WRITE_TAIL(ring, value);
256}
257
258u32 intel_ring_get_active_head(struct intel_ring_buffer *ring)
259{
260	drm_i915_private_t *dev_priv = ring->dev->dev_private;
261	uint32_t acthd_reg = INTEL_INFO(ring->dev)->gen >= 4 ?
262			RING_ACTHD(ring->mmio_base) : ACTHD;
263
264	return I915_READ(acthd_reg);
265}
266
267static int init_ring_common(struct intel_ring_buffer *ring)
268{
269	drm_i915_private_t *dev_priv = ring->dev->dev_private;
270	struct drm_i915_gem_object *obj = ring->obj;
271	uint32_t head;
272
273	/* Stop the ring if it's running. */
274	I915_WRITE_CTL(ring, 0);
275	I915_WRITE_HEAD(ring, 0);
276	ring->write_tail(ring, 0);
277
278	/* Initialize the ring. */
279	I915_WRITE_START(ring, obj->gtt_offset);
280	head = I915_READ_HEAD(ring) & HEAD_ADDR;
281
282	/* G45 ring initialization fails to reset head to zero */
283	if (head != 0) {
284		DRM_DEBUG("%s head not reset to zero "
285			      "ctl %08x head %08x tail %08x start %08x\n",
286			      ring->name,
287			      I915_READ_CTL(ring),
288			      I915_READ_HEAD(ring),
289			      I915_READ_TAIL(ring),
290			      I915_READ_START(ring));
291
292		I915_WRITE_HEAD(ring, 0);
293
294		if (I915_READ_HEAD(ring) & HEAD_ADDR) {
295			DRM_ERROR("failed to set %s head to zero "
296				  "ctl %08x head %08x tail %08x start %08x\n",
297				  ring->name,
298				  I915_READ_CTL(ring),
299				  I915_READ_HEAD(ring),
300				  I915_READ_TAIL(ring),
301				  I915_READ_START(ring));
302		}
303	}
304
305	I915_WRITE_CTL(ring,
306			((ring->size - PAGE_SIZE) & RING_NR_PAGES)
307			| RING_VALID);
308
309	/* If the head is still not zero, the ring is dead */
310	if (_intel_wait_for(ring->dev,
311	    (I915_READ_CTL(ring) & RING_VALID) != 0 &&
312	     I915_READ_START(ring) == obj->gtt_offset &&
313	     (I915_READ_HEAD(ring) & HEAD_ADDR) == 0,
314	    50, 1, "915rii")) {
315		DRM_ERROR("%s initialization failed "
316				"ctl %08x head %08x tail %08x start %08x\n",
317				ring->name,
318				I915_READ_CTL(ring),
319				I915_READ_HEAD(ring),
320				I915_READ_TAIL(ring),
321				I915_READ_START(ring));
322		return -EIO;
323	}
324
325	if (!drm_core_check_feature(ring->dev, DRIVER_MODESET))
326		i915_kernel_lost_context(ring->dev);
327	else {
328		ring->head = I915_READ_HEAD(ring);
329		ring->tail = I915_READ_TAIL(ring) & TAIL_ADDR;
330		ring->space = ring_space(ring);
331	}
332
333	return 0;
334}
335
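/*
 * The pipe_control object is a single pinned GTT page that is also
 * mapped for the CPU: pc_render_add_request() has the GPU write the
 * seqno into its first qword, pc_render_get_seqno() reads it back
 * through cpu_page, and the 128-byte-strided offsets above gtt_offset
 * are used as scratch space by the workaround flushes.
 */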
336static int
337init_pipe_control(struct intel_ring_buffer *ring)
338{
339	struct pipe_control *pc;
340	struct drm_i915_gem_object *obj;
341	int ret;
342
343	if (ring->private)
344		return 0;
345
346	pc = malloc(sizeof(*pc), DRM_I915_GEM, M_WAITOK);
347	if (!pc)
348		return -ENOMEM;
349
350	obj = i915_gem_alloc_object(ring->dev, 4096);
351	if (obj == NULL) {
352		DRM_ERROR("Failed to allocate seqno page\n");
353		ret = -ENOMEM;
354		goto err;
355	}
356
357	i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
358
359	ret = i915_gem_object_pin(obj, 4096, true);
360	if (ret)
361		goto err_unref;
362
363	pc->gtt_offset = obj->gtt_offset;
364	pc->cpu_page = (uint32_t *)kva_alloc(PAGE_SIZE);
365	if (pc->cpu_page == NULL) {
366		ret = -ENOMEM;
		goto err_unpin;
	}
367	pmap_qenter((uintptr_t)pc->cpu_page, &obj->pages[0], 1);
368	pmap_invalidate_cache_range((vm_offset_t)pc->cpu_page,
369	    (vm_offset_t)pc->cpu_page + PAGE_SIZE);
370
371	pc->obj = obj;
372	ring->private = pc;
373	return 0;
374
375err_unpin:
376	i915_gem_object_unpin(obj);
377err_unref:
378	drm_gem_object_unreference(&obj->base);
379err:
380	free(pc, DRM_I915_GEM);
381	return ret;
382}
383
384static void
385cleanup_pipe_control(struct intel_ring_buffer *ring)
386{
387	struct pipe_control *pc = ring->private;
388	struct drm_i915_gem_object *obj;
389
390	if (!ring->private)
391		return;
392
393	obj = pc->obj;
394	pmap_qremove((vm_offset_t)pc->cpu_page, 1);
395	kva_free((uintptr_t)pc->cpu_page, PAGE_SIZE);
396	i915_gem_object_unpin(obj);
397	drm_gem_object_unreference(&obj->base);
398
399	free(pc, DRM_I915_GEM);
400	ring->private = NULL;
401}
402
403static int init_render_ring(struct intel_ring_buffer *ring)
404{
405	struct drm_device *dev = ring->dev;
406	struct drm_i915_private *dev_priv = dev->dev_private;
407	int ret = init_ring_common(ring);
408
409	if (INTEL_INFO(dev)->gen > 3) {
410		int mode = VS_TIMER_DISPATCH << 16 | VS_TIMER_DISPATCH;
411		I915_WRITE(MI_MODE, mode);
412		if (IS_GEN7(dev))
413			I915_WRITE(GFX_MODE_GEN7,
414				   GFX_MODE_DISABLE(GFX_TLB_INVALIDATE_ALWAYS) |
415				   GFX_MODE_ENABLE(GFX_REPLAY_MODE));
416	}
417
418	if (INTEL_INFO(dev)->gen >= 5) {
419		ret = init_pipe_control(ring);
420		if (ret)
421			return ret;
422	}
423
424
425	if (IS_GEN6(dev)) {
426		/* From the Sandybridge PRM, volume 1 part 3, page 24:
427		 * "If this bit is set, STCunit will have LRA as replacement
428		 *  policy. [...] This bit must be reset.  LRA replacement
429		 *  policy is not supported."
430		 */
431		I915_WRITE(CACHE_MODE_0,
432			   CM0_STC_EVICT_DISABLE_LRA_SNB << CM0_MASK_SHIFT);
433	}
434
435	if (INTEL_INFO(dev)->gen >= 6) {
436		I915_WRITE(INSTPM,
437			   INSTPM_FORCE_ORDERING << 16 | INSTPM_FORCE_ORDERING);
438	}
439
440	return ret;
441}
442
443static void render_ring_cleanup(struct intel_ring_buffer *ring)
444{
445	if (!ring->private)
446		return;
447
448	cleanup_pipe_control(ring);
449}
450
451static void
452update_mboxes(struct intel_ring_buffer *ring,
453	    u32 seqno,
454	    u32 mmio_offset)
455{
456	intel_ring_emit(ring, MI_SEMAPHORE_MBOX |
457			      MI_SEMAPHORE_GLOBAL_GTT |
458			      MI_SEMAPHORE_REGISTER |
459			      MI_SEMAPHORE_UPDATE);
460	intel_ring_emit(ring, seqno);
461	intel_ring_emit(ring, mmio_offset);
462}
463
464/**
465 * gen6_add_request - Update the semaphore mailbox registers
466 *
467 * @ring - ring that is adding a request
468 * @seqno - return seqno stuck into the ring
469 *
470 * Update the mailbox registers in the *other* rings with the current seqno.
471 * This acts like a signal in the canonical semaphore.
472 */
473static int
474gen6_add_request(struct intel_ring_buffer *ring,
475		 u32 *seqno)
476{
477	u32 mbox1_reg;
478	u32 mbox2_reg;
479	int ret;
480
481	ret = intel_ring_begin(ring, 10);
482	if (ret)
483		return ret;
484
485	mbox1_reg = ring->signal_mbox[0];
486	mbox2_reg = ring->signal_mbox[1];
487
488	*seqno = i915_gem_next_request_seqno(ring);
489
490	update_mboxes(ring, *seqno, mbox1_reg);
491	update_mboxes(ring, *seqno, mbox2_reg);
492	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
493	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
494	intel_ring_emit(ring, *seqno);
495	intel_ring_emit(ring, MI_USER_INTERRUPT);
496	intel_ring_advance(ring);
497
498	return 0;
499}
500
501/**
502 * intel_ring_sync - sync the waiter to the signaller on seqno
503 *
504 * @waiter - ring that is waiting
505 * @signaller - ring which has, or will signal
506 * @seqno - seqno which the waiter will block on
507 */
508static int
509intel_ring_sync(struct intel_ring_buffer *waiter,
510		struct intel_ring_buffer *signaller,
511		int ring,
512		u32 seqno)
513{
514	int ret;
515	u32 dw1 = MI_SEMAPHORE_MBOX |
516		  MI_SEMAPHORE_COMPARE |
517		  MI_SEMAPHORE_REGISTER;
518
519	ret = intel_ring_begin(waiter, 4);
520	if (ret)
521		return ret;
522
523	intel_ring_emit(waiter, dw1 | signaller->semaphore_register[ring]);
524	intel_ring_emit(waiter, seqno);
525	intel_ring_emit(waiter, 0);
526	intel_ring_emit(waiter, MI_NOOP);
527	intel_ring_advance(waiter);
528
529	return 0;
530}
531
532int render_ring_sync_to(struct intel_ring_buffer *waiter,
533    struct intel_ring_buffer *signaller, u32 seqno);
534int gen6_bsd_ring_sync_to(struct intel_ring_buffer *waiter,
535    struct intel_ring_buffer *signaller, u32 seqno);
536int gen6_blt_ring_sync_to(struct intel_ring_buffer *waiter,
537    struct intel_ring_buffer *signaller, u32 seqno);
538
539/* VCS->RCS (RVSYNC) or BCS->RCS (RBSYNC) */
540int
541render_ring_sync_to(struct intel_ring_buffer *waiter,
542		    struct intel_ring_buffer *signaller,
543		    u32 seqno)
544{
545	KASSERT(signaller->semaphore_register[RCS] != MI_SEMAPHORE_SYNC_INVALID,
546	    ("valid RCS semaphore"));
547	return intel_ring_sync(waiter,
548			       signaller,
549			       RCS,
550			       seqno);
551}
552
553/* RCS->VCS (VRSYNC) or BCS->VCS (VBSYNC) */
554int
555gen6_bsd_ring_sync_to(struct intel_ring_buffer *waiter,
556		      struct intel_ring_buffer *signaller,
557		      u32 seqno)
558{
559	KASSERT(signaller->semaphore_register[VCS] != MI_SEMAPHORE_SYNC_INVALID,
560	    ("Valid VCS semaphore"));
561	return intel_ring_sync(waiter,
562			       signaller,
563			       VCS,
564			       seqno);
565}
566
567/* RCS->BCS (BRSYNC) or VCS->BCS (BVSYNC) */
568int
569gen6_blt_ring_sync_to(struct intel_ring_buffer *waiter,
570		      struct intel_ring_buffer *signaller,
571		      u32 seqno)
572{
573	KASSERT(signaller->semaphore_register[BCS] != MI_SEMAPHORE_SYNC_INVALID,
574	    ("Valid BCS semaphore"));
575	return intel_ring_sync(waiter,
576			       signaller,
577			       BCS,
578			       seqno);
579}
580
581#define PIPE_CONTROL_FLUSH(ring__, addr__)					\
582do {									\
583	intel_ring_emit(ring__, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |		\
584		 PIPE_CONTROL_DEPTH_STALL);				\
585	intel_ring_emit(ring__, (addr__) | PIPE_CONTROL_GLOBAL_GTT);			\
586	intel_ring_emit(ring__, 0);							\
587	intel_ring_emit(ring__, 0);							\
588} while (0)
589
590static int
591pc_render_add_request(struct intel_ring_buffer *ring,
592		      uint32_t *result)
593{
594	u32 seqno = i915_gem_next_request_seqno(ring);
595	struct pipe_control *pc = ring->private;
596	u32 scratch_addr = pc->gtt_offset + 128;
597	int ret;
598
599	/* For Ironlake, MI_USER_INTERRUPT was deprecated and apparently
600	 * incoherent with writes to memory, i.e. completely fubar,
601	 * so we need to use PIPE_NOTIFY instead.
602	 *
603	 * However, we also need to workaround the qword write
604	 * incoherence by flushing the 6 PIPE_NOTIFY buffers out to
605	 * memory before requesting an interrupt.
606	 */
607	ret = intel_ring_begin(ring, 32);
608	if (ret)
609		return ret;
610
611	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
612			PIPE_CONTROL_WRITE_FLUSH |
613			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
614	intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
615	intel_ring_emit(ring, seqno);
616	intel_ring_emit(ring, 0);
617	PIPE_CONTROL_FLUSH(ring, scratch_addr);
618	scratch_addr += 128; /* write to separate cachelines */
619	PIPE_CONTROL_FLUSH(ring, scratch_addr);
620	scratch_addr += 128;
621	PIPE_CONTROL_FLUSH(ring, scratch_addr);
622	scratch_addr += 128;
623	PIPE_CONTROL_FLUSH(ring, scratch_addr);
624	scratch_addr += 128;
625	PIPE_CONTROL_FLUSH(ring, scratch_addr);
626	scratch_addr += 128;
627	PIPE_CONTROL_FLUSH(ring, scratch_addr);
628	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
629			PIPE_CONTROL_WRITE_FLUSH |
630			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
631			PIPE_CONTROL_NOTIFY);
632	intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
633	intel_ring_emit(ring, seqno);
634	intel_ring_emit(ring, 0);
635	intel_ring_advance(ring);
636
637	*result = seqno;
638	return 0;
639}
640
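/*
 * MI_STORE_DWORD_INDEX below writes the new seqno into the hardware
 * status page at I915_GEM_HWS_INDEX, which is where ring_get_seqno()
 * reads it back from; MI_USER_INTERRUPT then raises the completion
 * interrupt.
 */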
641static int
642render_ring_add_request(struct intel_ring_buffer *ring,
643			uint32_t *result)
644{
645	u32 seqno = i915_gem_next_request_seqno(ring);
646	int ret;
647
648	ret = intel_ring_begin(ring, 4);
649	if (ret)
650		return ret;
651
652	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
653	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
654	intel_ring_emit(ring, seqno);
655	intel_ring_emit(ring, MI_USER_INTERRUPT);
656	intel_ring_advance(ring);
657
658	*result = seqno;
659	return 0;
660}
661
662static u32
663gen6_ring_get_seqno(struct intel_ring_buffer *ring)
664{
665	struct drm_device *dev = ring->dev;
666
667	/* Workaround to force correct ordering between irq and seqno writes on
668	 * ivb (and maybe also on snb) by reading from a CS register (like
669	 * ACTHD) before reading the status page. */
670	if (/* IS_GEN6(dev) || */IS_GEN7(dev))
671		intel_ring_get_active_head(ring);
672	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
673}
674
675static uint32_t
676ring_get_seqno(struct intel_ring_buffer *ring)
677{
678	if (ring->status_page.page_addr == NULL)
679		return (-1);
680	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
681}
682
683static uint32_t
684pc_render_get_seqno(struct intel_ring_buffer *ring)
685{
686	struct pipe_control *pc = ring->private;
687	if (pc != NULL)
688		return pc->cpu_page[0];
689	else
690		return (-1);
691}
692
693static void
694ironlake_enable_irq(drm_i915_private_t *dev_priv, uint32_t mask)
695{
696	dev_priv->gt_irq_mask &= ~mask;
697	I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
698	POSTING_READ(GTIMR);
699}
700
701static void
702ironlake_disable_irq(drm_i915_private_t *dev_priv, uint32_t mask)
703{
704	dev_priv->gt_irq_mask |= mask;
705	I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
706	POSTING_READ(GTIMR);
707}
708
709static void
710i915_enable_irq(drm_i915_private_t *dev_priv, uint32_t mask)
711{
712	dev_priv->irq_mask &= ~mask;
713	I915_WRITE(IMR, dev_priv->irq_mask);
714	POSTING_READ(IMR);
715}
716
717static void
718i915_disable_irq(drm_i915_private_t *dev_priv, uint32_t mask)
719{
720	dev_priv->irq_mask |= mask;
721	I915_WRITE(IMR, dev_priv->irq_mask);
722	POSTING_READ(IMR);
723}
724
725static bool
726render_ring_get_irq(struct intel_ring_buffer *ring)
727{
728	struct drm_device *dev = ring->dev;
729	drm_i915_private_t *dev_priv = dev->dev_private;
730
731	if (!dev->irq_enabled)
732		return false;
733
734	mtx_assert(&ring->irq_lock, MA_OWNED);
735	if (ring->irq_refcount++ == 0) {
736		if (HAS_PCH_SPLIT(dev))
737			ironlake_enable_irq(dev_priv,
738					    GT_PIPE_NOTIFY | GT_USER_INTERRUPT);
739		else
740			i915_enable_irq(dev_priv, I915_USER_INTERRUPT);
741	}
742
743	return true;
744}
745
746static void
747render_ring_put_irq(struct intel_ring_buffer *ring)
748{
749	struct drm_device *dev = ring->dev;
750	drm_i915_private_t *dev_priv = dev->dev_private;
751
752	mtx_assert(&ring->irq_lock, MA_OWNED);
753	if (--ring->irq_refcount == 0) {
754		if (HAS_PCH_SPLIT(dev))
755			ironlake_disable_irq(dev_priv,
756					     GT_USER_INTERRUPT |
757					     GT_PIPE_NOTIFY);
758		else
759			i915_disable_irq(dev_priv, I915_USER_INTERRUPT);
760	}
761}
762
763void intel_ring_setup_status_page(struct intel_ring_buffer *ring)
764{
765	struct drm_device *dev = ring->dev;
766	drm_i915_private_t *dev_priv = dev->dev_private;
767	uint32_t mmio = 0;
768
769	/* The ring status page addresses are no longer next to the rest of
770	 * the ring registers as of gen7.
771	 */
772	if (IS_GEN7(dev)) {
773		switch (ring->id) {
774		case RCS:
775			mmio = RENDER_HWS_PGA_GEN7;
776			break;
777		case BCS:
778			mmio = BLT_HWS_PGA_GEN7;
779			break;
780		case VCS:
781			mmio = BSD_HWS_PGA_GEN7;
782			break;
783		}
784	} else if (IS_GEN6(dev)) {
785		mmio = RING_HWS_PGA_GEN6(ring->mmio_base);
786	} else {
787		mmio = RING_HWS_PGA(ring->mmio_base);
788	}
789
790	I915_WRITE(mmio, (u32)ring->status_page.gfx_addr);
791	POSTING_READ(mmio);
792}
793
794static int
795bsd_ring_flush(struct intel_ring_buffer *ring,
796	       uint32_t     invalidate_domains,
797	       uint32_t     flush_domains)
798{
799	int ret;
800
801	ret = intel_ring_begin(ring, 2);
802	if (ret)
803		return ret;
804
805	intel_ring_emit(ring, MI_FLUSH);
806	intel_ring_emit(ring, MI_NOOP);
807	intel_ring_advance(ring);
808	return 0;
809}
810
811static int
812ring_add_request(struct intel_ring_buffer *ring,
813		 uint32_t *result)
814{
815	uint32_t seqno;
816	int ret;
817
818	ret = intel_ring_begin(ring, 4);
819	if (ret)
820		return ret;
821
822	seqno = i915_gem_next_request_seqno(ring);
823
824	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
825	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
826	intel_ring_emit(ring, seqno);
827	intel_ring_emit(ring, MI_USER_INTERRUPT);
828	intel_ring_advance(ring);
829
830	*result = seqno;
831	return 0;
832}
833
834static bool
835gen6_ring_get_irq(struct intel_ring_buffer *ring, uint32_t gflag, uint32_t rflag)
836{
837	struct drm_device *dev = ring->dev;
838	drm_i915_private_t *dev_priv = dev->dev_private;
839
840	if (!dev->irq_enabled)
841		return false;
842
843	gen6_gt_force_wake_get(dev_priv);
844
845	mtx_assert(&ring->irq_lock, MA_OWNED);
846	if (ring->irq_refcount++ == 0) {
847		ring->irq_mask &= ~rflag;
848		I915_WRITE_IMR(ring, ring->irq_mask);
849		ironlake_enable_irq(dev_priv, gflag);
850	}
851
852	return true;
853}
854
855static void
856gen6_ring_put_irq(struct intel_ring_buffer *ring, uint32_t gflag, uint32_t rflag)
857{
858	struct drm_device *dev = ring->dev;
859	drm_i915_private_t *dev_priv = dev->dev_private;
860
861	mtx_assert(&ring->irq_lock, MA_OWNED);
862	if (--ring->irq_refcount == 0) {
863		ring->irq_mask |= rflag;
864		I915_WRITE_IMR(ring, ring->irq_mask);
865		ironlake_disable_irq(dev_priv, gflag);
866	}
867
868	gen6_gt_force_wake_put(dev_priv);
869}
870
871static bool
872bsd_ring_get_irq(struct intel_ring_buffer *ring)
873{
874	struct drm_device *dev = ring->dev;
875	drm_i915_private_t *dev_priv = dev->dev_private;
876
877	if (!dev->irq_enabled)
878		return false;
879
880	mtx_assert(&ring->irq_lock, MA_OWNED);
881	if (ring->irq_refcount++ == 0) {
882		if (IS_G4X(dev))
883			i915_enable_irq(dev_priv, I915_BSD_USER_INTERRUPT);
884		else
885			ironlake_enable_irq(dev_priv, GT_BSD_USER_INTERRUPT);
886	}
887
888	return true;
889}
890static void
891bsd_ring_put_irq(struct intel_ring_buffer *ring)
892{
893	struct drm_device *dev = ring->dev;
894	drm_i915_private_t *dev_priv = dev->dev_private;
895
896	mtx_assert(&ring->irq_lock, MA_OWNED);
897	if (--ring->irq_refcount == 0) {
898		if (IS_G4X(dev))
899			i915_disable_irq(dev_priv, I915_BSD_USER_INTERRUPT);
900		else
901			ironlake_disable_irq(dev_priv, GT_BSD_USER_INTERRUPT);
902	}
903}
904
905static int
906ring_dispatch_execbuffer(struct intel_ring_buffer *ring, uint32_t offset,
907    uint32_t length)
908{
909	int ret;
910
911	ret = intel_ring_begin(ring, 2);
912	if (ret)
913		return ret;
914
915	intel_ring_emit(ring,
916			MI_BATCH_BUFFER_START | (2 << 6) |
917			MI_BATCH_NON_SECURE_I965);
918	intel_ring_emit(ring, offset);
919	intel_ring_advance(ring);
920
921	return 0;
922}
923
924static int
925render_ring_dispatch_execbuffer(struct intel_ring_buffer *ring,
926				uint32_t offset, uint32_t len)
927{
928	struct drm_device *dev = ring->dev;
929	int ret;
930
931	if (IS_I830(dev) || IS_845G(dev)) {
932		ret = intel_ring_begin(ring, 4);
933		if (ret)
934			return ret;
935
936		intel_ring_emit(ring, MI_BATCH_BUFFER);
937		intel_ring_emit(ring, offset | MI_BATCH_NON_SECURE);
938		intel_ring_emit(ring, offset + len - 8);
939		intel_ring_emit(ring, 0);
940	} else {
941		ret = intel_ring_begin(ring, 2);
942		if (ret)
943			return ret;
944
945		if (INTEL_INFO(dev)->gen >= 4) {
946			intel_ring_emit(ring,
947					MI_BATCH_BUFFER_START | (2 << 6) |
948					MI_BATCH_NON_SECURE_I965);
949			intel_ring_emit(ring, offset);
950		} else {
951			intel_ring_emit(ring,
952					MI_BATCH_BUFFER_START | (2 << 6));
953			intel_ring_emit(ring, offset | MI_BATCH_NON_SECURE);
954		}
955	}
956	intel_ring_advance(ring);
957
958	return 0;
959}
960
961static void cleanup_status_page(struct intel_ring_buffer *ring)
962{
963	drm_i915_private_t *dev_priv = ring->dev->dev_private;
964	struct drm_i915_gem_object *obj;
965
966	obj = ring->status_page.obj;
967	if (obj == NULL)
968		return;
969
970	pmap_qremove((vm_offset_t)ring->status_page.page_addr, 1);
971	kva_free((vm_offset_t)ring->status_page.page_addr,
972	    PAGE_SIZE);
973	i915_gem_object_unpin(obj);
974	drm_gem_object_unreference(&obj->base);
975	ring->status_page.obj = NULL;
976
977	memset(&dev_priv->hws_map, 0, sizeof(dev_priv->hws_map));
978}
979
980static int init_status_page(struct intel_ring_buffer *ring)
981{
982	struct drm_device *dev = ring->dev;
983	drm_i915_private_t *dev_priv = dev->dev_private;
984	struct drm_i915_gem_object *obj;
985	int ret;
986
987	obj = i915_gem_alloc_object(dev, 4096);
988	if (obj == NULL) {
989		DRM_ERROR("Failed to allocate status page\n");
990		ret = -ENOMEM;
991		goto err;
992	}
993
994	i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
995
996	ret = i915_gem_object_pin(obj, 4096, true);
997	if (ret != 0) {
998		goto err_unref;
999	}
1000
1001	ring->status_page.gfx_addr = obj->gtt_offset;
1002	ring->status_page.page_addr = (void *)kva_alloc(PAGE_SIZE);
1003	if (ring->status_page.page_addr == NULL) {
1004		memset(&dev_priv->hws_map, 0, sizeof(dev_priv->hws_map));
		ret = -ENOMEM;
1005		goto err_unpin;
1006	}
1007	pmap_qenter((vm_offset_t)ring->status_page.page_addr, &obj->pages[0],
1008	    1);
1009	pmap_invalidate_cache_range((vm_offset_t)ring->status_page.page_addr,
1010	    (vm_offset_t)ring->status_page.page_addr + PAGE_SIZE);
1011	ring->status_page.obj = obj;
1012	memset(ring->status_page.page_addr, 0, PAGE_SIZE);
1013
1014	intel_ring_setup_status_page(ring);
1015	DRM_DEBUG("i915: init_status_page %s hws offset: 0x%08x\n",
1016			ring->name, ring->status_page.gfx_addr);
1017
1018	return 0;
1019
1020err_unpin:
1021	i915_gem_object_unpin(obj);
1022err_unref:
1023	drm_gem_object_unreference(&obj->base);
1024err:
1025	return ret;
1026}
1027
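/*
 * Common setup shared by all rings: create the hardware status page if
 * this chipset needs one, allocate and pin the ring object, map it
 * write-combining through the GTT aperture, run the ring-specific init
 * hook, and apply the i830/845G tail-pointer erratum.
 */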
1028static
1029int intel_init_ring_buffer(struct drm_device *dev,
1030			   struct intel_ring_buffer *ring)
1031{
1032	struct drm_i915_gem_object *obj;
1033	int ret;
1034
1035	ring->dev = dev;
1036	INIT_LIST_HEAD(&ring->active_list);
1037	INIT_LIST_HEAD(&ring->request_list);
1038	INIT_LIST_HEAD(&ring->gpu_write_list);
1039
1040	mtx_init(&ring->irq_lock, "ringb", NULL, MTX_DEF);
1041	ring->irq_mask = ~0;
1042
1043	if (I915_NEED_GFX_HWS(dev)) {
1044		ret = init_status_page(ring);
1045		if (ret)
1046			return ret;
1047	}
1048
1049	obj = i915_gem_alloc_object(dev, ring->size);
1050	if (obj == NULL) {
1051		DRM_ERROR("Failed to allocate ringbuffer\n");
1052		ret = -ENOMEM;
1053		goto err_hws;
1054	}
1055
1056	ring->obj = obj;
1057
1058	ret = i915_gem_object_pin(obj, PAGE_SIZE, true);
1059	if (ret)
1060		goto err_unref;
1061
1062	ring->map.size = ring->size;
1063	ring->map.offset = dev->agp->base + obj->gtt_offset;
1064	ring->map.type = 0;
1065	ring->map.flags = 0;
1066	ring->map.mtrr = 0;
1067
1068	drm_core_ioremap_wc(&ring->map, dev);
1069	if (ring->map.virtual == NULL) {
1070		DRM_ERROR("Failed to map ringbuffer.\n");
1071		ret = -EINVAL;
1072		goto err_unpin;
1073	}
1074
1075	ring->virtual_start = ring->map.virtual;
1076	ret = ring->init(ring);
1077	if (ret)
1078		goto err_unmap;
1079
1080	/* Workaround an erratum on the i830 which causes a hang if
1081	 * the TAIL pointer points to within the last 2 cachelines
1082	 * of the buffer.
1083	 */
1084	ring->effective_size = ring->size;
1085	if (IS_I830(ring->dev) || IS_845G(ring->dev))
1086		ring->effective_size -= 128;
1087
1088	return 0;
1089
1090err_unmap:
1091	drm_core_ioremapfree(&ring->map, dev);
1092err_unpin:
1093	i915_gem_object_unpin(obj);
1094err_unref:
1095	drm_gem_object_unreference(&obj->base);
1096	ring->obj = NULL;
1097err_hws:
1098	cleanup_status_page(ring);
1099	return ret;
1100}
1101
1102void intel_cleanup_ring_buffer(struct intel_ring_buffer *ring)
1103{
1104	struct drm_i915_private *dev_priv;
1105	int ret;
1106
1107	if (ring->obj == NULL)
1108		return;
1109
1110	/* Disable the ring buffer. The ring must be idle at this point */
1111	dev_priv = ring->dev->dev_private;
1112	ret = intel_wait_ring_idle(ring);
1113	I915_WRITE_CTL(ring, 0);
1114
1115	drm_core_ioremapfree(&ring->map, ring->dev);
1116
1117	i915_gem_object_unpin(ring->obj);
1118	drm_gem_object_unreference(&ring->obj->base);
1119	ring->obj = NULL;
1120
1121	if (ring->cleanup)
1122		ring->cleanup(ring);
1123
1124	cleanup_status_page(ring);
1125}
1126
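/*
 * When a request will not fit in the bytes left before the end of the
 * buffer, pad the remainder with MI_NOOPs (two dwords per iteration,
 * hence rem /= 8) and wrap the tail back to offset zero.
 */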
1127static int intel_wrap_ring_buffer(struct intel_ring_buffer *ring)
1128{
1129	unsigned int *virt;
1130	int rem = ring->size - ring->tail;
1131
1132	if (ring->space < rem) {
1133		int ret = intel_wait_ring_buffer(ring, rem);
1134		if (ret)
1135			return ret;
1136	}
1137
1138	virt = (unsigned int *)((char *)ring->virtual_start + ring->tail);
1139	rem /= 8;
1140	while (rem--) {
1141		*virt++ = MI_NOOP;
1142		*virt++ = MI_NOOP;
1143	}
1144
1145	ring->tail = 0;
1146	ring->space = ring_space(ring);
1147
1148	return 0;
1149}
1150
1151static int intel_ring_wait_seqno(struct intel_ring_buffer *ring, u32 seqno)
1152{
1153	struct drm_i915_private *dev_priv = ring->dev->dev_private;
1154	bool was_interruptible;
1155	int ret;
1156
1157	/* XXX As we have not yet audited all the paths to check that
1158	 * they are ready for ERESTARTSYS from intel_ring_begin, do not
1159	 * allow us to be interruptible by a signal.
1160	 */
1161	was_interruptible = dev_priv->mm.interruptible;
1162	dev_priv->mm.interruptible = false;
1163
1164	ret = i915_wait_request(ring, seqno, true);
1165
1166	dev_priv->mm.interruptible = was_interruptible;
1167
1168	return ret;
1169}
1170
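/*
 * Try to free ring space by retiring completed work: walk the
 * outstanding requests oldest-first, pick the first one whose
 * retirement would leave at least n bytes free behind the tail, and
 * wait for its seqno.
 */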
1171static int intel_ring_wait_request(struct intel_ring_buffer *ring, int n)
1172{
1173	struct drm_i915_gem_request *request;
1174	u32 seqno = 0;
1175	int ret;
1176
1177	i915_gem_retire_requests_ring(ring);
1178
1179	if (ring->last_retired_head != -1) {
1180		ring->head = ring->last_retired_head;
1181		ring->last_retired_head = -1;
1182		ring->space = ring_space(ring);
1183		if (ring->space >= n)
1184			return 0;
1185	}
1186
1187	list_for_each_entry(request, &ring->request_list, list) {
1188		int space;
1189
1190		if (request->tail == -1)
1191			continue;
1192
1193		space = request->tail - (ring->tail + 8);
1194		if (space < 0)
1195			space += ring->size;
1196		if (space >= n) {
1197			seqno = request->seqno;
1198			break;
1199		}
1200
1201		/* Consume this request in case we need more space than
1202		 * is available and so need to prevent a race between
1203		 * updating last_retired_head and direct reads of
1204		 * I915_RING_HEAD. It also provides a nice sanity check.
1205		 */
1206		request->tail = -1;
1207	}
1208
1209	if (seqno == 0)
1210		return -ENOSPC;
1211
1212	ret = intel_ring_wait_seqno(ring, seqno);
1213	if (ret)
1214		return ret;
1215
1216	if (ring->last_retired_head == -1)
1217		return -ENOSPC;
1218
1219	ring->head = ring->last_retired_head;
1220	ring->last_retired_head = -1;
1221	ring->space = ring_space(ring);
1222	if (ring->space < n)
1223		return -ENOSPC;
1224
1225	return 0;
1226}
1227
1228int intel_wait_ring_buffer(struct intel_ring_buffer *ring, int n)
1229{
1230	struct drm_device *dev = ring->dev;
1231	struct drm_i915_private *dev_priv = dev->dev_private;
1232	int end;
1233	int ret;
1234
1235	ret = intel_ring_wait_request(ring, n);
1236	if (ret != -ENOSPC)
1237		return ret;
1238
1239	CTR1(KTR_DRM, "ring_wait_begin %s", ring->name);
1240	if (drm_core_check_feature(dev, DRIVER_GEM))
1241		/* With GEM the hangcheck timer should kick us out of the loop,
1242		 * leaving it early runs the risk of corrupting GEM state (due
1243		 * to running on almost untested codepaths). But on resume
1244		 * timers don't work yet, so prevent a complete hang in that
1245		 * case by choosing an insanely large timeout. */
1246		end = ticks + hz * 60;
1247	else
1248		end = ticks + hz * 3;
1249	do {
1250		ring->head = I915_READ_HEAD(ring);
1251		ring->space = ring_space(ring);
1252		if (ring->space >= n) {
1253			CTR1(KTR_DRM, "ring_wait_end %s", ring->name);
1254			return 0;
1255		}
1256
1257#if 0
1258		if (dev->primary->master) {
1259			struct drm_i915_master_private *master_priv = dev->primary->master->driver_priv;
1260			if (master_priv->sarea_priv)
1261				master_priv->sarea_priv->perf_boxes |= I915_BOX_WAIT;
1262		}
1263#else
1264		if (dev_priv->sarea_priv)
1265			dev_priv->sarea_priv->perf_boxes |= I915_BOX_WAIT;
1266#endif
1267
1268		pause("915rng", 1);
1269		if (atomic_load_acq_32(&dev_priv->mm.wedged) != 0) {
1270			CTR1(KTR_DRM, "ring_wait_end %s wedged", ring->name);
1271			return -EAGAIN;
1272		}
1273	} while (!time_after(ticks, end));
1274	CTR1(KTR_DRM, "ring_wait_end %s busy", ring->name);
1275	return -EBUSY;
1276}
1277
1278int intel_ring_begin(struct intel_ring_buffer *ring,
1279		     int num_dwords)
1280{
1281	struct drm_i915_private *dev_priv = ring->dev->dev_private;
1282	int n = 4*num_dwords;
1283	int ret;
1284
1285	if (atomic_load_acq_int(&dev_priv->mm.wedged))
1286		return -EIO;
1287
1288	if (ring->tail + n > ring->effective_size) {
1289		ret = intel_wrap_ring_buffer(ring);
1290		if (ret != 0)
1291			return ret;
1292	}
1293
1294	if (ring->space < n) {
1295		ret = intel_wait_ring_buffer(ring, n);
1296		if (ret != 0)
1297			return ret;
1298	}
1299
1300	ring->space -= n;
1301	return 0;
1302}
1303
1304void intel_ring_advance(struct intel_ring_buffer *ring)
1305{
1306	ring->tail &= ring->size - 1;
1307	ring->write_tail(ring, ring->tail);
1308}
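
/*
 * Command emission follows the same pattern throughout this file:
 * reserve space in dwords, write the payload, then publish the new
 * tail.  A minimal sketch, mirroring bsd_ring_flush() above:
 *
 *	ret = intel_ring_begin(ring, 2);
 *	if (ret)
 *		return ret;
 *	intel_ring_emit(ring, MI_FLUSH);
 *	intel_ring_emit(ring, MI_NOOP);
 *	intel_ring_advance(ring);
 */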
1309
1310static const struct intel_ring_buffer render_ring = {
1311	.name			= "render ring",
1312	.id			= RCS,
1313	.mmio_base		= RENDER_RING_BASE,
1314	.size			= 32 * PAGE_SIZE,
1315	.init			= init_render_ring,
1316	.write_tail		= ring_write_tail,
1317	.flush			= render_ring_flush,
1318	.add_request		= render_ring_add_request,
1319	.get_seqno		= ring_get_seqno,
1320	.irq_get		= render_ring_get_irq,
1321	.irq_put		= render_ring_put_irq,
1322	.dispatch_execbuffer	= render_ring_dispatch_execbuffer,
1323	.cleanup		= render_ring_cleanup,
1324	.sync_to		= render_ring_sync_to,
1325	.semaphore_register	= {MI_SEMAPHORE_SYNC_INVALID,
1326				   MI_SEMAPHORE_SYNC_RV,
1327				   MI_SEMAPHORE_SYNC_RB},
1328	.signal_mbox		= {GEN6_VRSYNC, GEN6_BRSYNC},
1329};
1330
1331/* ring buffer for bit-stream decoder */
1332
1333static const struct intel_ring_buffer bsd_ring = {
1334	.name                   = "bsd ring",
1335	.id			= VCS,
1336	.mmio_base		= BSD_RING_BASE,
1337	.size			= 32 * PAGE_SIZE,
1338	.init			= init_ring_common,
1339	.write_tail		= ring_write_tail,
1340	.flush			= bsd_ring_flush,
1341	.add_request		= ring_add_request,
1342	.get_seqno		= ring_get_seqno,
1343	.irq_get		= bsd_ring_get_irq,
1344	.irq_put		= bsd_ring_put_irq,
1345	.dispatch_execbuffer	= ring_dispatch_execbuffer,
1346};
1347
1348
1349static void gen6_bsd_ring_write_tail(struct intel_ring_buffer *ring,
1350				     uint32_t value)
1351{
1352	drm_i915_private_t *dev_priv = ring->dev->dev_private;
1353
1354	/* Every tail move must follow the sequence below */
1355	I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
1356	    GEN6_BSD_SLEEP_PSMI_CONTROL_RC_ILDL_MESSAGE_MODIFY_MASK |
1357	    GEN6_BSD_SLEEP_PSMI_CONTROL_RC_ILDL_MESSAGE_DISABLE);
1358	I915_WRITE(GEN6_BSD_RNCID, 0x0);
1359
1360	if (_intel_wait_for(ring->dev,
1361	    (I915_READ(GEN6_BSD_SLEEP_PSMI_CONTROL) &
1362	     GEN6_BSD_SLEEP_PSMI_CONTROL_IDLE_INDICATOR) == 0, 50,
1363	    true, "915g6i") != 0)
1364		DRM_ERROR("timed out waiting for IDLE Indicator\n");
1365
1366	I915_WRITE_TAIL(ring, value);
1367	I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
1368	    GEN6_BSD_SLEEP_PSMI_CONTROL_RC_ILDL_MESSAGE_MODIFY_MASK |
1369	    GEN6_BSD_SLEEP_PSMI_CONTROL_RC_ILDL_MESSAGE_ENABLE);
1370}
1371
1372static int gen6_ring_flush(struct intel_ring_buffer *ring,
1373			   uint32_t invalidate, uint32_t flush)
1374{
1375	uint32_t cmd;
1376	int ret;
1377
1378	ret = intel_ring_begin(ring, 4);
1379	if (ret)
1380		return ret;
1381
1382	cmd = MI_FLUSH_DW;
1383	if (invalidate & I915_GEM_GPU_DOMAINS)
1384		cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD;
1385	intel_ring_emit(ring, cmd);
1386	intel_ring_emit(ring, 0);
1387	intel_ring_emit(ring, 0);
1388	intel_ring_emit(ring, MI_NOOP);
1389	intel_ring_advance(ring);
1390	return 0;
1391}
1392
1393static int
1394gen6_ring_dispatch_execbuffer(struct intel_ring_buffer *ring,
1395			      uint32_t offset, uint32_t len)
1396{
1397	int ret;
1398
1399	ret = intel_ring_begin(ring, 2);
1400	if (ret)
1401		return ret;
1402
1403	intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_NON_SECURE_I965);
1404	/* bit0-7 is the length on GEN6+ */
1405	intel_ring_emit(ring, offset);
1406	intel_ring_advance(ring);
1407
1408	return 0;
1409}
1410
1411static bool
1412gen6_render_ring_get_irq(struct intel_ring_buffer *ring)
1413{
1414	return gen6_ring_get_irq(ring,
1415				 GT_USER_INTERRUPT,
1416				 GEN6_RENDER_USER_INTERRUPT);
1417}
1418
1419static void
1420gen6_render_ring_put_irq(struct intel_ring_buffer *ring)
1421{
1422	return gen6_ring_put_irq(ring,
1423				 GT_USER_INTERRUPT,
1424				 GEN6_RENDER_USER_INTERRUPT);
1425}
1426
1427static bool
1428gen6_bsd_ring_get_irq(struct intel_ring_buffer *ring)
1429{
1430	return gen6_ring_get_irq(ring,
1431				 GT_GEN6_BSD_USER_INTERRUPT,
1432				 GEN6_BSD_USER_INTERRUPT);
1433}
1434
1435static void
1436gen6_bsd_ring_put_irq(struct intel_ring_buffer *ring)
1437{
1438	return gen6_ring_put_irq(ring,
1439				 GT_GEN6_BSD_USER_INTERRUPT,
1440				 GEN6_BSD_USER_INTERRUPT);
1441}
1442
1443/* ring buffer for Video Codec for Gen6+ */
1444static const struct intel_ring_buffer gen6_bsd_ring = {
1445	.name			= "gen6 bsd ring",
1446	.id			= VCS,
1447	.mmio_base		= GEN6_BSD_RING_BASE,
1448	.size			= 32 * PAGE_SIZE,
1449	.init			= init_ring_common,
1450	.write_tail		= gen6_bsd_ring_write_tail,
1451	.flush			= gen6_ring_flush,
1452	.add_request		= gen6_add_request,
1453	.get_seqno		= gen6_ring_get_seqno,
1454	.irq_get		= gen6_bsd_ring_get_irq,
1455	.irq_put		= gen6_bsd_ring_put_irq,
1456	.dispatch_execbuffer	= gen6_ring_dispatch_execbuffer,
1457	.sync_to		= gen6_bsd_ring_sync_to,
1458	.semaphore_register	= {MI_SEMAPHORE_SYNC_VR,
1459				   MI_SEMAPHORE_SYNC_INVALID,
1460				   MI_SEMAPHORE_SYNC_VB},
1461	.signal_mbox		= {GEN6_RVSYNC, GEN6_BVSYNC},
1462};
1463
1464/* Blitter support (SandyBridge+) */
1465
1466static bool
1467blt_ring_get_irq(struct intel_ring_buffer *ring)
1468{
1469	return gen6_ring_get_irq(ring,
1470				 GT_BLT_USER_INTERRUPT,
1471				 GEN6_BLITTER_USER_INTERRUPT);
1472}
1473
1474static void
1475blt_ring_put_irq(struct intel_ring_buffer *ring)
1476{
1477	gen6_ring_put_irq(ring,
1478			  GT_BLT_USER_INTERRUPT,
1479			  GEN6_BLITTER_USER_INTERRUPT);
1480}
1481
1482static int blt_ring_flush(struct intel_ring_buffer *ring,
1483			  uint32_t invalidate, uint32_t flush)
1484{
1485	uint32_t cmd;
1486	int ret;
1487
1488	ret = intel_ring_begin(ring, 4);
1489	if (ret)
1490		return ret;
1491
1492	cmd = MI_FLUSH_DW;
1493	if (invalidate & I915_GEM_DOMAIN_RENDER)
1494		cmd |= MI_INVALIDATE_TLB;
1495	intel_ring_emit(ring, cmd);
1496	intel_ring_emit(ring, 0);
1497	intel_ring_emit(ring, 0);
1498	intel_ring_emit(ring, MI_NOOP);
1499	intel_ring_advance(ring);
1500	return 0;
1501}
1502
1503static const struct intel_ring_buffer gen6_blt_ring = {
1504	.name			= "blt ring",
1505	.id			= BCS,
1506	.mmio_base		= BLT_RING_BASE,
1507	.size			= 32 * PAGE_SIZE,
1508	.init			= init_ring_common,
1509	.write_tail		= ring_write_tail,
1510	.flush			= blt_ring_flush,
1511	.add_request		= gen6_add_request,
1512	.get_seqno		= gen6_ring_get_seqno,
1513	.irq_get		= blt_ring_get_irq,
1514	.irq_put		= blt_ring_put_irq,
1515	.dispatch_execbuffer	= gen6_ring_dispatch_execbuffer,
1516	.sync_to		= gen6_blt_ring_sync_to,
1517	.semaphore_register	= {MI_SEMAPHORE_SYNC_BR,
1518				   MI_SEMAPHORE_SYNC_BV,
1519				   MI_SEMAPHORE_SYNC_INVALID},
1520	.signal_mbox		= {GEN6_RBSYNC, GEN6_VBSYNC},
1521};
1522
1523int intel_init_render_ring_buffer(struct drm_device *dev)
1524{
1525	drm_i915_private_t *dev_priv = dev->dev_private;
1526	struct intel_ring_buffer *ring = &dev_priv->rings[RCS];
1527
1528	*ring = render_ring;
1529	if (INTEL_INFO(dev)->gen >= 6) {
1530		ring->add_request = gen6_add_request;
1531		ring->flush = gen6_render_ring_flush;
1532		ring->irq_get = gen6_render_ring_get_irq;
1533		ring->irq_put = gen6_render_ring_put_irq;
1534		ring->get_seqno = gen6_ring_get_seqno;
1535	} else if (IS_GEN5(dev)) {
1536		ring->add_request = pc_render_add_request;
1537		ring->get_seqno = pc_render_get_seqno;
1538	}
1539
1540	if (!I915_NEED_GFX_HWS(dev)) {
1541		ring->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
1542		memset(ring->status_page.page_addr, 0, PAGE_SIZE);
1543	}
1544
1545	return intel_init_ring_buffer(dev, ring);
1546}
1547
1548int intel_render_ring_init_dri(struct drm_device *dev, uint64_t start,
1549    uint32_t size)
1550{
1551	drm_i915_private_t *dev_priv = dev->dev_private;
1552	struct intel_ring_buffer *ring = &dev_priv->rings[RCS];
1553
1554	*ring = render_ring;
1555	if (INTEL_INFO(dev)->gen >= 6) {
1556		ring->add_request = gen6_add_request;
1557		ring->irq_get = gen6_render_ring_get_irq;
1558		ring->irq_put = gen6_render_ring_put_irq;
1559	} else if (IS_GEN5(dev)) {
1560		ring->add_request = pc_render_add_request;
1561		ring->get_seqno = pc_render_get_seqno;
1562	}
1563
1564	ring->dev = dev;
1565	INIT_LIST_HEAD(&ring->active_list);
1566	INIT_LIST_HEAD(&ring->request_list);
1567	INIT_LIST_HEAD(&ring->gpu_write_list);
1568
1569	ring->size = size;
1570	ring->effective_size = ring->size;
1571	if (IS_I830(ring->dev))
1572		ring->effective_size -= 128;
1573
1574	ring->map.offset = start;
1575	ring->map.size = size;
1576	ring->map.type = 0;
1577	ring->map.flags = 0;
1578	ring->map.mtrr = 0;
1579
1580	drm_core_ioremap_wc(&ring->map, dev);
1581	if (ring->map.virtual == NULL) {
1582		DRM_ERROR("can not ioremap virtual address for"
1583			  " ring buffer\n");
1584		return -ENOMEM;
1585	}
1586
1587	ring->virtual_start = (void *)ring->map.virtual;
1588	return 0;
1589}
1590
1591int intel_init_bsd_ring_buffer(struct drm_device *dev)
1592{
1593	drm_i915_private_t *dev_priv = dev->dev_private;
1594	struct intel_ring_buffer *ring = &dev_priv->rings[VCS];
1595
1596	if (IS_GEN6(dev) || IS_GEN7(dev))
1597		*ring = gen6_bsd_ring;
1598	else
1599		*ring = bsd_ring;
1600
1601	return intel_init_ring_buffer(dev, ring);
1602}
1603
1604int intel_init_blt_ring_buffer(struct drm_device *dev)
1605{
1606	drm_i915_private_t *dev_priv = dev->dev_private;
1607	struct intel_ring_buffer *ring = &dev_priv->rings[BCS];
1608
1609	*ring = gen6_blt_ring;
1610
1611	return intel_init_ring_buffer(dev, ring);
1612}
1613