intel_ringbuffer.c revision 272761
1/*
2 * Copyright © 2008-2010 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 *    Eric Anholt <eric@anholt.net>
25 *    Zou Nan hai <nanhai.zou@intel.com>
26 *    Xiang Hai hao<haihao.xiang@intel.com>
27 *
28 */
29
30#include <sys/cdefs.h>
31__FBSDID("$FreeBSD: head/sys/dev/drm2/i915/intel_ringbuffer.c 272761 2014-10-08 16:48:03Z kib $");
32
33#include <dev/drm2/drmP.h>
34#include <dev/drm2/drm.h>
35#include <dev/drm2/i915/i915_drm.h>
36#include <dev/drm2/i915/i915_drv.h>
37#include <dev/drm2/i915/intel_drv.h>
38#include <dev/drm2/i915/intel_ringbuffer.h>
39#include <sys/sched.h>
40#include <sys/sf_buf.h>
41
42/*
43 * 965+ support PIPE_CONTROL commands, which provide finer grained control
44 * over cache flushing.
45 */
46struct pipe_control {
47	struct drm_i915_gem_object *obj;
48	volatile u32 *cpu_page;
49	u32 gtt_offset;
50};
51
52void
53i915_trace_irq_get(struct intel_ring_buffer *ring, uint32_t seqno)
54{
55
56	if (ring->trace_irq_seqno == 0) {
57		mtx_lock(&ring->irq_lock);
58		if (ring->irq_get(ring))
59			ring->trace_irq_seqno = seqno;
60		mtx_unlock(&ring->irq_lock);
61	}
62}
63
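/*
 * Free space between the software tail and the hardware head, in bytes.
 * Eight bytes are held back so that a completely full ring is never
 * mistaken for an empty one (head == tail).
 */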
64static inline int ring_space(struct intel_ring_buffer *ring)
65{
66	int space = (ring->head & HEAD_ADDR) - (ring->tail + 8);
67	if (space < 0)
68		space += ring->size;
69	return space;
70}
71
72static int
73render_ring_flush(struct intel_ring_buffer *ring,
74		  uint32_t	invalidate_domains,
75		  uint32_t	flush_domains)
76{
77	struct drm_device *dev = ring->dev;
78	uint32_t cmd;
79	int ret;
80
81	/*
82	 * read/write caches:
83	 *
84	 * I915_GEM_DOMAIN_RENDER is always invalidated, but is
85	 * only flushed if MI_NO_WRITE_FLUSH is unset.  On 965, it is
86	 * also flushed at 2d versus 3d pipeline switches.
87	 *
88	 * read-only caches:
89	 *
90	 * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
91	 * MI_READ_FLUSH is set, and is always flushed on 965.
92	 *
93	 * I915_GEM_DOMAIN_COMMAND may not exist?
94	 *
95	 * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
96	 * invalidated when MI_EXE_FLUSH is set.
97	 *
98	 * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
99	 * invalidated with every MI_FLUSH.
100	 *
101	 * TLBs:
102	 *
103	 * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
104	 * and I915_GEM_DOMAIN_CPU are invalidated at PTE write, and
105	 * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
106	 * are flushed at any MI_FLUSH.
107	 */
108
109	cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
110	if ((invalidate_domains|flush_domains) &
111	    I915_GEM_DOMAIN_RENDER)
112		cmd &= ~MI_NO_WRITE_FLUSH;
113	if (INTEL_INFO(dev)->gen < 4) {
114		/*
115		 * On the 965, the sampler cache always gets flushed
116		 * and this bit is reserved.
117		 */
118		if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER)
119			cmd |= MI_READ_FLUSH;
120	}
121	if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION)
122		cmd |= MI_EXE_FLUSH;
123
124	if (invalidate_domains & I915_GEM_DOMAIN_COMMAND &&
125	    (IS_G4X(dev) || IS_GEN5(dev)))
126		cmd |= MI_INVALIDATE_ISP;
127
128	ret = intel_ring_begin(ring, 2);
129	if (ret)
130		return ret;
131
132	intel_ring_emit(ring, cmd);
133	intel_ring_emit(ring, MI_NOOP);
134	intel_ring_advance(ring);
135
136	return 0;
137}
138
139/**
140 * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
141 * implementing two workarounds on gen6.  From section 1.4.7.1
142 * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
143 *
144 * [DevSNB-C+{W/A}] Before any depth stall flush (including those
145 * produced by non-pipelined state commands), software needs to first
146 * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
147 * 0.
148 *
149 * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
150 * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
151 *
152 * And the workaround for these two requires this workaround first:
153 *
154 * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
155 * BEFORE the pipe-control with a post-sync op and no write-cache
156 * flushes.
157 *
158 * And this last workaround is tricky because of the requirements on
159 * that bit.  From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
160 * volume 2 part 1:
161 *
162 *     "1 of the following must also be set:
163 *      - Render Target Cache Flush Enable ([12] of DW1)
164 *      - Depth Cache Flush Enable ([0] of DW1)
165 *      - Stall at Pixel Scoreboard ([1] of DW1)
166 *      - Depth Stall ([13] of DW1)
167 *      - Post-Sync Operation ([13] of DW1)
168 *      - Notify Enable ([8] of DW1)"
169 *
170 * The cache flushes require the workaround flush that triggered this
171 * one, so we can't use it.  Depth stall would trigger the same.
172 * Post-sync nonzero is what triggered this second workaround, so we
173 * can't use that one either.  Notify enable is IRQs, which aren't
174 * really our business.  That leaves only stall at scoreboard.
175 */
176static int
177intel_emit_post_sync_nonzero_flush(struct intel_ring_buffer *ring)
178{
179	struct pipe_control *pc = ring->private;
180	u32 scratch_addr = pc->gtt_offset + 128;
181	int ret;
182
183
184	ret = intel_ring_begin(ring, 6);
185	if (ret)
186		return ret;
187
188	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
189	intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
190			PIPE_CONTROL_STALL_AT_SCOREBOARD);
191	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
192	intel_ring_emit(ring, 0); /* low dword */
193	intel_ring_emit(ring, 0); /* high dword */
194	intel_ring_emit(ring, MI_NOOP);
195	intel_ring_advance(ring);
196
197	ret = intel_ring_begin(ring, 6);
198	if (ret)
199		return ret;
200
201	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
202	intel_ring_emit(ring, PIPE_CONTROL_QW_WRITE);
203	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
204	intel_ring_emit(ring, 0);
205	intel_ring_emit(ring, 0);
206	intel_ring_emit(ring, MI_NOOP);
207	intel_ring_advance(ring);
208
209	return 0;
210}
211
212static int
213gen6_render_ring_flush(struct intel_ring_buffer *ring,
214                         u32 invalidate_domains, u32 flush_domains)
215{
216	u32 flags = 0;
217	struct pipe_control *pc = ring->private;
218	u32 scratch_addr = pc->gtt_offset + 128;
219	int ret;
220
221	/* Force SNB workarounds for PIPE_CONTROL flushes */
222	ret = intel_emit_post_sync_nonzero_flush(ring);
	if (ret)
		return ret;
223
224	/* Just flush everything.  Experiments have shown that reducing the
225	 * number of bits based on the write domains has little performance
226	 * impact.
227	 */
228	flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
229	flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
230	flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
231	flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
232	flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
233	flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
234	flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
235
236	ret = intel_ring_begin(ring, 6);
237	if (ret)
238		return ret;
239
240	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
241	intel_ring_emit(ring, flags);
242	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
243	intel_ring_emit(ring, 0); /* lower dword */
244	intel_ring_emit(ring, 0); /* upper dword */
245	intel_ring_emit(ring, MI_NOOP);
246	intel_ring_advance(ring);
247
248	return 0;
249}
250
251static void ring_write_tail(struct intel_ring_buffer *ring,
252			    uint32_t value)
253{
254	drm_i915_private_t *dev_priv = ring->dev->dev_private;
255	I915_WRITE_TAIL(ring, value);
256}
257
258u32 intel_ring_get_active_head(struct intel_ring_buffer *ring)
259{
260	drm_i915_private_t *dev_priv = ring->dev->dev_private;
261	uint32_t acthd_reg = INTEL_INFO(ring->dev)->gen >= 4 ?
262			RING_ACTHD(ring->mmio_base) : ACTHD;
263
264	return I915_READ(acthd_reg);
265}
266
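/*
 * Common hardware bring-up used by every engine: stop the ring, program
 * its start address, force HEAD back to zero (G45 does not always reset
 * it), enable the ring and verify that it reports itself valid and idle.
 */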
267static int init_ring_common(struct intel_ring_buffer *ring)
268{
269	drm_i915_private_t *dev_priv = ring->dev->dev_private;
270	struct drm_i915_gem_object *obj = ring->obj;
271	uint32_t head;
272
273	/* Stop the ring if it's running. */
274	I915_WRITE_CTL(ring, 0);
275	I915_WRITE_HEAD(ring, 0);
276	ring->write_tail(ring, 0);
277
278	/* Initialize the ring. */
279	I915_WRITE_START(ring, obj->gtt_offset);
280	head = I915_READ_HEAD(ring) & HEAD_ADDR;
281
282	/* G45 ring initialization fails to reset head to zero */
283	if (head != 0) {
284		DRM_DEBUG("%s head not reset to zero "
285			      "ctl %08x head %08x tail %08x start %08x\n",
286			      ring->name,
287			      I915_READ_CTL(ring),
288			      I915_READ_HEAD(ring),
289			      I915_READ_TAIL(ring),
290			      I915_READ_START(ring));
291
292		I915_WRITE_HEAD(ring, 0);
293
294		if (I915_READ_HEAD(ring) & HEAD_ADDR) {
295			DRM_ERROR("failed to set %s head to zero "
296				  "ctl %08x head %08x tail %08x start %08x\n",
297				  ring->name,
298				  I915_READ_CTL(ring),
299				  I915_READ_HEAD(ring),
300				  I915_READ_TAIL(ring),
301				  I915_READ_START(ring));
302		}
303	}
304
305	I915_WRITE_CTL(ring,
306			((ring->size - PAGE_SIZE) & RING_NR_PAGES)
307			| RING_VALID);
308
309	/* If the head is still not zero, the ring is dead */
310	if (_intel_wait_for(ring->dev,
311	    (I915_READ_CTL(ring) & RING_VALID) != 0 &&
312	     I915_READ_START(ring) == obj->gtt_offset &&
313	     (I915_READ_HEAD(ring) & HEAD_ADDR) == 0,
314	    50, 1, "915rii")) {
315		DRM_ERROR("%s initialization failed "
316				"ctl %08x head %08x tail %08x start %08x\n",
317				ring->name,
318				I915_READ_CTL(ring),
319				I915_READ_HEAD(ring),
320				I915_READ_TAIL(ring),
321				I915_READ_START(ring));
322		return -EIO;
323	}
324
325	if (!drm_core_check_feature(ring->dev, DRIVER_MODESET))
326		i915_kernel_lost_context(ring->dev);
327	else {
328		ring->head = I915_READ_HEAD(ring);
329		ring->tail = I915_READ_TAIL(ring) & TAIL_ADDR;
330		ring->space = ring_space(ring);
331	}
332
333	return 0;
334}
335
336static int
337init_pipe_control(struct intel_ring_buffer *ring)
338{
339	struct pipe_control *pc;
340	struct drm_i915_gem_object *obj;
341	int ret;
342
343	if (ring->private)
344		return 0;
345
346	pc = malloc(sizeof(*pc), DRM_I915_GEM, M_WAITOK);
347	if (!pc)
348		return -ENOMEM;
349
350	obj = i915_gem_alloc_object(ring->dev, 4096);
351	if (obj == NULL) {
352		DRM_ERROR("Failed to allocate seqno page\n");
353		ret = -ENOMEM;
354		goto err;
355	}
356
357	i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
358
359	ret = i915_gem_object_pin(obj, 4096, true);
360	if (ret)
361		goto err_unref;
362
363	pc->gtt_offset = obj->gtt_offset;
364	pc->cpu_page = (uint32_t *)kva_alloc(PAGE_SIZE);
365	if (pc->cpu_page == NULL) {
		ret = -ENOMEM;
366		goto err_unpin;
	}
367	pmap_qenter((uintptr_t)pc->cpu_page, &obj->pages[0], 1);
368	pmap_invalidate_cache_range((vm_offset_t)pc->cpu_page,
369	    (vm_offset_t)pc->cpu_page + PAGE_SIZE, FALSE);
370
371	pc->obj = obj;
372	ring->private = pc;
373	return 0;
374
375err_unpin:
376	i915_gem_object_unpin(obj);
377err_unref:
378	drm_gem_object_unreference(&obj->base);
379err:
380	free(pc, DRM_I915_GEM);
381	return ret;
382}
383
384static void
385cleanup_pipe_control(struct intel_ring_buffer *ring)
386{
387	struct pipe_control *pc = ring->private;
388	struct drm_i915_gem_object *obj;
389
390	if (!ring->private)
391		return;
392
393	obj = pc->obj;
394	pmap_qremove((vm_offset_t)pc->cpu_page, 1);
395	kva_free((uintptr_t)pc->cpu_page, PAGE_SIZE);
396	i915_gem_object_unpin(obj);
397	drm_gem_object_unreference(&obj->base);
398
399	free(pc, DRM_I915_GEM);
400	ring->private = NULL;
401}
402
403static int init_render_ring(struct intel_ring_buffer *ring)
404{
405	struct drm_device *dev = ring->dev;
406	struct drm_i915_private *dev_priv = dev->dev_private;
407	int ret = init_ring_common(ring);
408
409	if (INTEL_INFO(dev)->gen > 3) {
410		int mode = VS_TIMER_DISPATCH << 16 | VS_TIMER_DISPATCH;
411		I915_WRITE(MI_MODE, mode);
412		if (IS_GEN7(dev))
413			I915_WRITE(GFX_MODE_GEN7,
414				   GFX_MODE_DISABLE(GFX_TLB_INVALIDATE_ALWAYS) |
415				   GFX_MODE_ENABLE(GFX_REPLAY_MODE));
416	}
417
418	if (INTEL_INFO(dev)->gen >= 5) {
419		ret = init_pipe_control(ring);
420		if (ret)
421			return ret;
422	}
423
424
425	if (IS_GEN6(dev)) {
426		/* From the Sandybridge PRM, volume 1 part 3, page 24:
427		 * "If this bit is set, STCunit will have LRA as replacement
428		 *  policy. [...] This bit must be reset.  LRA replacement
429		 *  policy is not supported."
430		 */
431		I915_WRITE(CACHE_MODE_0,
432			   CM0_STC_EVICT_DISABLE_LRA_SNB << CM0_MASK_SHIFT);
433
434		/* This is not explicitly set for GEN6, so read the register.
435		 * see intel_ring_mi_set_context() for why we care.
436		 * TODO: consider explicitly setting the bit for GEN5
437		 */
438		ring->itlb_before_ctx_switch =
439			!!(I915_READ(GFX_MODE) & GFX_TLB_INVALIDATE_ALWAYS);
440	}
441
442	if (INTEL_INFO(dev)->gen >= 6) {
443		I915_WRITE(INSTPM,
444			   INSTPM_FORCE_ORDERING << 16 | INSTPM_FORCE_ORDERING);
445	}
446
447	return ret;
448}
449
450static void render_ring_cleanup(struct intel_ring_buffer *ring)
451{
452	if (!ring->private)
453		return;
454
455	cleanup_pipe_control(ring);
456}
457
458static void
459update_mboxes(struct intel_ring_buffer *ring,
460	    u32 seqno,
461	    u32 mmio_offset)
462{
463	intel_ring_emit(ring, MI_SEMAPHORE_MBOX |
464			      MI_SEMAPHORE_GLOBAL_GTT |
465			      MI_SEMAPHORE_REGISTER |
466			      MI_SEMAPHORE_UPDATE);
467	intel_ring_emit(ring, seqno);
468	intel_ring_emit(ring, mmio_offset);
469}
470
471/**
472 * gen6_add_request - Update the semaphore mailbox registers
473 *
474 * @ring - ring that is adding a request
475 * @seqno - return seqno stuck into the ring
476 *
477 * Update the mailbox registers in the *other* rings with the current seqno.
478 * This acts like a signal in the canonical semaphore.
479 */
480static int
481gen6_add_request(struct intel_ring_buffer *ring,
482		 u32 *seqno)
483{
484	u32 mbox1_reg;
485	u32 mbox2_reg;
486	int ret;
487
488	ret = intel_ring_begin(ring, 10);
489	if (ret)
490		return ret;
491
492	mbox1_reg = ring->signal_mbox[0];
493	mbox2_reg = ring->signal_mbox[1];
494
495	*seqno = i915_gem_next_request_seqno(ring);
496
497	update_mboxes(ring, *seqno, mbox1_reg);
498	update_mboxes(ring, *seqno, mbox2_reg);
499	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
500	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
501	intel_ring_emit(ring, *seqno);
502	intel_ring_emit(ring, MI_USER_INTERRUPT);
503	intel_ring_advance(ring);
504
505	return 0;
506}
507
508/**
509 * intel_ring_sync - sync the waiter to the signaller on seqno
510 *
511 * @waiter - ring that is waiting
512 * @signaller - ring which has signalled, or will signal
513 * @seqno - seqno which the waiter will block on
514 */
515static int
516intel_ring_sync(struct intel_ring_buffer *waiter,
517		struct intel_ring_buffer *signaller,
518		int ring,
519		u32 seqno)
520{
521	int ret;
522	u32 dw1 = MI_SEMAPHORE_MBOX |
523		  MI_SEMAPHORE_COMPARE |
524		  MI_SEMAPHORE_REGISTER;
525
526	ret = intel_ring_begin(waiter, 4);
527	if (ret)
528		return ret;
529
530	intel_ring_emit(waiter, dw1 | signaller->semaphore_register[ring]);
531	intel_ring_emit(waiter, seqno);
532	intel_ring_emit(waiter, 0);
533	intel_ring_emit(waiter, MI_NOOP);
534	intel_ring_advance(waiter);
535
536	return 0;
537}
538
539int render_ring_sync_to(struct intel_ring_buffer *waiter,
540    struct intel_ring_buffer *signaller, u32 seqno);
541int gen6_bsd_ring_sync_to(struct intel_ring_buffer *waiter,
542    struct intel_ring_buffer *signaller, u32 seqno);
543int gen6_blt_ring_sync_to(struct intel_ring_buffer *waiter,
544    struct intel_ring_buffer *signaller, u32 seqno);
545
546/* VCS->RCS (RVSYNC) or BCS->RCS (RBSYNC) */
547int
548render_ring_sync_to(struct intel_ring_buffer *waiter,
549		    struct intel_ring_buffer *signaller,
550		    u32 seqno)
551{
552	KASSERT(signaller->semaphore_register[RCS] != MI_SEMAPHORE_SYNC_INVALID,
553	    ("valid RCS semaphore"));
554	return intel_ring_sync(waiter,
555			       signaller,
556			       RCS,
557			       seqno);
558}
559
560/* RCS->VCS (VRSYNC) or BCS->VCS (VBSYNC) */
561int
562gen6_bsd_ring_sync_to(struct intel_ring_buffer *waiter,
563		      struct intel_ring_buffer *signaller,
564		      u32 seqno)
565{
566	KASSERT(signaller->semaphore_register[VCS] != MI_SEMAPHORE_SYNC_INVALID,
567	    ("Valid VCS semaphore"));
568	return intel_ring_sync(waiter,
569			       signaller,
570			       VCS,
571			       seqno);
572}
573
574/* RCS->BCS (BRSYNC) or VCS->BCS (BVSYNC) */
575int
576gen6_blt_ring_sync_to(struct intel_ring_buffer *waiter,
577		      struct intel_ring_buffer *signaller,
578		      u32 seqno)
579{
580	KASSERT(signaller->semaphore_register[BCS] != MI_SEMAPHORE_SYNC_INVALID,
581	    ("Valid BCS semaphore"));
582	return intel_ring_sync(waiter,
583			       signaller,
584			       BCS,
585			       seqno);
586}
587
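/*
 * Emit a depth-stalling PIPE_CONTROL that performs a qword write to the
 * given scratch address; pc_render_add_request() uses it to flush the
 * PIPE_NOTIFY writes out to memory before raising the interrupt.
 */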
588#define PIPE_CONTROL_FLUSH(ring__, addr__)					\
589do {									\
590	intel_ring_emit(ring__, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |		\
591		 PIPE_CONTROL_DEPTH_STALL);				\
592	intel_ring_emit(ring__, (addr__) | PIPE_CONTROL_GLOBAL_GTT);			\
593	intel_ring_emit(ring__, 0);							\
594	intel_ring_emit(ring__, 0);							\
595} while (0)
596
597static int
598pc_render_add_request(struct intel_ring_buffer *ring,
599		      uint32_t *result)
600{
601	u32 seqno = i915_gem_next_request_seqno(ring);
602	struct pipe_control *pc = ring->private;
603	u32 scratch_addr = pc->gtt_offset + 128;
604	int ret;
605
606	/* For Ironlake, MI_USER_INTERRUPT was deprecated and apparently
607	 * incoherent with writes to memory, i.e. completely fubar,
608	 * so we need to use PIPE_NOTIFY instead.
609	 *
610	 * However, we also need to work around the qword write
611	 * incoherence by flushing the 6 PIPE_NOTIFY buffers out to
612	 * memory before requesting an interrupt.
613	 */
614	ret = intel_ring_begin(ring, 32);
615	if (ret)
616		return ret;
617
618	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
619			PIPE_CONTROL_WRITE_FLUSH |
620			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
621	intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
622	intel_ring_emit(ring, seqno);
623	intel_ring_emit(ring, 0);
624	PIPE_CONTROL_FLUSH(ring, scratch_addr);
625	scratch_addr += 128; /* write to separate cachelines */
626	PIPE_CONTROL_FLUSH(ring, scratch_addr);
627	scratch_addr += 128;
628	PIPE_CONTROL_FLUSH(ring, scratch_addr);
629	scratch_addr += 128;
630	PIPE_CONTROL_FLUSH(ring, scratch_addr);
631	scratch_addr += 128;
632	PIPE_CONTROL_FLUSH(ring, scratch_addr);
633	scratch_addr += 128;
634	PIPE_CONTROL_FLUSH(ring, scratch_addr);
635	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
636			PIPE_CONTROL_WRITE_FLUSH |
637			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
638			PIPE_CONTROL_NOTIFY);
639	intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
640	intel_ring_emit(ring, seqno);
641	intel_ring_emit(ring, 0);
642	intel_ring_advance(ring);
643
644	*result = seqno;
645	return 0;
646}
647
648static int
649render_ring_add_request(struct intel_ring_buffer *ring,
650			uint32_t *result)
651{
652	u32 seqno = i915_gem_next_request_seqno(ring);
653	int ret;
654
655	ret = intel_ring_begin(ring, 4);
656	if (ret)
657		return ret;
658
659	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
660	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
661	intel_ring_emit(ring, seqno);
662	intel_ring_emit(ring, MI_USER_INTERRUPT);
663	intel_ring_advance(ring);
664
665	*result = seqno;
666	return 0;
667}
668
669static u32
670gen6_ring_get_seqno(struct intel_ring_buffer *ring)
671{
672	struct drm_device *dev = ring->dev;
673
674	/* Workaround to force correct ordering between irq and seqno writes on
675	 * ivb (and maybe also on snb) by reading from a CS register (like
676	 * ACTHD) before reading the status page. */
677	if (/* IS_GEN6(dev) || */IS_GEN7(dev))
678		intel_ring_get_active_head(ring);
679	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
680}
681
682static uint32_t
683ring_get_seqno(struct intel_ring_buffer *ring)
684{
685	if (ring->status_page.page_addr == NULL)
686		return (-1);
687	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
688}
689
690static uint32_t
691pc_render_get_seqno(struct intel_ring_buffer *ring)
692{
693	struct pipe_control *pc = ring->private;
694	if (pc != NULL)
695		return pc->cpu_page[0];
696	else
697		return (-1);
698}
699
700static void
701ironlake_enable_irq(drm_i915_private_t *dev_priv, uint32_t mask)
702{
703	dev_priv->gt_irq_mask &= ~mask;
704	I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
705	POSTING_READ(GTIMR);
706}
707
708static void
709ironlake_disable_irq(drm_i915_private_t *dev_priv, uint32_t mask)
710{
711	dev_priv->gt_irq_mask |= mask;
712	I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
713	POSTING_READ(GTIMR);
714}
715
716static void
717i915_enable_irq(drm_i915_private_t *dev_priv, uint32_t mask)
718{
719	dev_priv->irq_mask &= ~mask;
720	I915_WRITE(IMR, dev_priv->irq_mask);
721	POSTING_READ(IMR);
722}
723
724static void
725i915_disable_irq(drm_i915_private_t *dev_priv, uint32_t mask)
726{
727	dev_priv->irq_mask |= mask;
728	I915_WRITE(IMR, dev_priv->irq_mask);
729	POSTING_READ(IMR);
730}
731
732static bool
733render_ring_get_irq(struct intel_ring_buffer *ring)
734{
735	struct drm_device *dev = ring->dev;
736	drm_i915_private_t *dev_priv = dev->dev_private;
737
738	if (!dev->irq_enabled)
739		return false;
740
741	mtx_assert(&ring->irq_lock, MA_OWNED);
742	if (ring->irq_refcount++ == 0) {
743		if (HAS_PCH_SPLIT(dev))
744			ironlake_enable_irq(dev_priv,
745					    GT_PIPE_NOTIFY | GT_USER_INTERRUPT);
746		else
747			i915_enable_irq(dev_priv, I915_USER_INTERRUPT);
748	}
749
750	return true;
751}
752
753static void
754render_ring_put_irq(struct intel_ring_buffer *ring)
755{
756	struct drm_device *dev = ring->dev;
757	drm_i915_private_t *dev_priv = dev->dev_private;
758
759	mtx_assert(&ring->irq_lock, MA_OWNED);
760	if (--ring->irq_refcount == 0) {
761		if (HAS_PCH_SPLIT(dev))
762			ironlake_disable_irq(dev_priv,
763					     GT_USER_INTERRUPT |
764					     GT_PIPE_NOTIFY);
765		else
766			i915_disable_irq(dev_priv, I915_USER_INTERRUPT);
767	}
768}
769
770void intel_ring_setup_status_page(struct intel_ring_buffer *ring)
771{
772	struct drm_device *dev = ring->dev;
773	drm_i915_private_t *dev_priv = dev->dev_private;
774	uint32_t mmio = 0;
775
776	/* The ring status page addresses are no longer next to the rest of
777	 * the ring registers as of gen7.
778	 */
779	if (IS_GEN7(dev)) {
780		switch (ring->id) {
781		case RCS:
782			mmio = RENDER_HWS_PGA_GEN7;
783			break;
784		case BCS:
785			mmio = BLT_HWS_PGA_GEN7;
786			break;
787		case VCS:
788			mmio = BSD_HWS_PGA_GEN7;
789			break;
790		}
791	} else if (IS_GEN6(dev)) {
792		mmio = RING_HWS_PGA_GEN6(ring->mmio_base);
793	} else {
794		mmio = RING_HWS_PGA(ring->mmio_base);
795	}
796
797	I915_WRITE(mmio, (u32)ring->status_page.gfx_addr);
798	POSTING_READ(mmio);
799}
800
801static int
802bsd_ring_flush(struct intel_ring_buffer *ring,
803	       uint32_t     invalidate_domains,
804	       uint32_t     flush_domains)
805{
806	int ret;
807
808	ret = intel_ring_begin(ring, 2);
809	if (ret)
810		return ret;
811
812	intel_ring_emit(ring, MI_FLUSH);
813	intel_ring_emit(ring, MI_NOOP);
814	intel_ring_advance(ring);
815	return 0;
816}
817
818static int
819ring_add_request(struct intel_ring_buffer *ring,
820		 uint32_t *result)
821{
822	uint32_t seqno;
823	int ret;
824
825	ret = intel_ring_begin(ring, 4);
826	if (ret)
827		return ret;
828
829	seqno = i915_gem_next_request_seqno(ring);
830
831	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
832	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
833	intel_ring_emit(ring, seqno);
834	intel_ring_emit(ring, MI_USER_INTERRUPT);
835	intel_ring_advance(ring);
836
837	*result = seqno;
838	return 0;
839}
840
841static bool
842gen6_ring_get_irq(struct intel_ring_buffer *ring, uint32_t gflag, uint32_t rflag)
843{
844	struct drm_device *dev = ring->dev;
845	drm_i915_private_t *dev_priv = dev->dev_private;
846
847	if (!dev->irq_enabled)
848		return false;
849
850	gen6_gt_force_wake_get(dev_priv);
851
852	mtx_assert(&ring->irq_lock, MA_OWNED);
853	if (ring->irq_refcount++ == 0) {
854		ring->irq_mask &= ~rflag;
855		I915_WRITE_IMR(ring, ring->irq_mask);
856		ironlake_enable_irq(dev_priv, gflag);
857	}
858
859	return true;
860}
861
862static void
863gen6_ring_put_irq(struct intel_ring_buffer *ring, uint32_t gflag, uint32_t rflag)
864{
865	struct drm_device *dev = ring->dev;
866	drm_i915_private_t *dev_priv = dev->dev_private;
867
868	mtx_assert(&ring->irq_lock, MA_OWNED);
869	if (--ring->irq_refcount == 0) {
870		ring->irq_mask |= rflag;
871		I915_WRITE_IMR(ring, ring->irq_mask);
872		ironlake_disable_irq(dev_priv, gflag);
873	}
874
875	gen6_gt_force_wake_put(dev_priv);
876}
877
878static bool
879bsd_ring_get_irq(struct intel_ring_buffer *ring)
880{
881	struct drm_device *dev = ring->dev;
882	drm_i915_private_t *dev_priv = dev->dev_private;
883
884	if (!dev->irq_enabled)
885		return false;
886
887	mtx_assert(&ring->irq_lock, MA_OWNED);
888	if (ring->irq_refcount++ == 0) {
889		if (IS_G4X(dev))
890			i915_enable_irq(dev_priv, I915_BSD_USER_INTERRUPT);
891		else
892			ironlake_enable_irq(dev_priv, GT_BSD_USER_INTERRUPT);
893	}
894
895	return true;
896}
897static void
898bsd_ring_put_irq(struct intel_ring_buffer *ring)
899{
900	struct drm_device *dev = ring->dev;
901	drm_i915_private_t *dev_priv = dev->dev_private;
902
903	mtx_assert(&ring->irq_lock, MA_OWNED);
904	if (--ring->irq_refcount == 0) {
905		if (IS_G4X(dev))
906			i915_disable_irq(dev_priv, I915_BSD_USER_INTERRUPT);
907		else
908			ironlake_disable_irq(dev_priv, GT_BSD_USER_INTERRUPT);
909	}
910}
911
912static int
913ring_dispatch_execbuffer(struct intel_ring_buffer *ring, uint32_t offset,
914    uint32_t length)
915{
916	int ret;
917
918	ret = intel_ring_begin(ring, 2);
919	if (ret)
920		return ret;
921
922	intel_ring_emit(ring,
923			MI_BATCH_BUFFER_START | (2 << 6) |
924			MI_BATCH_NON_SECURE_I965);
925	intel_ring_emit(ring, offset);
926	intel_ring_advance(ring);
927
928	return 0;
929}
930
931static int
932render_ring_dispatch_execbuffer(struct intel_ring_buffer *ring,
933				uint32_t offset, uint32_t len)
934{
935	struct drm_device *dev = ring->dev;
936	int ret;
937
938	if (IS_I830(dev) || IS_845G(dev)) {
939		ret = intel_ring_begin(ring, 4);
940		if (ret)
941			return ret;
942
943		intel_ring_emit(ring, MI_BATCH_BUFFER);
944		intel_ring_emit(ring, offset | MI_BATCH_NON_SECURE);
945		intel_ring_emit(ring, offset + len - 8);
946		intel_ring_emit(ring, 0);
947	} else {
948		ret = intel_ring_begin(ring, 2);
949		if (ret)
950			return ret;
951
952		if (INTEL_INFO(dev)->gen >= 4) {
953			intel_ring_emit(ring,
954					MI_BATCH_BUFFER_START | (2 << 6) |
955					MI_BATCH_NON_SECURE_I965);
956			intel_ring_emit(ring, offset);
957		} else {
958			intel_ring_emit(ring,
959					MI_BATCH_BUFFER_START | (2 << 6));
960			intel_ring_emit(ring, offset | MI_BATCH_NON_SECURE);
961		}
962	}
963	intel_ring_advance(ring);
964
965	return 0;
966}
967
968static void cleanup_status_page(struct intel_ring_buffer *ring)
969{
970	drm_i915_private_t *dev_priv = ring->dev->dev_private;
971	struct drm_i915_gem_object *obj;
972
973	obj = ring->status_page.obj;
974	if (obj == NULL)
975		return;
976
977	pmap_qremove((vm_offset_t)ring->status_page.page_addr, 1);
978	kva_free((vm_offset_t)ring->status_page.page_addr,
979	    PAGE_SIZE);
980	i915_gem_object_unpin(obj);
981	drm_gem_object_unreference(&obj->base);
982	ring->status_page.obj = NULL;
983
984	memset(&dev_priv->hws_map, 0, sizeof(dev_priv->hws_map));
985}
986
987static int init_status_page(struct intel_ring_buffer *ring)
988{
989	struct drm_device *dev = ring->dev;
990	drm_i915_private_t *dev_priv = dev->dev_private;
991	struct drm_i915_gem_object *obj;
992	int ret;
993
994	obj = i915_gem_alloc_object(dev, 4096);
995	if (obj == NULL) {
996		DRM_ERROR("Failed to allocate status page\n");
997		ret = -ENOMEM;
998		goto err;
999	}
1000
1001	i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
1002
1003	ret = i915_gem_object_pin(obj, 4096, true);
1004	if (ret != 0) {
1005		goto err_unref;
1006	}
1007
1008	ring->status_page.gfx_addr = obj->gtt_offset;
1009	ring->status_page.page_addr = (void *)kva_alloc(PAGE_SIZE);
1010	if (ring->status_page.page_addr == NULL) {
1011		memset(&dev_priv->hws_map, 0, sizeof(dev_priv->hws_map));
		ret = -ENOMEM;
1012		goto err_unpin;
1013	}
1014	pmap_qenter((vm_offset_t)ring->status_page.page_addr, &obj->pages[0],
1015	    1);
1016	pmap_invalidate_cache_range((vm_offset_t)ring->status_page.page_addr,
1017	    (vm_offset_t)ring->status_page.page_addr + PAGE_SIZE, FALSE);
1018	ring->status_page.obj = obj;
1019	memset(ring->status_page.page_addr, 0, PAGE_SIZE);
1020
1021	intel_ring_setup_status_page(ring);
1022	DRM_DEBUG("i915: init_status_page %s hws offset: 0x%08x\n",
1023			ring->name, ring->status_page.gfx_addr);
1024
1025	return 0;
1026
1027err_unpin:
1028	i915_gem_object_unpin(obj);
1029err_unref:
1030	drm_gem_object_unreference(&obj->base);
1031err:
1032	return ret;
1033}
1034
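/*
 * Engine-independent ring setup: allocate and pin the status page (when
 * the hardware wants one), allocate and pin the ring object itself, map
 * it write-combined through the GTT aperture and run the engine-specific
 * init hook.
 */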
1035static
1036int intel_init_ring_buffer(struct drm_device *dev,
1037			   struct intel_ring_buffer *ring)
1038{
1039	struct drm_i915_gem_object *obj;
1040	int ret;
1041
1042	ring->dev = dev;
1043	INIT_LIST_HEAD(&ring->active_list);
1044	INIT_LIST_HEAD(&ring->request_list);
1045	INIT_LIST_HEAD(&ring->gpu_write_list);
1046
1047	mtx_init(&ring->irq_lock, "ringb", NULL, MTX_DEF);
1048	ring->irq_mask = ~0;
1049
1050	if (I915_NEED_GFX_HWS(dev)) {
1051		ret = init_status_page(ring);
1052		if (ret)
1053			return ret;
1054	}
1055
1056	obj = i915_gem_alloc_object(dev, ring->size);
1057	if (obj == NULL) {
1058		DRM_ERROR("Failed to allocate ringbuffer\n");
1059		ret = -ENOMEM;
1060		goto err_hws;
1061	}
1062
1063	ring->obj = obj;
1064
1065	ret = i915_gem_object_pin(obj, PAGE_SIZE, true);
1066	if (ret)
1067		goto err_unref;
1068
1069	ring->map.size = ring->size;
1070	ring->map.offset = dev->agp->base + obj->gtt_offset;
1071	ring->map.type = 0;
1072	ring->map.flags = 0;
1073	ring->map.mtrr = 0;
1074
1075	drm_core_ioremap_wc(&ring->map, dev);
1076	if (ring->map.virtual == NULL) {
1077		DRM_ERROR("Failed to map ringbuffer.\n");
1078		ret = -EINVAL;
1079		goto err_unpin;
1080	}
1081
1082	ring->virtual_start = ring->map.virtual;
1083	ret = ring->init(ring);
1084	if (ret)
1085		goto err_unmap;
1086
1087	/* Work around an erratum on the i830 which causes a hang if
1088	 * the TAIL pointer points to within the last 2 cachelines
1089	 * of the buffer.
1090	 */
1091	ring->effective_size = ring->size;
1092	if (IS_I830(ring->dev) || IS_845G(ring->dev))
1093		ring->effective_size -= 128;
1094
1095	return 0;
1096
1097err_unmap:
1098	drm_core_ioremapfree(&ring->map, dev);
1099err_unpin:
1100	i915_gem_object_unpin(obj);
1101err_unref:
1102	drm_gem_object_unreference(&obj->base);
1103	ring->obj = NULL;
1104err_hws:
1105	cleanup_status_page(ring);
1106	return ret;
1107}
1108
1109void intel_cleanup_ring_buffer(struct intel_ring_buffer *ring)
1110{
1111	struct drm_i915_private *dev_priv;
1112	int ret;
1113
1114	if (ring->obj == NULL)
1115		return;
1116
1117	/* Disable the ring buffer. The ring must be idle at this point */
1118	dev_priv = ring->dev->dev_private;
1119	ret = intel_wait_ring_idle(ring);
1120	I915_WRITE_CTL(ring, 0);
1121
1122	drm_core_ioremapfree(&ring->map, ring->dev);
1123
1124	i915_gem_object_unpin(ring->obj);
1125	drm_gem_object_unreference(&ring->obj->base);
1126	ring->obj = NULL;
1127
1128	if (ring->cleanup)
1129		ring->cleanup(ring);
1130
1131	cleanup_status_page(ring);
1132}
1133
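/*
 * Not enough contiguous space left before the end of the ring: pad the
 * remainder with MI_NOOPs and wrap the tail back to the start.
 */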
1134static int intel_wrap_ring_buffer(struct intel_ring_buffer *ring)
1135{
1136	unsigned int *virt;
1137	int rem = ring->size - ring->tail;
1138
1139	if (ring->space < rem) {
1140		int ret = intel_wait_ring_buffer(ring, rem);
1141		if (ret)
1142			return ret;
1143	}
1144
1145	virt = (unsigned int *)((char *)ring->virtual_start + ring->tail);
1146	rem /= 8;
1147	while (rem--) {
1148		*virt++ = MI_NOOP;
1149		*virt++ = MI_NOOP;
1150	}
1151
1152	ring->tail = 0;
1153	ring->space = ring_space(ring);
1154
1155	return 0;
1156}
1157
1158static int intel_ring_wait_seqno(struct intel_ring_buffer *ring, u32 seqno)
1159{
1160	struct drm_i915_private *dev_priv = ring->dev->dev_private;
1161	bool was_interruptible;
1162	int ret;
1163
1164	/* XXX As we have not yet audited all the paths to check that
1165	 * they are ready for ERESTARTSYS from intel_ring_begin, do not
1166	 * allow us to be interruptible by a signal.
1167	 */
1168	was_interruptible = dev_priv->mm.interruptible;
1169	dev_priv->mm.interruptible = false;
1170
1171	ret = i915_wait_request(ring, seqno, true);
1172
1173	dev_priv->mm.interruptible = was_interruptible;
1174
1175	return ret;
1176}
1177
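/*
 * Try to make at least @n bytes of space by retiring completed requests;
 * failing that, wait on the oldest outstanding request whose retirement
 * would free enough room.
 */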
1178static int intel_ring_wait_request(struct intel_ring_buffer *ring, int n)
1179{
1180	struct drm_i915_gem_request *request;
1181	u32 seqno = 0;
1182	int ret;
1183
1184	i915_gem_retire_requests_ring(ring);
1185
1186	if (ring->last_retired_head != -1) {
1187		ring->head = ring->last_retired_head;
1188		ring->last_retired_head = -1;
1189		ring->space = ring_space(ring);
1190		if (ring->space >= n)
1191			return 0;
1192	}
1193
1194	list_for_each_entry(request, &ring->request_list, list) {
1195		int space;
1196
1197		if (request->tail == -1)
1198			continue;
1199
1200		space = request->tail - (ring->tail + 8);
1201		if (space < 0)
1202			space += ring->size;
1203		if (space >= n) {
1204			seqno = request->seqno;
1205			break;
1206		}
1207
1208		/* Consume this request in case we need more space than
1209		 * is available and so need to prevent a race between
1210		 * updating last_retired_head and direct reads of
1211		 * I915_RING_HEAD. It also provides a nice sanity check.
1212		 */
1213		request->tail = -1;
1214	}
1215
1216	if (seqno == 0)
1217		return -ENOSPC;
1218
1219	ret = intel_ring_wait_seqno(ring, seqno);
1220	if (ret)
1221		return ret;
1222
1223	if (ring->last_retired_head == -1)
1224		return -ENOSPC;
1225
1226	ring->head = ring->last_retired_head;
1227	ring->last_retired_head = -1;
1228	ring->space = ring_space(ring);
1229	if (ring->space < n)
1230		return -ENOSPC;
1231
1232	return 0;
1233}
1234
1235int intel_wait_ring_buffer(struct intel_ring_buffer *ring, int n)
1236{
1237	struct drm_device *dev = ring->dev;
1238	struct drm_i915_private *dev_priv = dev->dev_private;
1239	int end;
1240	int ret;
1241
1242	ret = intel_ring_wait_request(ring, n);
1243	if (ret != -ENOSPC)
1244		return ret;
1245
1246	CTR1(KTR_DRM, "ring_wait_begin %s", ring->name);
1247	if (drm_core_check_feature(dev, DRIVER_GEM))
1248		/* With GEM the hangcheck timer should kick us out of the loop,
1249		 * leaving it early runs the risk of corrupting GEM state (due
1250		 * to running on almost untested codepaths). But on resume
1251		 * timers don't work yet, so prevent a complete hang in that
1252		 * case by choosing an insanely large timeout. */
1253		end = ticks + hz * 60;
1254	else
1255		end = ticks + hz * 3;
1256	do {
1257		ring->head = I915_READ_HEAD(ring);
1258		ring->space = ring_space(ring);
1259		if (ring->space >= n) {
1260			CTR1(KTR_DRM, "ring_wait_end %s", ring->name);
1261			return 0;
1262		}
1263
1264#if 0
1265		if (dev->primary->master) {
1266			struct drm_i915_master_private *master_priv = dev->primary->master->driver_priv;
1267			if (master_priv->sarea_priv)
1268				master_priv->sarea_priv->perf_boxes |= I915_BOX_WAIT;
1269		}
1270#else
1271		if (dev_priv->sarea_priv)
1272			dev_priv->sarea_priv->perf_boxes |= I915_BOX_WAIT;
1273#endif
1274
1275		pause("915rng", 1);
1276		if (atomic_load_acq_32(&dev_priv->mm.wedged) != 0) {
1277			CTR1(KTR_DRM, "ring_wait_end %s wedged", ring->name);
1278			return -EAGAIN;
1279		}
1280	} while (!time_after(ticks, end));
1281	CTR1(KTR_DRM, "ring_wait_end %s busy", ring->name);
1282	return -EBUSY;
1283}
1284
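/*
 * Reserve room in the ring for num_dwords command dwords, wrapping the
 * tail and/or waiting for the GPU to consume older commands if the ring
 * is too full.  Every emitter in this file follows the same pattern; a
 * minimal sketch (adapted from bsd_ring_flush() above):
 *
 *	ret = intel_ring_begin(ring, 2);
 *	if (ret)
 *		return ret;
 *
 *	intel_ring_emit(ring, MI_FLUSH);
 *	intel_ring_emit(ring, MI_NOOP);
 *	intel_ring_advance(ring);
 *
 * intel_ring_advance() then publishes the new tail to the hardware
 * through ring->write_tail().
 */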
1285int intel_ring_begin(struct intel_ring_buffer *ring,
1286		     int num_dwords)
1287{
1288	struct drm_i915_private *dev_priv = ring->dev->dev_private;
1289	int n = 4*num_dwords;
1290	int ret;
1291
1292	if (atomic_load_acq_int(&dev_priv->mm.wedged))
1293		return -EIO;
1294
1295	if (ring->tail + n > ring->effective_size) {
1296		ret = intel_wrap_ring_buffer(ring);
1297		if (ret != 0)
1298			return ret;
1299	}
1300
1301	if (ring->space < n) {
1302		ret = intel_wait_ring_buffer(ring, n);
1303		if (ret != 0)
1304			return ret;
1305	}
1306
1307	ring->space -= n;
1308	return 0;
1309}
1310
1311void intel_ring_advance(struct intel_ring_buffer *ring)
1312{
1313	ring->tail &= ring->size - 1;
1314	ring->write_tail(ring, ring->tail);
1315}
1316
1317static const struct intel_ring_buffer render_ring = {
1318	.name			= "render ring",
1319	.id			= RCS,
1320	.mmio_base		= RENDER_RING_BASE,
1321	.size			= 32 * PAGE_SIZE,
1322	.init			= init_render_ring,
1323	.write_tail		= ring_write_tail,
1324	.flush			= render_ring_flush,
1325	.add_request		= render_ring_add_request,
1326	.get_seqno		= ring_get_seqno,
1327	.irq_get		= render_ring_get_irq,
1328	.irq_put		= render_ring_put_irq,
1329	.dispatch_execbuffer	= render_ring_dispatch_execbuffer,
1330	.cleanup		= render_ring_cleanup,
1331	.sync_to		= render_ring_sync_to,
1332	.semaphore_register	= {MI_SEMAPHORE_SYNC_INVALID,
1333				   MI_SEMAPHORE_SYNC_RV,
1334				   MI_SEMAPHORE_SYNC_RB},
1335	.signal_mbox		= {GEN6_VRSYNC, GEN6_BRSYNC},
1336};
1337
1338/* ring buffer for bit-stream decoder */
1339
1340static const struct intel_ring_buffer bsd_ring = {
1341	.name                   = "bsd ring",
1342	.id			= VCS,
1343	.mmio_base		= BSD_RING_BASE,
1344	.size			= 32 * PAGE_SIZE,
1345	.init			= init_ring_common,
1346	.write_tail		= ring_write_tail,
1347	.flush			= bsd_ring_flush,
1348	.add_request		= ring_add_request,
1349	.get_seqno		= ring_get_seqno,
1350	.irq_get		= bsd_ring_get_irq,
1351	.irq_put		= bsd_ring_put_irq,
1352	.dispatch_execbuffer	= ring_dispatch_execbuffer,
1353};
1354
1355
1356static void gen6_bsd_ring_write_tail(struct intel_ring_buffer *ring,
1357				     uint32_t value)
1358{
1359	drm_i915_private_t *dev_priv = ring->dev->dev_private;
1360
1361	/* Every tail move must follow the sequence below */
1362	I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
1363	    GEN6_BSD_SLEEP_PSMI_CONTROL_RC_ILDL_MESSAGE_MODIFY_MASK |
1364	    GEN6_BSD_SLEEP_PSMI_CONTROL_RC_ILDL_MESSAGE_DISABLE);
1365	I915_WRITE(GEN6_BSD_RNCID, 0x0);
1366
1367	if (_intel_wait_for(ring->dev,
1368	    (I915_READ(GEN6_BSD_SLEEP_PSMI_CONTROL) &
1369	     GEN6_BSD_SLEEP_PSMI_CONTROL_IDLE_INDICATOR) == 0, 50,
1370	    true, "915g6i") != 0)
1371		DRM_ERROR("timed out waiting for IDLE Indicator\n");
1372
1373	I915_WRITE_TAIL(ring, value);
1374	I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
1375	    GEN6_BSD_SLEEP_PSMI_CONTROL_RC_ILDL_MESSAGE_MODIFY_MASK |
1376	    GEN6_BSD_SLEEP_PSMI_CONTROL_RC_ILDL_MESSAGE_ENABLE);
1377}
1378
1379static int gen6_ring_flush(struct intel_ring_buffer *ring,
1380			   uint32_t invalidate, uint32_t flush)
1381{
1382	uint32_t cmd;
1383	int ret;
1384
1385	ret = intel_ring_begin(ring, 4);
1386	if (ret)
1387		return ret;
1388
1389	cmd = MI_FLUSH_DW;
1390	if (invalidate & I915_GEM_GPU_DOMAINS)
1391		cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD;
1392	intel_ring_emit(ring, cmd);
1393	intel_ring_emit(ring, 0);
1394	intel_ring_emit(ring, 0);
1395	intel_ring_emit(ring, MI_NOOP);
1396	intel_ring_advance(ring);
1397	return 0;
1398}
1399
1400static int
1401gen6_ring_dispatch_execbuffer(struct intel_ring_buffer *ring,
1402			      uint32_t offset, uint32_t len)
1403{
1404	int ret;
1405
1406	ret = intel_ring_begin(ring, 2);
1407	if (ret)
1408		return ret;
1409
1410	intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_NON_SECURE_I965);
1411	/* bit0-7 is the length on GEN6+ */
1412	intel_ring_emit(ring, offset);
1413	intel_ring_advance(ring);
1414
1415	return 0;
1416}
1417
1418static bool
1419gen6_render_ring_get_irq(struct intel_ring_buffer *ring)
1420{
1421	return gen6_ring_get_irq(ring,
1422				 GT_USER_INTERRUPT,
1423				 GEN6_RENDER_USER_INTERRUPT);
1424}
1425
1426static void
1427gen6_render_ring_put_irq(struct intel_ring_buffer *ring)
1428{
1429	return gen6_ring_put_irq(ring,
1430				 GT_USER_INTERRUPT,
1431				 GEN6_RENDER_USER_INTERRUPT);
1432}
1433
1434static bool
1435gen6_bsd_ring_get_irq(struct intel_ring_buffer *ring)
1436{
1437	return gen6_ring_get_irq(ring,
1438				 GT_GEN6_BSD_USER_INTERRUPT,
1439				 GEN6_BSD_USER_INTERRUPT);
1440}
1441
1442static void
1443gen6_bsd_ring_put_irq(struct intel_ring_buffer *ring)
1444{
1445	return gen6_ring_put_irq(ring,
1446				 GT_GEN6_BSD_USER_INTERRUPT,
1447				 GEN6_BSD_USER_INTERRUPT);
1448}
1449
1450/* ring buffer for Video Codec for Gen6+ */
1451static const struct intel_ring_buffer gen6_bsd_ring = {
1452	.name			= "gen6 bsd ring",
1453	.id			= VCS,
1454	.mmio_base		= GEN6_BSD_RING_BASE,
1455	.size			= 32 * PAGE_SIZE,
1456	.init			= init_ring_common,
1457	.write_tail		= gen6_bsd_ring_write_tail,
1458	.flush			= gen6_ring_flush,
1459	.add_request		= gen6_add_request,
1460	.get_seqno		= gen6_ring_get_seqno,
1461	.irq_get		= gen6_bsd_ring_get_irq,
1462	.irq_put		= gen6_bsd_ring_put_irq,
1463	.dispatch_execbuffer	= gen6_ring_dispatch_execbuffer,
1464	.sync_to		= gen6_bsd_ring_sync_to,
1465	.semaphore_register	= {MI_SEMAPHORE_SYNC_VR,
1466				   MI_SEMAPHORE_SYNC_INVALID,
1467				   MI_SEMAPHORE_SYNC_VB},
1468	.signal_mbox		= {GEN6_RVSYNC, GEN6_BVSYNC},
1469};
1470
1471/* Blitter support (SandyBridge+) */
1472
1473static bool
1474blt_ring_get_irq(struct intel_ring_buffer *ring)
1475{
1476	return gen6_ring_get_irq(ring,
1477				 GT_BLT_USER_INTERRUPT,
1478				 GEN6_BLITTER_USER_INTERRUPT);
1479}
1480
1481static void
1482blt_ring_put_irq(struct intel_ring_buffer *ring)
1483{
1484	gen6_ring_put_irq(ring,
1485			  GT_BLT_USER_INTERRUPT,
1486			  GEN6_BLITTER_USER_INTERRUPT);
1487}
1488
1489static int blt_ring_flush(struct intel_ring_buffer *ring,
1490			  uint32_t invalidate, uint32_t flush)
1491{
1492	uint32_t cmd;
1493	int ret;
1494
1495	ret = intel_ring_begin(ring, 4);
1496	if (ret)
1497		return ret;
1498
1499	cmd = MI_FLUSH_DW;
1500	if (invalidate & I915_GEM_DOMAIN_RENDER)
1501		cmd |= MI_INVALIDATE_TLB;
1502	intel_ring_emit(ring, cmd);
1503	intel_ring_emit(ring, 0);
1504	intel_ring_emit(ring, 0);
1505	intel_ring_emit(ring, MI_NOOP);
1506	intel_ring_advance(ring);
1507	return 0;
1508}
1509
1510static const struct intel_ring_buffer gen6_blt_ring = {
1511	.name			= "blt ring",
1512	.id			= BCS,
1513	.mmio_base		= BLT_RING_BASE,
1514	.size			= 32 * PAGE_SIZE,
1515	.init			= init_ring_common,
1516	.write_tail		= ring_write_tail,
1517	.flush			= blt_ring_flush,
1518	.add_request		= gen6_add_request,
1519	.get_seqno		= gen6_ring_get_seqno,
1520	.irq_get		= blt_ring_get_irq,
1521	.irq_put		= blt_ring_put_irq,
1522	.dispatch_execbuffer	= gen6_ring_dispatch_execbuffer,
1523	.sync_to		= gen6_blt_ring_sync_to,
1524	.semaphore_register	= {MI_SEMAPHORE_SYNC_BR,
1525				   MI_SEMAPHORE_SYNC_BV,
1526				   MI_SEMAPHORE_SYNC_INVALID},
1527	.signal_mbox		= {GEN6_RBSYNC, GEN6_VBSYNC},
1528};
1529
1530int intel_init_render_ring_buffer(struct drm_device *dev)
1531{
1532	drm_i915_private_t *dev_priv = dev->dev_private;
1533	struct intel_ring_buffer *ring = &dev_priv->rings[RCS];
1534
1535	*ring = render_ring;
1536	if (INTEL_INFO(dev)->gen >= 6) {
1537		ring->add_request = gen6_add_request;
1538		ring->flush = gen6_render_ring_flush;
1539		ring->irq_get = gen6_render_ring_get_irq;
1540		ring->irq_put = gen6_render_ring_put_irq;
1541		ring->get_seqno = gen6_ring_get_seqno;
1542	} else if (IS_GEN5(dev)) {
1543		ring->add_request = pc_render_add_request;
1544		ring->get_seqno = pc_render_get_seqno;
1545	}
1546
1547	if (!I915_NEED_GFX_HWS(dev)) {
1548		ring->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
1549		memset(ring->status_page.page_addr, 0, PAGE_SIZE);
1550	}
1551
1552	return intel_init_ring_buffer(dev, ring);
1553}
1554
1555int intel_render_ring_init_dri(struct drm_device *dev, uint64_t start,
1556    uint32_t size)
1557{
1558	drm_i915_private_t *dev_priv = dev->dev_private;
1559	struct intel_ring_buffer *ring = &dev_priv->rings[RCS];
1560
1561	*ring = render_ring;
1562	if (INTEL_INFO(dev)->gen >= 6) {
1563		ring->add_request = gen6_add_request;
1564		ring->irq_get = gen6_render_ring_get_irq;
1565		ring->irq_put = gen6_render_ring_put_irq;
1566	} else if (IS_GEN5(dev)) {
1567		ring->add_request = pc_render_add_request;
1568		ring->get_seqno = pc_render_get_seqno;
1569	}
1570
1571	ring->dev = dev;
1572	INIT_LIST_HEAD(&ring->active_list);
1573	INIT_LIST_HEAD(&ring->request_list);
1574	INIT_LIST_HEAD(&ring->gpu_write_list);
1575
1576	ring->size = size;
1577	ring->effective_size = ring->size;
1578	if (IS_I830(ring->dev))
1579		ring->effective_size -= 128;
1580
1581	ring->map.offset = start;
1582	ring->map.size = size;
1583	ring->map.type = 0;
1584	ring->map.flags = 0;
1585	ring->map.mtrr = 0;
1586
1587	drm_core_ioremap_wc(&ring->map, dev);
1588	if (ring->map.virtual == NULL) {
1589		DRM_ERROR("can not ioremap virtual address for"
1590			  " ring buffer\n");
1591		return -ENOMEM;
1592	}
1593
1594	ring->virtual_start = (void *)ring->map.virtual;
1595	return 0;
1596}
1597
1598int intel_init_bsd_ring_buffer(struct drm_device *dev)
1599{
1600	drm_i915_private_t *dev_priv = dev->dev_private;
1601	struct intel_ring_buffer *ring = &dev_priv->rings[VCS];
1602
1603	if (IS_GEN6(dev) || IS_GEN7(dev))
1604		*ring = gen6_bsd_ring;
1605	else
1606		*ring = bsd_ring;
1607
1608	return intel_init_ring_buffer(dev, ring);
1609}
1610
1611int intel_init_blt_ring_buffer(struct drm_device *dev)
1612{
1613	drm_i915_private_t *dev_priv = dev->dev_private;
1614	struct intel_ring_buffer *ring = &dev_priv->rings[BCS];
1615
1616	*ring = gen6_blt_ring;
1617
1618	return intel_init_ring_buffer(dev, ring);
1619}
1620