/*-
 * Copyright (C) 2012 Intel Corporation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#ifndef __IOAT_INTERNAL_H__
#define __IOAT_INTERNAL_H__

#include <sys/_task.h>

#define	DEVICE2SOFTC(dev)	((struct ioat_softc *) device_get_softc(dev))
#define	KTR_IOAT		KTR_SPARE3

#define	ioat_read_chancnt(ioat) \
	ioat_read_1((ioat), IOAT_CHANCNT_OFFSET)

#define	ioat_read_xfercap(ioat) \
	(ioat_read_1((ioat), IOAT_XFERCAP_OFFSET) & IOAT_XFERCAP_VALID_MASK)

#define	ioat_write_intrctrl(ioat, value) \
	ioat_write_1((ioat), IOAT_INTRCTRL_OFFSET, (value))

#define	ioat_read_cbver(ioat) \
	(ioat_read_1((ioat), IOAT_CBVER_OFFSET) & 0xFF)

#define	ioat_read_dmacapability(ioat) \
	ioat_read_4((ioat), IOAT_DMACAPABILITY_OFFSET)

#define	ioat_write_chanctrl(ioat, value) \
	ioat_write_2((ioat), IOAT_CHANCTRL_OFFSET, (value))

static __inline uint64_t
ioat_bus_space_read_8_lower_first(bus_space_tag_t tag,
    bus_space_handle_t handle, bus_size_t offset)
{
	return (bus_space_read_4(tag, handle, offset) |
	    ((uint64_t)bus_space_read_4(tag, handle, offset + 4)) << 32);
}

static __inline void
ioat_bus_space_write_8_lower_first(bus_space_tag_t tag,
    bus_space_handle_t handle, bus_size_t offset, uint64_t val)
{
	bus_space_write_4(tag, handle, offset, val);
	bus_space_write_4(tag, handle, offset + 4, val >> 32);
}
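
/*
 * These helpers split a 64-bit register access into two 4-byte bus_space
 * accesses, lower half first.  i386 kernels lack 8-byte bus_space accessors,
 * so the #ifdef below maps ioat_bus_space_{read,write}_8 to them there; the
 * lower-first ordering is also what pre-3.3 hardware requires for registers
 * such as CHANSTS (see ioat_get_chansts() below).
 */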

#ifdef __i386__
#define ioat_bus_space_read_8 ioat_bus_space_read_8_lower_first
#define ioat_bus_space_write_8 ioat_bus_space_write_8_lower_first
#else
#define ioat_bus_space_read_8(tag, handle, offset) \
	bus_space_read_8((tag), (handle), (offset))
#define ioat_bus_space_write_8(tag, handle, offset, val) \
	bus_space_write_8((tag), (handle), (offset), (val))
#endif

#define ioat_read_1(ioat, offset) \
	bus_space_read_1((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \
	    (offset))

#define ioat_read_2(ioat, offset) \
	bus_space_read_2((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \
	    (offset))

#define ioat_read_4(ioat, offset) \
	bus_space_read_4((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \
	    (offset))

#define ioat_read_8(ioat, offset) \
	ioat_bus_space_read_8((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \
	    (offset))

#define ioat_read_double_4(ioat, offset) \
	ioat_bus_space_read_8_lower_first((ioat)->pci_bus_tag, \
	    (ioat)->pci_bus_handle, (offset))

#define ioat_write_1(ioat, offset, value) \
	bus_space_write_1((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \
	    (offset), (value))

#define ioat_write_2(ioat, offset, value) \
	bus_space_write_2((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \
	    (offset), (value))

#define ioat_write_4(ioat, offset, value) \
	bus_space_write_4((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \
	    (offset), (value))

#define ioat_write_8(ioat, offset, value) \
	ioat_bus_space_write_8((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \
	    (offset), (value))

#define ioat_write_double_4(ioat, offset, value) \
	ioat_bus_space_write_8_lower_first((ioat)->pci_bus_tag, \
	    (ioat)->pci_bus_handle, (offset), (value))
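
/*
 * Illustration (not part of the driver): the accessors take the softc and a
 * register offset, so reading the 64-bit channel status register looks like
 *
 *	status = ioat_read_8(ioat, IOAT_CHANSTS_OFFSET);
 *
 * In practice ioat_get_chansts() below should be used for CHANSTS, since it
 * also handles pre-3.3 hardware.
 */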

MALLOC_DECLARE(M_IOAT);

SYSCTL_DECL(_hw_ioat);

extern int g_ioat_debug_level;

struct generic_dma_control {
	uint32_t int_enable:1;
	uint32_t src_snoop_disable:1;
	uint32_t dest_snoop_disable:1;
	uint32_t completion_update:1;
	uint32_t fence:1;
	uint32_t reserved1:1;
	uint32_t src_page_break:1;
	uint32_t dest_page_break:1;
	uint32_t bundle:1;
	uint32_t dest_dca:1;
	uint32_t hint:1;
	uint32_t reserved2:13;
	uint32_t op:8;
};
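
/*
 * Each descriptor type below embeds this 32-bit control word in a union with
 * a raw view and an operation-specific bitfield view.  All views alias the
 * same dword, so e.g. "desc->u.control_raw = 0" clears every flag at once.
 */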

struct ioat_generic_hw_descriptor {
	uint32_t size;
	union {
		uint32_t control_raw;
		struct generic_dma_control control_generic;
	} u;
	uint64_t src_addr;
	uint64_t dest_addr;
	uint64_t next;
	uint64_t reserved[4];
};

struct ioat_dma_hw_descriptor {
	uint32_t size;
	union {
		uint32_t control_raw;
		struct generic_dma_control control_generic;
		struct {
			uint32_t int_enable:1;
			uint32_t src_snoop_disable:1;
			uint32_t dest_snoop_disable:1;
			uint32_t completion_update:1;
			uint32_t fence:1;
			uint32_t null:1;
			uint32_t src_page_break:1;
			uint32_t dest_page_break:1;
			uint32_t bundle:1;
			uint32_t dest_dca:1;
			uint32_t hint:1;
			uint32_t reserved:13;
			#define IOAT_OP_COPY 0x00
			uint32_t op:8;
		} control;
	} u;
	uint64_t src_addr;
	uint64_t dest_addr;
	uint64_t next;
	uint64_t next_src_addr;
	uint64_t next_dest_addr;
	uint64_t user1;
	uint64_t user2;
};

struct ioat_fill_hw_descriptor {
	uint32_t size;
	union {
		uint32_t control_raw;
		struct generic_dma_control control_generic;
		struct {
			uint32_t int_enable:1;
			uint32_t reserved:1;
			uint32_t dest_snoop_disable:1;
			uint32_t completion_update:1;
			uint32_t fence:1;
			uint32_t reserved2:2;
			uint32_t dest_page_break:1;
			uint32_t bundle:1;
			uint32_t reserved3:15;
			#define IOAT_OP_FILL 0x01
			uint32_t op:8;
		} control;
	} u;
	uint64_t src_data;
	uint64_t dest_addr;
	uint64_t next;
	uint64_t reserved;
	uint64_t next_dest_addr;
	uint64_t user1;
	uint64_t user2;
};

struct ioat_crc32_hw_descriptor {
	uint32_t size;
	union {
		uint32_t control_raw;
		struct generic_dma_control control_generic;
		struct {
			uint32_t int_enable:1;
			uint32_t src_snoop_disable:1;
			uint32_t dest_snoop_disable:1;
			uint32_t completion_update:1;
			uint32_t fence:1;
			uint32_t reserved1:3;
			uint32_t bundle:1;
			uint32_t dest_dca:1;
			uint32_t hint:1;
			uint32_t use_seed:1;
			/*
			 * crc_location:
			 * For IOAT_OP_MOVECRC_TEST and IOAT_OP_CRC_TEST:
			 * 0: comparison value is pointed to by CRC Address
			 *    field.
			 * 1: comparison value follows data in wire format
			 *    ("inverted reflected bit order") in the 4 bytes
			 *    following the source data.
			 *
			 * For IOAT_OP_CRC_STORE:
			 * 0: Result will be stored at location pointed to by
			 *    CRC Address field (in wire format).
			 * 1: Result will be stored directly following the
			 *    source data.
			 *
			 * For IOAT_OP_MOVECRC_STORE:
			 * 0: Result will be stored at location pointed to by
			 *    CRC Address field (in wire format).
			 * 1: Result will be stored directly following the
			 *    *destination* data.
			 */
			uint32_t crc_location:1;
			uint32_t reserved2:11;
			/*
			 * MOVECRC - Move data in the same way as standard copy
			 * operation, but also compute CRC32.
			 *
			 * CRC - Only compute CRC on source data.
			 *
			 * There is a CRC accumulator register in the hardware.
			 * If 'use_seed' is set, the accumulator is initialized
			 * to the value in 'seed.'
			 *
			 * In all modes, these operators accumulate size bytes
			 * at src_addr into the running CRC32C.
			 *
			 * Store mode emits the accumulated CRC, in wire
			 * format, as specified by the crc_location bit above.
			 *
			 * Test mode compares the accumulated CRC against the
			 * reference CRC, as described in crc_location above.
			 * On failure, halts the DMA engine with a CRC error
			 * status.
			 */
			#define	IOAT_OP_MOVECRC		0x41
			#define	IOAT_OP_MOVECRC_TEST	0x42
			#define	IOAT_OP_MOVECRC_STORE	0x43
			#define	IOAT_OP_CRC		0x81
			#define	IOAT_OP_CRC_TEST	0x82
			#define	IOAT_OP_CRC_STORE	0x83
			uint32_t op:8;
		} control;
	} u;
	uint64_t src_addr;
	uint64_t dest_addr;
	uint64_t next;
	uint64_t next_src_addr;
	uint64_t next_dest_addr;
	uint32_t seed;
	uint32_t reserved;
	uint64_t crc_address;
};
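
/*
 * Sketch (not driver code): per the crc_location rules above, a descriptor
 * that computes CRC32C over the source buffer and appends the result, in
 * wire format, directly after the source data would be set up roughly as
 *
 *	desc->u.control.op = IOAT_OP_CRC_STORE;
 *	desc->u.control.crc_location = 1;
 *	desc->u.control.use_seed = 0;	// don't preload accumulator from seed
 *	desc->size = len;		// bytes to accumulate
 *	desc->src_addr = src_busaddr;
 */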

struct ioat_xor_hw_descriptor {
	uint32_t size;
	union {
		uint32_t control_raw;
		struct generic_dma_control control_generic;
		struct {
			uint32_t int_enable:1;
			uint32_t src_snoop_disable:1;
			uint32_t dest_snoop_disable:1;
			uint32_t completion_update:1;
			uint32_t fence:1;
			uint32_t src_count:3;
			uint32_t bundle:1;
			uint32_t dest_dca:1;
			uint32_t hint:1;
			uint32_t reserved:13;
			#define IOAT_OP_XOR 0x87
			#define IOAT_OP_XOR_VAL 0x88
			uint32_t op:8;
		} control;
	} u;
	uint64_t src_addr;
	uint64_t dest_addr;
	uint64_t next;
	uint64_t src_addr2;
	uint64_t src_addr3;
	uint64_t src_addr4;
	uint64_t src_addr5;
};

struct ioat_xor_ext_hw_descriptor {
	uint64_t src_addr6;
	uint64_t src_addr7;
	uint64_t src_addr8;
	uint64_t next;
	uint64_t reserved[4];
};
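
/*
 * The base XOR descriptor holds five source addresses; an operation with
 * more sources supplies src_addr6 through src_addr8 via this extension,
 * which (presumably by design) fills its own 64-byte descriptor slot.
 */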

struct ioat_pq_hw_descriptor {
	uint32_t size;
	union {
		uint32_t control_raw;
		struct generic_dma_control control_generic;
		struct {
			uint32_t int_enable:1;
			uint32_t src_snoop_disable:1;
			uint32_t dest_snoop_disable:1;
			uint32_t completion_update:1;
			uint32_t fence:1;
			uint32_t src_count:3;
			uint32_t bundle:1;
			uint32_t dest_dca:1;
			uint32_t hint:1;
			uint32_t p_disable:1;
			uint32_t q_disable:1;
			uint32_t reserved:11;
			#define IOAT_OP_PQ 0x89
			#define IOAT_OP_PQ_VAL 0x8a
			uint32_t op:8;
		} control;
	} u;
	uint64_t src_addr;
	uint64_t p_addr;
	uint64_t next;
	uint64_t src_addr2;
	uint64_t src_addr3;
	uint8_t  coef[8];
	uint64_t q_addr;
};

struct ioat_pq_ext_hw_descriptor {
	uint64_t src_addr4;
	uint64_t src_addr5;
	uint64_t src_addr6;
	uint64_t next;
	uint64_t src_addr7;
	uint64_t src_addr8;
	uint64_t reserved[2];
};

struct ioat_pq_update_hw_descriptor {
	uint32_t size;
	union {
		uint32_t control_raw;
		struct generic_dma_control control_generic;
		struct {
			uint32_t int_enable:1;
			uint32_t src_snoop_disable:1;
			uint32_t dest_snoop_disable:1;
			uint32_t completion_update:1;
			uint32_t fence:1;
			uint32_t src_cnt:3;
			uint32_t bundle:1;
			uint32_t dest_dca:1;
			uint32_t hint:1;
			uint32_t p_disable:1;
			uint32_t q_disable:1;
			uint32_t reserved:3;
			uint32_t coef:8;
			#define IOAT_OP_PQ_UP 0x8b
			uint32_t op:8;
		} control;
	} u;
	uint64_t src_addr;
	uint64_t p_addr;
	uint64_t next;
	uint64_t src_addr2;
	uint64_t p_src;
	uint64_t q_src;
	uint64_t q_addr;
};

struct ioat_raw_hw_descriptor {
	uint64_t field[8];
};
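
/*
 * Every descriptor variant above, extensions included, is 64 bytes, so any
 * of them can occupy a slot of the hw_desc_ring union below and
 * RING_PHYS_ADDR() can scale indices by a single sizeof.  A compile-time
 * check one might add (sketch; CTASSERT comes from <sys/systm.h>):
 *
 *	CTASSERT(sizeof(struct ioat_raw_hw_descriptor) == 64);
 *	CTASSERT(sizeof(struct ioat_dma_hw_descriptor) ==
 *	    sizeof(struct ioat_raw_hw_descriptor));
 */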

struct bus_dmadesc {
	bus_dmaengine_callback_t callback_fn;
	void			 *callback_arg;
};

struct ioat_descriptor {
	struct bus_dmadesc	bus_dmadesc;
	uint32_t		id;
	bus_dmamap_t		src_dmamap;
	bus_dmamap_t		dst_dmamap;
	bus_dmamap_t		src2_dmamap;
	bus_dmamap_t		dst2_dmamap;
};
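
/*
 * One ioat_descriptor exists per ring slot: it carries the completion
 * callback and the busdma maps for that slot's buffers, while the matching
 * slot in hw_desc_ring below holds the hardware-visible descriptor.
 */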

/* Unused by this driver at this time. */
#define	IOAT_OP_MARKER		0x84

/*
 * Deprecated OPs -- v3 DMA generates an abort if given these.  And this driver
 * doesn't support anything older than v3.
 */
#define	IOAT_OP_OLD_XOR		0x85
#define	IOAT_OP_OLD_XOR_VAL	0x86

/* One of these per allocated PCI device. */
struct ioat_softc {
	bus_dmaengine_t		dmaengine;
#define	to_ioat_softc(_dmaeng)						\
({									\
	bus_dmaengine_t *_p = (_dmaeng);				\
	(struct ioat_softc *)((char *)_p -				\
	    offsetof(struct ioat_softc, dmaengine));			\
})
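
/*
 * to_ioat_softc() is the usual container-of idiom: given a pointer to the
 * dmaengine member handed out to consumers, it steps back by the member's
 * offset to recover the enclosing softc.
 */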

	device_t		device;
	int			domain;
	int			cpu;
	int			version;
	unsigned		chan_idx;

	bus_space_tag_t		pci_bus_tag;
	bus_space_handle_t	pci_bus_handle;
	struct resource		*pci_resource;
	int			pci_resource_id;
	uint32_t		max_xfer_size;
	uint32_t		capabilities;
	uint32_t		ring_size_order;
	uint16_t		intrdelay_max;
	uint16_t		cached_intrdelay;

	int			rid;
	struct resource		*res;
	void			*tag;

	bus_dma_tag_t		hw_desc_tag;
	bus_dmamap_t		hw_desc_map;

	bus_dma_tag_t		data_tag;

	bus_dma_tag_t		comp_update_tag;
	bus_dmamap_t		comp_update_map;
	uint64_t		*comp_update;
	bus_addr_t		comp_update_bus_addr;

	boolean_t		quiescing;
	boolean_t		destroying;
	boolean_t		is_submitter_processing;
	boolean_t		intrdelay_supported;
	boolean_t		resetting;		/* submit_lock */
	boolean_t		resetting_cleanup;	/* cleanup_lock */

	struct ioat_descriptor	*ring;

	union ioat_hw_descriptor {
		struct ioat_generic_hw_descriptor	generic;
		struct ioat_dma_hw_descriptor		dma;
		struct ioat_fill_hw_descriptor		fill;
		struct ioat_crc32_hw_descriptor		crc32;
		struct ioat_xor_hw_descriptor		xor;
		struct ioat_xor_ext_hw_descriptor	xor_ext;
		struct ioat_pq_hw_descriptor		pq;
		struct ioat_pq_ext_hw_descriptor	pq_ext;
		struct ioat_raw_hw_descriptor		raw;
	} *hw_desc_ring;
	bus_addr_t		hw_desc_bus_addr;
#define	RING_PHYS_ADDR(sc, i)	((sc)->hw_desc_bus_addr + \
    (((i) % (1 << (sc)->ring_size_order)) * sizeof(struct ioat_dma_hw_descriptor)))
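
/*
 * The ring holds (1 << ring_size_order) descriptors, so the modulo above
 * wraps indices: with ring_size_order == 8, RING_PHYS_ADDR(sc, 257) and
 * RING_PHYS_ADDR(sc, 1) name the same 64-byte slot.
 */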

	struct mtx_padalign	submit_lock;
	struct callout		poll_timer;
	struct task		reset_task;
	struct mtx_padalign	cleanup_lock;

	uint32_t		refcnt;
	uint32_t		head;
	uint32_t		acq_head;
	uint32_t		tail;
	bus_addr_t		last_seen;

	struct {
		uint64_t	interrupts;
		uint64_t	descriptors_processed;
		uint64_t	descriptors_error;
		uint64_t	descriptors_submitted;

		uint32_t	channel_halts;
		uint32_t	last_halt_chanerr;
	} stats;
};

void ioat_test_attach(void);
void ioat_test_detach(void);

/*
 * XXX DO NOT USE this routine for obtaining the current completed descriptor.
 *
 * The double_4 read on ioat<3.3 appears to result in torn reads.  And v3.2
 * hardware is still commonplace (Broadwell Xeon has it).  Instead, use the
 * device-pushed *comp_update.
 *
 * It is safe to use ioat_get_chansts() for the low status bits.
 */
static inline uint64_t
ioat_get_chansts(struct ioat_softc *ioat)
{
	uint64_t status;

	if (ioat->version >= IOAT_VER_3_3)
		status = ioat_read_8(ioat, IOAT_CHANSTS_OFFSET);
	else
		/* Must read lower 4 bytes before upper 4 bytes. */
		status = ioat_read_double_4(ioat, IOAT_CHANSTS_OFFSET);
	return (status);
}

static inline void
ioat_write_chancmp(struct ioat_softc *ioat, uint64_t addr)
{

	if (ioat->version >= IOAT_VER_3_3)
		ioat_write_8(ioat, IOAT_CHANCMP_OFFSET_LOW, addr);
	else
		ioat_write_double_4(ioat, IOAT_CHANCMP_OFFSET_LOW, addr);
}

static inline void
ioat_write_chainaddr(struct ioat_softc *ioat, uint64_t addr)
{

	if (ioat->version >= IOAT_VER_3_3)
		ioat_write_8(ioat, IOAT_CHAINADDR_OFFSET_LOW, addr);
	else
		ioat_write_double_4(ioat, IOAT_CHAINADDR_OFFSET_LOW, addr);
}

static inline boolean_t
is_ioat_active(uint64_t status)
{
	return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_ACTIVE);
}

static inline boolean_t
is_ioat_idle(uint64_t status)
{
	return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_IDLE);
}

static inline boolean_t
is_ioat_halted(uint64_t status)
{
	return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_HALTED);
}

static inline boolean_t
is_ioat_suspended(uint64_t status)
{
	return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_SUSPENDED);
}
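
/*
 * The predicates above decode the state field of a CHANSTS value, e.g.
 * (sketch; handle_halt() is a hypothetical caller-supplied handler):
 *
 *	if (is_ioat_halted(ioat_get_chansts(ioat)))
 *		handle_halt(ioat);
 */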

static inline void
ioat_suspend(struct ioat_softc *ioat)
{
	ioat_write_1(ioat, IOAT_CHANCMD_OFFSET, IOAT_CHANCMD_SUSPEND);
}

static inline void
ioat_reset(struct ioat_softc *ioat)
{
	ioat_write_1(ioat, IOAT_CHANCMD_OFFSET, IOAT_CHANCMD_RESET);
}

static inline boolean_t
ioat_reset_pending(struct ioat_softc *ioat)
{
	uint8_t cmd;

	cmd = ioat_read_1(ioat, IOAT_CHANCMD_OFFSET);
	return ((cmd & IOAT_CHANCMD_RESET) != 0);
}

#endif /* __IOAT_INTERNAL_H__ */