1/*-
2 * Copyright (C) 2012 Intel Corporation
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27__FBSDID("$FreeBSD$");
28
29#ifndef __IOAT_INTERNAL_H__
30#define __IOAT_INTERNAL_H__
31
32#include <sys/_task.h>
33
/* Convert a device_t into this driver's softc. */
#define	DEVICE2SOFTC(dev)	((struct ioat_softc *) device_get_softc(dev))
/* KTR trace class used by this driver. */
#define	KTR_IOAT		KTR_SPARE3

/* Channel count register (CHANCNT). */
#define	ioat_read_chancnt(ioat) \
	ioat_read_1((ioat), IOAT_CHANCNT_OFFSET)

/* Transfer capability register (XFERCAP), masked to its valid bits. */
#define	ioat_read_xfercap(ioat) \
	(ioat_read_1((ioat), IOAT_XFERCAP_OFFSET) & IOAT_XFERCAP_VALID_MASK)

/* Interrupt control register (INTRCTRL). */
#define	ioat_write_intrctrl(ioat, value) \
	ioat_write_1((ioat), IOAT_INTRCTRL_OFFSET, (value))

/* Hardware version register (CBVER). */
#define	ioat_read_cbver(ioat) \
	(ioat_read_1((ioat), IOAT_CBVER_OFFSET) & 0xFF)

/* DMA capability register (DMACAPABILITY). */
#define	ioat_read_dmacapability(ioat) \
	ioat_read_4((ioat), IOAT_DMACAPABILITY_OFFSET)

/* Channel control register (CHANCTRL). */
#define	ioat_write_chanctrl(ioat, value) \
	ioat_write_2((ioat), IOAT_CHANCTRL_OFFSET, (value))
54
55static __inline uint64_t
56ioat_bus_space_read_8_lower_first(bus_space_tag_t tag,
57    bus_space_handle_t handle, bus_size_t offset)
58{
59	return (bus_space_read_4(tag, handle, offset) |
60	    ((uint64_t)bus_space_read_4(tag, handle, offset + 4)) << 32);
61}
62
63static __inline void
64ioat_bus_space_write_8_lower_first(bus_space_tag_t tag,
65    bus_space_handle_t handle, bus_size_t offset, uint64_t val)
66{
67	bus_space_write_4(tag, handle, offset, val);
68	bus_space_write_4(tag, handle, offset + 4, val >> 32);
69}
70
#ifdef __i386__
/* i386 bus_space lacks 8-byte accessors; synthesize them from 4-byte ops. */
#define ioat_bus_space_read_8 ioat_bus_space_read_8_lower_first
#define ioat_bus_space_write_8 ioat_bus_space_write_8_lower_first
#else
/* 64-bit architectures: use the native 8-byte bus_space accessors. */
#define ioat_bus_space_read_8(tag, handle, offset) \
	bus_space_read_8((tag), (handle), (offset))
#define ioat_bus_space_write_8(tag, handle, offset, val) \
	bus_space_write_8((tag), (handle), (offset), (val))
#endif
80
/*
 * Register accessors wrapping bus_space with the softc's PCI BAR
 * tag/handle.  The *_double_4 variants always perform two 4-byte
 * accesses (low dword first) regardless of architecture.
 */
#define ioat_read_1(ioat, offset) \
	bus_space_read_1((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \
	    (offset))

#define ioat_read_2(ioat, offset) \
	bus_space_read_2((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \
	    (offset))

#define ioat_read_4(ioat, offset) \
	bus_space_read_4((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \
	    (offset))

#define ioat_read_8(ioat, offset) \
	ioat_bus_space_read_8((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \
	    (offset))

#define ioat_read_double_4(ioat, offset) \
	ioat_bus_space_read_8_lower_first((ioat)->pci_bus_tag, \
	    (ioat)->pci_bus_handle, (offset))

#define ioat_write_1(ioat, offset, value) \
	bus_space_write_1((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \
	    (offset), (value))

#define ioat_write_2(ioat, offset, value) \
	bus_space_write_2((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \
	    (offset), (value))

#define ioat_write_4(ioat, offset, value) \
	bus_space_write_4((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \
	    (offset), (value))

#define ioat_write_8(ioat, offset, value) \
	ioat_bus_space_write_8((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \
	    (offset), (value))

#define ioat_write_double_4(ioat, offset, value) \
	ioat_bus_space_write_8_lower_first((ioat)->pci_bus_tag, \
	    (ioat)->pci_bus_handle, (offset), (value))
120
/* malloc(9) type for this driver's allocations. */
MALLOC_DECLARE(M_IOAT);

/* Root of the hw.ioat sysctl tree. */
SYSCTL_DECL(_hw_ioat);

/* Global debug verbosity level. */
extern int g_ioat_debug_level;
126
/*
 * Per-bit layout of the 32-bit descriptor control word, as shared by all
 * descriptor types.  This mirrors the hardware layout; do not reorder or
 * resize fields.  'op' selects the operation (see IOAT_OP_* below).
 */
struct generic_dma_control {
	uint32_t int_enable:1;
	uint32_t src_snoop_disable:1;
	uint32_t dest_snoop_disable:1;
	uint32_t completion_update:1;
	uint32_t fence:1;
	uint32_t reserved1:1;
	uint32_t src_page_break:1;
	uint32_t dest_page_break:1;
	uint32_t bundle:1;
	uint32_t dest_dca:1;
	uint32_t hint:1;
	uint32_t reserved2:13;
	uint32_t op:8;
};
142
/*
 * Generic view of a hardware descriptor: the fields common to every
 * descriptor type.  Layout matches the hardware; do not reorder.
 */
struct ioat_generic_hw_descriptor {
	uint32_t size;
	union {
		uint32_t control_raw;
		struct generic_dma_control control_generic;
	} u;
	uint64_t src_addr;
	uint64_t dest_addr;
	uint64_t next;		/* bus address of the next descriptor */
	uint64_t reserved[4];
};
154
/* Hardware descriptor for a plain memory copy (IOAT_OP_COPY). */
struct ioat_dma_hw_descriptor {
	uint32_t size;
	union {
		uint32_t control_raw;
		struct generic_dma_control control_generic;
		struct {
			uint32_t int_enable:1;
			uint32_t src_snoop_disable:1;
			uint32_t dest_snoop_disable:1;
			uint32_t completion_update:1;
			uint32_t fence:1;
			uint32_t null:1;
			uint32_t src_page_break:1;
			uint32_t dest_page_break:1;
			uint32_t bundle:1;
			uint32_t dest_dca:1;
			uint32_t hint:1;
			uint32_t reserved:13;
			#define IOAT_OP_COPY 0x00
			uint32_t op:8;
		} control;
	} u;
	uint64_t src_addr;
	uint64_t dest_addr;
	uint64_t next;		/* bus address of the next descriptor */
	uint64_t next_src_addr;
	uint64_t next_dest_addr;
	uint64_t user1;
	uint64_t user2;
};
185
/* Hardware descriptor for a block fill (IOAT_OP_FILL). */
struct ioat_fill_hw_descriptor {
	uint32_t size;
	union {
		uint32_t control_raw;
		struct generic_dma_control control_generic;
		struct {
			uint32_t int_enable:1;
			uint32_t reserved:1;
			uint32_t dest_snoop_disable:1;
			uint32_t completion_update:1;
			uint32_t fence:1;
			uint32_t reserved2:2;
			uint32_t dest_page_break:1;
			uint32_t bundle:1;
			uint32_t reserved3:15;
			#define IOAT_OP_FILL 0x01
			uint32_t op:8;
		} control;
	} u;
	uint64_t src_data;	/* fill pattern, not an address */
	uint64_t dest_addr;
	uint64_t next;		/* bus address of the next descriptor */
	uint64_t reserved;
	uint64_t next_dest_addr;
	uint64_t user1;
	uint64_t user2;
};
213
/*
 * Hardware descriptor for the CRC32 family of operations
 * (IOAT_OP_CRC*, IOAT_OP_MOVECRC*); see the op-code comments below for
 * the per-mode semantics.
 */
struct ioat_crc32_hw_descriptor {
	uint32_t size;
	union {
		uint32_t control_raw;
		struct generic_dma_control control_generic;
		struct {
			uint32_t int_enable:1;
			uint32_t src_snoop_disable:1;
			uint32_t dest_snoop_disable:1;
			uint32_t completion_update:1;
			uint32_t fence:1;
			uint32_t reserved1:3;
			uint32_t bundle:1;
			uint32_t dest_dca:1;
			uint32_t hint:1;
			uint32_t use_seed:1;	/* seed the CRC accumulator from 'seed' */
			/*
			 * crc_location:
			 * For IOAT_OP_MOVECRC_TEST and IOAT_OP_CRC_TEST:
			 * 0: comparison value is pointed to by CRC Address
			 *    field.
			 * 1: comparison value follows data in wire format
			 *    ("inverted reflected bit order") in the 4 bytes
			 *    following the source data.
			 *
			 * For IOAT_OP_CRC_STORE:
			 * 0: Result will be stored at location pointed to by
			 *    CRC Address field (in wire format).
			 * 1: Result will be stored directly following the
			 *    source data.
			 *
			 * For IOAT_OP_MOVECRC_STORE:
			 * 0: Result will be stored at location pointed to by
			 *    CRC Address field (in wire format).
			 * 1: Result will be stored directly following the
			 *    *destination* data.
			 */
			uint32_t crc_location:1;
			uint32_t reserved2:11;
			/*
			 * MOVECRC - Move data in the same way as standard copy
			 * operation, but also compute CRC32.
			 *
			 * CRC - Only compute CRC on source data.
			 *
			 * There is a CRC accumulator register in the hardware.
			 * If 'initial' is set, it is initialized to the value
			 * in 'seed.'
			 *
			 * In all modes, these operators accumulate size bytes
			 * at src_addr into the running CRC32C.
			 *
			 * Store mode emits the accumulated CRC, in wire
			 * format, as specified by the crc_location bit above.
			 *
			 * Test mode compares the accumulated CRC against the
			 * reference CRC, as described in crc_location above.
			 * On failure, halts the DMA engine with a CRC error
			 * status.
			 */
			#define	IOAT_OP_MOVECRC		0x41
			#define	IOAT_OP_MOVECRC_TEST	0x42
			#define	IOAT_OP_MOVECRC_STORE	0x43
			#define	IOAT_OP_CRC		0x81
			#define	IOAT_OP_CRC_TEST	0x82
			#define	IOAT_OP_CRC_STORE	0x83
			uint32_t op:8;
		} control;
	} u;
	uint64_t src_addr;
	uint64_t dest_addr;
	uint64_t next;		/* bus address of the next descriptor */
	uint64_t next_src_addr;
	uint64_t next_dest_addr;
	uint32_t seed;		/* initial accumulator value when use_seed is set */
	uint32_t reserved;
	uint64_t crc_address;
};
292
/*
 * Hardware descriptor for XOR / XOR-validate (IOAT_OP_XOR,
 * IOAT_OP_XOR_VAL).  Sources beyond src_addr5 live in the following
 * ioat_xor_ext_hw_descriptor.
 */
struct ioat_xor_hw_descriptor {
	uint32_t size;
	union {
		uint32_t control_raw;
		struct generic_dma_control control_generic;
		struct {
			uint32_t int_enable:1;
			uint32_t src_snoop_disable:1;
			uint32_t dest_snoop_disable:1;
			uint32_t completion_update:1;
			uint32_t fence:1;
			/* presumably encodes the source count -- confirm vs. spec */
			uint32_t src_count:3;
			uint32_t bundle:1;
			uint32_t dest_dca:1;
			uint32_t hint:1;
			uint32_t reserved:13;
			#define IOAT_OP_XOR 0x87
			#define IOAT_OP_XOR_VAL 0x88
			uint32_t op:8;
		} control;
	} u;
	uint64_t src_addr;
	uint64_t dest_addr;
	uint64_t next;		/* bus address of the next descriptor */
	uint64_t src_addr2;
	uint64_t src_addr3;
	uint64_t src_addr4;
	uint64_t src_addr5;
};
322
/* Extension descriptor carrying XOR source addresses 6-8. */
struct ioat_xor_ext_hw_descriptor {
	uint64_t src_addr6;
	uint64_t src_addr7;
	uint64_t src_addr8;
	uint64_t next;		/* bus address of the next descriptor */
	uint64_t reserved[4];
};
330
/*
 * Hardware descriptor for P+Q (RAID6) generate / validate (IOAT_OP_PQ,
 * IOAT_OP_PQ_VAL).  Sources beyond src_addr3 live in the following
 * ioat_pq_ext_hw_descriptor.
 */
struct ioat_pq_hw_descriptor {
	uint32_t size;
	union {
		uint32_t control_raw;
		struct generic_dma_control control_generic;
		struct {
			uint32_t int_enable:1;
			uint32_t src_snoop_disable:1;
			uint32_t dest_snoop_disable:1;
			uint32_t completion_update:1;
			uint32_t fence:1;
			/* presumably encodes the source count -- confirm vs. spec */
			uint32_t src_count:3;
			uint32_t bundle:1;
			uint32_t dest_dca:1;
			uint32_t hint:1;
			uint32_t p_disable:1;
			uint32_t q_disable:1;
			uint32_t reserved:11;
			#define IOAT_OP_PQ 0x89
			#define IOAT_OP_PQ_VAL 0x8a
			uint32_t op:8;
		} control;
	} u;
	uint64_t src_addr;
	uint64_t p_addr;
	uint64_t next;		/* bus address of the next descriptor */
	uint64_t src_addr2;
	uint64_t src_addr3;
	uint8_t  coef[8];	/* per-source GF coefficients */
	uint64_t q_addr;
};
362
/* Extension descriptor carrying P+Q source addresses 4-8. */
struct ioat_pq_ext_hw_descriptor {
	uint64_t src_addr4;
	uint64_t src_addr5;
	uint64_t src_addr6;
	uint64_t next;		/* bus address of the next descriptor */
	uint64_t src_addr7;
	uint64_t src_addr8;
	uint64_t reserved[2];
};
372
/* Hardware descriptor for a P+Q update operation (IOAT_OP_PQ_UP). */
struct ioat_pq_update_hw_descriptor {
	uint32_t size;
	union {
		uint32_t control_raw;
		struct generic_dma_control control_generic;
		struct {
			uint32_t int_enable:1;
			uint32_t src_snoop_disable:1;
			uint32_t dest_snoop_disable:1;
			uint32_t completion_update:1;
			uint32_t fence:1;
			uint32_t src_cnt:3;
			uint32_t bundle:1;
			uint32_t dest_dca:1;
			uint32_t hint:1;
			uint32_t p_disable:1;
			uint32_t q_disable:1;
			uint32_t reserved:3;
			uint32_t coef:8;	/* GF coefficient for the update */
			#define IOAT_OP_PQ_UP 0x8b
			uint32_t op:8;
		} control;
	} u;
	uint64_t src_addr;
	uint64_t p_addr;
	uint64_t next;		/* bus address of the next descriptor */
	uint64_t src_addr2;
	uint64_t p_src;
	uint64_t q_src;
	uint64_t q_addr;
};
404
/* Raw (untyped) view of one 64-byte descriptor: eight quadwords. */
struct ioat_raw_hw_descriptor {
	uint64_t field[8];
};
408
/* Caller-supplied completion callback and its argument. */
struct bus_dmadesc {
	bus_dmaengine_callback_t callback_fn;
	void			 *callback_arg;
};
413
/*
 * Software ring entry paired with a hardware descriptor: the completion
 * callback, the descriptor's ring id, and the DMA maps for up to two
 * source and two destination buffers.
 */
struct ioat_descriptor {
	struct bus_dmadesc	bus_dmadesc;
	uint32_t		id;
	bus_dmamap_t		src_dmamap;
	bus_dmamap_t		dst_dmamap;
	bus_dmamap_t		src2_dmamap;
	bus_dmamap_t		dst2_dmamap;
};
422
/* Marker operation -- unused by this driver at this time. */
#define	IOAT_OP_MARKER		0x84

/*
 * Deprecated OPs -- v3 DMA generates an abort if given these.  And this driver
 * doesn't support anything older than v3.
 */
#define	IOAT_OP_OLD_XOR		0x85
#define	IOAT_OP_OLD_XOR_VAL	0x86
432
433/* One of these per allocated PCI device. */
434struct ioat_softc {
435	bus_dmaengine_t		dmaengine;
436#define	to_ioat_softc(_dmaeng)						\
437({									\
438	bus_dmaengine_t *_p = (_dmaeng);				\
439	(struct ioat_softc *)((char *)_p -				\
440	    offsetof(struct ioat_softc, dmaengine));			\
441})
442
443	device_t		device;
444	int			domain;
445	int			cpu;
446	int			version;
447	unsigned		chan_idx;
448
449	bus_space_tag_t		pci_bus_tag;
450	bus_space_handle_t	pci_bus_handle;
451	struct resource		*pci_resource;
452	int			pci_resource_id;
453	uint32_t		max_xfer_size;
454	uint32_t		capabilities;
455	uint32_t		ring_size_order;
456	uint16_t		intrdelay_max;
457	uint16_t		cached_intrdelay;
458
459	int			rid;
460	struct resource		*res;
461	void			*tag;
462
463	bus_dma_tag_t		hw_desc_tag;
464	bus_dmamap_t		hw_desc_map;
465
466	bus_dma_tag_t		data_tag;
467
468	bus_dma_tag_t		comp_update_tag;
469	bus_dmamap_t		comp_update_map;
470	uint64_t		*comp_update;
471	bus_addr_t		comp_update_bus_addr;
472
473	boolean_t		quiescing;
474	boolean_t		destroying;
475	boolean_t		is_submitter_processing;
476	boolean_t		intrdelay_supported;
477	boolean_t		resetting;		/* submit_lock */
478	boolean_t		resetting_cleanup;	/* cleanup_lock */
479
480	struct ioat_descriptor	*ring;
481
482	union ioat_hw_descriptor {
483		struct ioat_generic_hw_descriptor	generic;
484		struct ioat_dma_hw_descriptor		dma;
485		struct ioat_fill_hw_descriptor		fill;
486		struct ioat_crc32_hw_descriptor		crc32;
487		struct ioat_xor_hw_descriptor		xor;
488		struct ioat_xor_ext_hw_descriptor	xor_ext;
489		struct ioat_pq_hw_descriptor		pq;
490		struct ioat_pq_ext_hw_descriptor	pq_ext;
491		struct ioat_raw_hw_descriptor		raw;
492	} *hw_desc_ring;
493	bus_addr_t		hw_desc_bus_addr;
494#define	RING_PHYS_ADDR(sc, i)	(sc)->hw_desc_bus_addr + \
495    (((i) % (1 << (sc)->ring_size_order)) * sizeof(struct ioat_dma_hw_descriptor))
496
497	struct mtx_padalign	submit_lock;
498	struct callout		poll_timer;
499	struct task		reset_task;
500	struct mtx_padalign	cleanup_lock;
501
502	uint32_t		refcnt;
503	uint32_t		head;
504	uint32_t		acq_head;
505	uint32_t		tail;
506	bus_addr_t		last_seen;
507
508	struct {
509		uint64_t	interrupts;
510		uint64_t	descriptors_processed;
511		uint64_t	descriptors_error;
512		uint64_t	descriptors_submitted;
513
514		uint32_t	channel_halts;
515		uint32_t	last_halt_chanerr;
516	} stats;
517};
518
/* Attach/detach hooks for the ioat test facility. */
void ioat_test_attach(void);
void ioat_test_detach(void);
521
522/*
523 * XXX DO NOT USE this routine for obtaining the current completed descriptor.
524 *
525 * The double_4 read on ioat<3.3 appears to result in torn reads.  And v3.2
526 * hardware is still commonplace (Broadwell Xeon has it).  Instead, use the
527 * device-pushed *comp_update.
528 *
529 * It is safe to use ioat_get_chansts() for the low status bits.
530 */
531static inline uint64_t
532ioat_get_chansts(struct ioat_softc *ioat)
533{
534	uint64_t status;
535
536	if (ioat->version >= IOAT_VER_3_3)
537		status = ioat_read_8(ioat, IOAT_CHANSTS_OFFSET);
538	else
539		/* Must read lower 4 bytes before upper 4 bytes. */
540		status = ioat_read_double_4(ioat, IOAT_CHANSTS_OFFSET);
541	return (status);
542}
543
544static inline void
545ioat_write_chancmp(struct ioat_softc *ioat, uint64_t addr)
546{
547
548	if (ioat->version >= IOAT_VER_3_3)
549		ioat_write_8(ioat, IOAT_CHANCMP_OFFSET_LOW, addr);
550	else
551		ioat_write_double_4(ioat, IOAT_CHANCMP_OFFSET_LOW, addr);
552}
553
554static inline void
555ioat_write_chainaddr(struct ioat_softc *ioat, uint64_t addr)
556{
557
558	if (ioat->version >= IOAT_VER_3_3)
559		ioat_write_8(ioat, IOAT_CHAINADDR_OFFSET_LOW, addr);
560	else
561		ioat_write_double_4(ioat, IOAT_CHAINADDR_OFFSET_LOW, addr);
562}
563
564static inline boolean_t
565is_ioat_active(uint64_t status)
566{
567	return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_ACTIVE);
568}
569
570static inline boolean_t
571is_ioat_idle(uint64_t status)
572{
573	return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_IDLE);
574}
575
576static inline boolean_t
577is_ioat_halted(uint64_t status)
578{
579	return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_HALTED);
580}
581
582static inline boolean_t
583is_ioat_suspended(uint64_t status)
584{
585	return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_SUSPENDED);
586}
587
/* Request a channel suspend via the CHANCMD register. */
static inline void
ioat_suspend(struct ioat_softc *ioat)
{
	ioat_write_1(ioat, IOAT_CHANCMD_OFFSET, IOAT_CHANCMD_SUSPEND);
}
593
/* Request a channel reset via the CHANCMD register. */
static inline void
ioat_reset(struct ioat_softc *ioat)
{
	ioat_write_1(ioat, IOAT_CHANCMD_OFFSET, IOAT_CHANCMD_RESET);
}
599
600static inline boolean_t
601ioat_reset_pending(struct ioat_softc *ioat)
602{
603	uint8_t cmd;
604
605	cmd = ioat_read_1(ioat, IOAT_CHANCMD_OFFSET);
606	return ((cmd & IOAT_CHANCMD_RESET) != 0);
607}
608
609#endif /* __IOAT_INTERNAL_H__ */
610