1/*-
2 * Copyright (C) 2012 Intel Corporation
3 * All rights reserved.
4 * Copyright (C) 2018 Alexander Motin <mav@FreeBSD.org>
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28#include <sys/cdefs.h>
29#include "opt_ddb.h"
30
31#include <sys/param.h>
32#include <sys/systm.h>
33#include <sys/bus.h>
34#include <sys/conf.h>
35#include <sys/domainset.h>
36#include <sys/fail.h>
37#include <sys/ioccom.h>
38#include <sys/kernel.h>
39#include <sys/ktr.h>
40#include <sys/lock.h>
41#include <sys/malloc.h>
42#include <sys/module.h>
43#include <sys/mutex.h>
44#include <sys/rman.h>
45#include <sys/sbuf.h>
46#include <sys/smp.h>
47#include <sys/sysctl.h>
48#include <sys/taskqueue.h>
49#include <sys/time.h>
50#include <dev/pci/pcireg.h>
51#include <dev/pci/pcivar.h>
52#include <machine/bus.h>
53#include <machine/resource.h>
54#include <machine/stdarg.h>
55
56#ifdef DDB
57#include <ddb/ddb.h>
58#endif
59
60#include "ioat.h"
61#include "ioat_hw.h"
62#include "ioat_internal.h"
63
64#ifndef	BUS_SPACE_MAXADDR_40BIT
65#define	BUS_SPACE_MAXADDR_40BIT	MIN(BUS_SPACE_MAXADDR, 0xFFFFFFFFFFULL)
66#endif
67#ifndef	BUS_SPACE_MAXADDR_46BIT
68#define	BUS_SPACE_MAXADDR_46BIT	MIN(BUS_SPACE_MAXADDR, 0x3FFFFFFFFFFFULL)
69#endif
70
71static int ioat_modevent(module_t mod, int type, void *data);
72static int ioat_probe(device_t device);
73static int ioat_attach(device_t device);
74static int ioat_detach(device_t device);
75static int ioat_setup_intr(struct ioat_softc *ioat);
76static int ioat_teardown_intr(struct ioat_softc *ioat);
77static int ioat3_attach(device_t device);
78static int ioat_start_channel(struct ioat_softc *ioat);
79static int ioat_map_pci_bar(struct ioat_softc *ioat);
80static void ioat_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg,
81    int error);
82static void ioat_interrupt_handler(void *arg);
83static boolean_t ioat_model_resets_msix(struct ioat_softc *ioat);
84static int chanerr_to_errno(uint32_t);
85static void ioat_process_events(struct ioat_softc *ioat, boolean_t intr);
86static inline uint32_t ioat_get_active(struct ioat_softc *ioat);
87static inline uint32_t ioat_get_ring_space(struct ioat_softc *ioat);
88static void ioat_free_ring(struct ioat_softc *, uint32_t size,
89    struct ioat_descriptor *);
90static int ioat_reserve_space(struct ioat_softc *, uint32_t, int mflags);
91static union ioat_hw_descriptor *ioat_get_descriptor(struct ioat_softc *,
92    uint32_t index);
93static struct ioat_descriptor *ioat_get_ring_entry(struct ioat_softc *,
94    uint32_t index);
95static void ioat_halted_debug(struct ioat_softc *, uint32_t);
96static void ioat_poll_timer_callback(void *arg);
97static void dump_descriptor(void *hw_desc);
98static void ioat_submit_single(struct ioat_softc *ioat);
99static void ioat_comp_update_map(void *arg, bus_dma_segment_t *seg, int nseg,
100    int error);
101static int ioat_reset_hw(struct ioat_softc *ioat);
102static void ioat_reset_hw_task(void *, int);
103static void ioat_setup_sysctl(device_t device);
104static int sysctl_handle_reset(SYSCTL_HANDLER_ARGS);
105static void ioat_get(struct ioat_softc *);
106static void ioat_put(struct ioat_softc *);
107static void ioat_drain_locked(struct ioat_softc *);
108
109#define	ioat_log_message(v, ...) do {					\
110	if ((v) <= g_ioat_debug_level) {				\
111		device_printf(ioat->device, __VA_ARGS__);		\
112	}								\
113} while (0)
114
115MALLOC_DEFINE(M_IOAT, "ioat", "ioat driver memory allocations");
116SYSCTL_NODE(_hw, OID_AUTO, ioat, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
117    "ioat node");
118
119static int g_force_legacy_interrupts;
120SYSCTL_INT(_hw_ioat, OID_AUTO, force_legacy_interrupts, CTLFLAG_RDTUN,
121    &g_force_legacy_interrupts, 0, "Set to non-zero to force MSI-X disabled");
122
123int g_ioat_debug_level = 0;
124SYSCTL_INT(_hw_ioat, OID_AUTO, debug_level, CTLFLAG_RWTUN, &g_ioat_debug_level,
125    0, "Set log level (0-3) for ioat(4). Higher is more verbose.");
126
127unsigned g_ioat_ring_order = 13;
128SYSCTL_UINT(_hw_ioat, OID_AUTO, ring_order, CTLFLAG_RDTUN, &g_ioat_ring_order,
129    0, "Set IOAT ring order.  (1 << this) == ring size.");
130
131/*
132 * OS <-> Driver interface structures
133 */
134static device_method_t ioat_pci_methods[] = {
135	/* Device interface */
136	DEVMETHOD(device_probe,     ioat_probe),
137	DEVMETHOD(device_attach,    ioat_attach),
138	DEVMETHOD(device_detach,    ioat_detach),
139	DEVMETHOD_END
140};
141
142static driver_t ioat_pci_driver = {
143	"ioat",
144	ioat_pci_methods,
145	sizeof(struct ioat_softc),
146};
147
148DRIVER_MODULE(ioat, pci, ioat_pci_driver, ioat_modevent, NULL);
149MODULE_VERSION(ioat, 1);
150
151/*
152 * Private data structures
153 */
154static struct ioat_softc *ioat_channel[IOAT_MAX_CHANNELS];
155static unsigned ioat_channel_index = 0;
156SYSCTL_UINT(_hw_ioat, OID_AUTO, channels, CTLFLAG_RD, &ioat_channel_index, 0,
157    "Number of IOAT channels attached");
158static struct mtx ioat_list_mtx;
159MTX_SYSINIT(ioat_list_mtx, &ioat_list_mtx, "ioat list mtx", MTX_DEF);
160
161static struct _pcsid
162{
163	u_int32_t   type;
164	const char  *desc;
165} pci_ids[] = {
166	{ 0x34308086, "TBG IOAT Ch0" },
167	{ 0x34318086, "TBG IOAT Ch1" },
168	{ 0x34328086, "TBG IOAT Ch2" },
169	{ 0x34338086, "TBG IOAT Ch3" },
170	{ 0x34298086, "TBG IOAT Ch4" },
171	{ 0x342a8086, "TBG IOAT Ch5" },
172	{ 0x342b8086, "TBG IOAT Ch6" },
173	{ 0x342c8086, "TBG IOAT Ch7" },
174
175	{ 0x37108086, "JSF IOAT Ch0" },
176	{ 0x37118086, "JSF IOAT Ch1" },
177	{ 0x37128086, "JSF IOAT Ch2" },
178	{ 0x37138086, "JSF IOAT Ch3" },
179	{ 0x37148086, "JSF IOAT Ch4" },
180	{ 0x37158086, "JSF IOAT Ch5" },
181	{ 0x37168086, "JSF IOAT Ch6" },
182	{ 0x37178086, "JSF IOAT Ch7" },
183	{ 0x37188086, "JSF IOAT Ch0 (RAID)" },
184	{ 0x37198086, "JSF IOAT Ch1 (RAID)" },
185
186	{ 0x3c208086, "SNB IOAT Ch0" },
187	{ 0x3c218086, "SNB IOAT Ch1" },
188	{ 0x3c228086, "SNB IOAT Ch2" },
189	{ 0x3c238086, "SNB IOAT Ch3" },
190	{ 0x3c248086, "SNB IOAT Ch4" },
191	{ 0x3c258086, "SNB IOAT Ch5" },
192	{ 0x3c268086, "SNB IOAT Ch6" },
193	{ 0x3c278086, "SNB IOAT Ch7" },
194	{ 0x3c2e8086, "SNB IOAT Ch0 (RAID)" },
195	{ 0x3c2f8086, "SNB IOAT Ch1 (RAID)" },
196
197	{ 0x0e208086, "IVB IOAT Ch0" },
198	{ 0x0e218086, "IVB IOAT Ch1" },
199	{ 0x0e228086, "IVB IOAT Ch2" },
200	{ 0x0e238086, "IVB IOAT Ch3" },
201	{ 0x0e248086, "IVB IOAT Ch4" },
202	{ 0x0e258086, "IVB IOAT Ch5" },
203	{ 0x0e268086, "IVB IOAT Ch6" },
204	{ 0x0e278086, "IVB IOAT Ch7" },
205	{ 0x0e2e8086, "IVB IOAT Ch0 (RAID)" },
206	{ 0x0e2f8086, "IVB IOAT Ch1 (RAID)" },
207
208	{ 0x2f208086, "HSW IOAT Ch0" },
209	{ 0x2f218086, "HSW IOAT Ch1" },
210	{ 0x2f228086, "HSW IOAT Ch2" },
211	{ 0x2f238086, "HSW IOAT Ch3" },
212	{ 0x2f248086, "HSW IOAT Ch4" },
213	{ 0x2f258086, "HSW IOAT Ch5" },
214	{ 0x2f268086, "HSW IOAT Ch6" },
215	{ 0x2f278086, "HSW IOAT Ch7" },
216	{ 0x2f2e8086, "HSW IOAT Ch0 (RAID)" },
217	{ 0x2f2f8086, "HSW IOAT Ch1 (RAID)" },
218
219	{ 0x0c508086, "BWD IOAT Ch0" },
220	{ 0x0c518086, "BWD IOAT Ch1" },
221	{ 0x0c528086, "BWD IOAT Ch2" },
222	{ 0x0c538086, "BWD IOAT Ch3" },
223
224	{ 0x6f508086, "BDXDE IOAT Ch0" },
225	{ 0x6f518086, "BDXDE IOAT Ch1" },
226	{ 0x6f528086, "BDXDE IOAT Ch2" },
227	{ 0x6f538086, "BDXDE IOAT Ch3" },
228
229	{ 0x6f208086, "BDX IOAT Ch0" },
230	{ 0x6f218086, "BDX IOAT Ch1" },
231	{ 0x6f228086, "BDX IOAT Ch2" },
232	{ 0x6f238086, "BDX IOAT Ch3" },
233	{ 0x6f248086, "BDX IOAT Ch4" },
234	{ 0x6f258086, "BDX IOAT Ch5" },
235	{ 0x6f268086, "BDX IOAT Ch6" },
236	{ 0x6f278086, "BDX IOAT Ch7" },
237	{ 0x6f2e8086, "BDX IOAT Ch0 (RAID)" },
238	{ 0x6f2f8086, "BDX IOAT Ch1 (RAID)" },
239
240	{ 0x20218086, "SKX IOAT" },
241
242	{ 0x0b008086, "ICX IOAT" },
243};
244
245MODULE_PNP_INFO("W32:vendor/device;D:#", pci, ioat, pci_ids,
246    nitems(pci_ids));
247
248/*
249 * OS <-> Driver linkage functions
250 */
251static int
252ioat_modevent(module_t mod __unused, int type, void *data __unused)
253{
254	switch(type) {
255	case MOD_LOAD:
256		break;
257
258	case MOD_UNLOAD:
259		ioat_test_detach();
260		break;
261
262	case MOD_SHUTDOWN:
263		break;
264
265	default:
266		return (EOPNOTSUPP);
267	}
268
269	return (0);
270}
271
272static int
273ioat_probe(device_t device)
274{
275	struct _pcsid *ep;
276	u_int32_t type;
277
278	type = pci_get_devid(device);
279	for (ep = pci_ids; ep < &pci_ids[nitems(pci_ids)]; ep++) {
280		if (ep->type == type) {
281			device_set_desc(device, ep->desc);
282			return (0);
283		}
284	}
285	return (ENXIO);
286}
287
288static int
289ioat_attach(device_t device)
290{
291	struct ioat_softc *ioat;
292	int error, i;
293
294	ioat = DEVICE2SOFTC(device);
295	ioat->device = device;
296	if (bus_get_domain(device, &ioat->domain) != 0)
297		ioat->domain = 0;
298	ioat->cpu = CPU_FFS(&cpuset_domain[ioat->domain]) - 1;
299	if (ioat->cpu < 0)
300		ioat->cpu = CPU_FIRST();
301
302	error = ioat_map_pci_bar(ioat);
303	if (error != 0)
304		goto err;
305
306	ioat->version = ioat_read_cbver(ioat);
307	if (ioat->version < IOAT_VER_3_0) {
308		error = ENODEV;
309		goto err;
310	}
311
312	error = ioat3_attach(device);
313	if (error != 0)
314		goto err;
315
316	error = pci_enable_busmaster(device);
317	if (error != 0)
318		goto err;
319
320	error = ioat_setup_intr(ioat);
321	if (error != 0)
322		goto err;
323
324	error = ioat_reset_hw(ioat);
325	if (error != 0)
326		goto err;
327
328	ioat_process_events(ioat, FALSE);
329	ioat_setup_sysctl(device);
330
331	mtx_lock(&ioat_list_mtx);
332	for (i = 0; i < IOAT_MAX_CHANNELS; i++) {
333		if (ioat_channel[i] == NULL)
334			break;
335	}
336	if (i >= IOAT_MAX_CHANNELS) {
337		mtx_unlock(&ioat_list_mtx);
338		device_printf(device, "Too many I/OAT devices in system\n");
339		error = ENXIO;
340		goto err;
341	}
342	ioat->chan_idx = i;
343	ioat_channel[i] = ioat;
344	if (i >= ioat_channel_index)
345		ioat_channel_index = i + 1;
346	mtx_unlock(&ioat_list_mtx);
347
348	ioat_test_attach();
349
350err:
351	if (error != 0)
352		ioat_detach(device);
353	return (error);
354}
355
356static inline int
357ioat_bus_dmamap_destroy(struct ioat_softc *ioat, const char *func,
358    bus_dma_tag_t dmat, bus_dmamap_t map)
359{
360	int error;
361
362	error = bus_dmamap_destroy(dmat, map);
363	if (error != 0) {
364		ioat_log_message(0,
365		    "%s: bus_dmamap_destroy failed %d\n", func, error);
366	}
367
368	return (error);
369}
370
371static int
372ioat_detach(device_t device)
373{
374	struct ioat_softc *ioat;
375	int i, error;
376
377	ioat = DEVICE2SOFTC(device);
378
379	mtx_lock(&ioat_list_mtx);
380	ioat_channel[ioat->chan_idx] = NULL;
381	while (ioat_channel_index > 0 &&
382	    ioat_channel[ioat_channel_index - 1] == NULL)
383		ioat_channel_index--;
384	mtx_unlock(&ioat_list_mtx);
385
386	taskqueue_drain(taskqueue_thread, &ioat->reset_task);
387
388	mtx_lock(&ioat->submit_lock);
389	ioat->quiescing = TRUE;
390	ioat->destroying = TRUE;
391	wakeup(&ioat->quiescing);
392	wakeup(&ioat->resetting);
393
394	ioat_drain_locked(ioat);
395	mtx_unlock(&ioat->submit_lock);
396	mtx_lock(&ioat->cleanup_lock);
397	while (ioat_get_active(ioat) > 0)
398		msleep(&ioat->tail, &ioat->cleanup_lock, 0, "ioat_drain", 1);
399	mtx_unlock(&ioat->cleanup_lock);
400
401	ioat_teardown_intr(ioat);
402	callout_drain(&ioat->poll_timer);
403
404	pci_disable_busmaster(device);
405
406	if (ioat->pci_resource != NULL)
407		bus_release_resource(device, SYS_RES_MEMORY,
408		    ioat->pci_resource_id, ioat->pci_resource);
409
410	if (ioat->data_tag != NULL) {
411		for (i = 0; i < 1 << ioat->ring_size_order; i++) {
412			error = ioat_bus_dmamap_destroy(ioat, __func__,
413			    ioat->data_tag, ioat->ring[i].src_dmamap);
414			if (error != 0)
415				return (error);
416		}
417		for (i = 0; i < 1 << ioat->ring_size_order; i++) {
418			error = ioat_bus_dmamap_destroy(ioat, __func__,
419			    ioat->data_tag, ioat->ring[i].dst_dmamap);
420			if (error != 0)
421				return (error);
422		}
423
424		for (i = 0; i < 1 << ioat->ring_size_order; i++) {
425			error = ioat_bus_dmamap_destroy(ioat, __func__,
426			    ioat->data_tag, ioat->ring[i].src2_dmamap);
427			if (error != 0)
428				return (error);
429		}
430		for (i = 0; i < 1 << ioat->ring_size_order; i++) {
431			error = ioat_bus_dmamap_destroy(ioat, __func__,
432			    ioat->data_tag, ioat->ring[i].dst2_dmamap);
433			if (error != 0)
434				return (error);
435		}
436
437		bus_dma_tag_destroy(ioat->data_tag);
438	}
439
440	if (ioat->ring != NULL)
441		ioat_free_ring(ioat, 1 << ioat->ring_size_order, ioat->ring);
442
443	if (ioat->comp_update != NULL) {
444		bus_dmamap_unload(ioat->comp_update_tag, ioat->comp_update_map);
445		bus_dmamem_free(ioat->comp_update_tag, ioat->comp_update,
446		    ioat->comp_update_map);
447		bus_dma_tag_destroy(ioat->comp_update_tag);
448	}
449
450	if (ioat->hw_desc_ring != NULL) {
451		bus_dmamap_unload(ioat->hw_desc_tag, ioat->hw_desc_map);
452		bus_dmamem_free(ioat->hw_desc_tag, ioat->hw_desc_ring,
453		    ioat->hw_desc_map);
454		bus_dma_tag_destroy(ioat->hw_desc_tag);
455	}
456
457	return (0);
458}
459
460static int
461ioat_teardown_intr(struct ioat_softc *ioat)
462{
463
464	if (ioat->tag != NULL)
465		bus_teardown_intr(ioat->device, ioat->res, ioat->tag);
466
467	if (ioat->res != NULL)
468		bus_release_resource(ioat->device, SYS_RES_IRQ,
469		    rman_get_rid(ioat->res), ioat->res);
470
471	pci_release_msi(ioat->device);
472	return (0);
473}
474
475static int
476ioat_start_channel(struct ioat_softc *ioat)
477{
478	struct ioat_dma_hw_descriptor *hw_desc;
479	struct ioat_descriptor *desc;
480	struct bus_dmadesc *dmadesc;
481	uint64_t status;
482	uint32_t chanerr;
483	int i;
484
485	ioat_acquire(&ioat->dmaengine);
486
487	/* Submit 'NULL' operation manually to avoid quiescing flag */
488	desc = ioat_get_ring_entry(ioat, ioat->head);
489	hw_desc = &ioat_get_descriptor(ioat, ioat->head)->dma;
490	dmadesc = &desc->bus_dmadesc;
491
492	dmadesc->callback_fn = NULL;
493	dmadesc->callback_arg = NULL;
494
495	hw_desc->u.control_raw = 0;
496	hw_desc->u.control_generic.op = IOAT_OP_COPY;
497	hw_desc->u.control_generic.completion_update = 1;
498	hw_desc->size = 8;
499	hw_desc->src_addr = 0;
500	hw_desc->dest_addr = 0;
501	hw_desc->u.control.null = 1;
502
503	ioat_submit_single(ioat);
504	ioat_release(&ioat->dmaengine);
505
506	for (i = 0; i < 100; i++) {
507		DELAY(1);
508		status = ioat_get_chansts(ioat);
509		if (is_ioat_idle(status))
510			return (0);
511	}
512
513	chanerr = ioat_read_4(ioat, IOAT_CHANERR_OFFSET);
514	ioat_log_message(0, "could not start channel: "
515	    "status = %#jx error = %b\n", (uintmax_t)status, (int)chanerr,
516	    IOAT_CHANERR_STR);
517	return (ENXIO);
518}
519
520/*
521 * Initialize Hardware
522 */
523static int
524ioat3_attach(device_t device)
525{
526	struct ioat_softc *ioat;
527	struct ioat_descriptor *ring;
528	struct ioat_dma_hw_descriptor *dma_hw_desc;
529	void *hw_desc;
530	bus_addr_t lowaddr;
531	size_t ringsz;
532	int i, num_descriptors;
533	int error;
534	uint8_t xfercap;
535
536	error = 0;
537	ioat = DEVICE2SOFTC(device);
538	ioat->capabilities = ioat_read_dmacapability(ioat);
539
540	ioat_log_message(0, "Capabilities: %b\n", (int)ioat->capabilities,
541	    IOAT_DMACAP_STR);
542
543	xfercap = ioat_read_xfercap(ioat);
544	ioat->max_xfer_size = 1 << xfercap;
545
546	ioat->intrdelay_supported = (ioat_read_2(ioat, IOAT_INTRDELAY_OFFSET) &
547	    IOAT_INTRDELAY_SUPPORTED) != 0;
548	if (ioat->intrdelay_supported)
549		ioat->intrdelay_max = IOAT_INTRDELAY_US_MASK;
550
551	/* TODO: need to check DCA here if we ever do XOR/PQ */
552
553	mtx_init(&ioat->submit_lock, "ioat_submit", NULL, MTX_DEF);
554	mtx_init(&ioat->cleanup_lock, "ioat_cleanup", NULL, MTX_DEF);
555	callout_init(&ioat->poll_timer, 1);
556	TASK_INIT(&ioat->reset_task, 0, ioat_reset_hw_task, ioat);
557
558	/* Establish lock order for Witness */
559	mtx_lock(&ioat->cleanup_lock);
560	mtx_lock(&ioat->submit_lock);
561	mtx_unlock(&ioat->submit_lock);
562	mtx_unlock(&ioat->cleanup_lock);
563
564	ioat->is_submitter_processing = FALSE;
565
566	if (ioat->version >= IOAT_VER_3_3)
567		lowaddr = BUS_SPACE_MAXADDR_48BIT;
568	else if (ioat->version >= IOAT_VER_3_2)
569		lowaddr = BUS_SPACE_MAXADDR_46BIT;
570	else
571		lowaddr = BUS_SPACE_MAXADDR_40BIT;
572
573	error = bus_dma_tag_create(bus_get_dma_tag(ioat->device),
574	    sizeof(uint64_t), 0x0, lowaddr, BUS_SPACE_MAXADDR, NULL, NULL,
575	    sizeof(uint64_t), 1, sizeof(uint64_t), 0, NULL, NULL,
576	    &ioat->comp_update_tag);
577	if (error != 0)
578		return (error);
579
580	error = bus_dmamem_alloc(ioat->comp_update_tag,
581	    (void **)&ioat->comp_update, BUS_DMA_ZERO | BUS_DMA_WAITOK,
582	    &ioat->comp_update_map);
583	if (error != 0)
584		return (error);
585
586	error = bus_dmamap_load(ioat->comp_update_tag, ioat->comp_update_map,
587	    ioat->comp_update, sizeof(uint64_t), ioat_comp_update_map, ioat,
588	    BUS_DMA_NOWAIT);
589	if (error != 0)
590		return (error);
591
592	ioat->ring_size_order = g_ioat_ring_order;
593	num_descriptors = 1 << ioat->ring_size_order;
594	ringsz = sizeof(struct ioat_dma_hw_descriptor) * num_descriptors;
595
596	error = bus_dma_tag_create(bus_get_dma_tag(ioat->device),
597	    2 * 1024 * 1024, 0x0, lowaddr, BUS_SPACE_MAXADDR, NULL, NULL,
598	    ringsz, 1, ringsz, 0, NULL, NULL, &ioat->hw_desc_tag);
599	if (error != 0)
600		return (error);
601
602	error = bus_dmamem_alloc(ioat->hw_desc_tag, &hw_desc,
603	    BUS_DMA_ZERO | BUS_DMA_WAITOK, &ioat->hw_desc_map);
604	if (error != 0)
605		return (error);
606
607	error = bus_dmamap_load(ioat->hw_desc_tag, ioat->hw_desc_map, hw_desc,
608	    ringsz, ioat_dmamap_cb, &ioat->hw_desc_bus_addr, BUS_DMA_NOWAIT);
609	if (error)
610		return (error);
611
612	ioat->hw_desc_ring = hw_desc;
613
614	error = bus_dma_tag_create(bus_get_dma_tag(ioat->device),
615	    1, 0, lowaddr, BUS_SPACE_MAXADDR, NULL, NULL,
616	    ioat->max_xfer_size, 1, ioat->max_xfer_size, 0, NULL, NULL,
617	    &ioat->data_tag);
618	if (error != 0)
619		return (error);
620	ioat->ring = malloc_domainset(num_descriptors * sizeof(*ring), M_IOAT,
621	    DOMAINSET_PREF(ioat->domain), M_ZERO | M_WAITOK);
622
623	ring = ioat->ring;
624	for (i = 0; i < num_descriptors; i++) {
625		memset(&ring[i].bus_dmadesc, 0, sizeof(ring[i].bus_dmadesc));
626		ring[i].id = i;
627		error = bus_dmamap_create(ioat->data_tag, 0,
628                    &ring[i].src_dmamap);
629		if (error != 0) {
630			ioat_log_message(0,
631			    "%s: bus_dmamap_create failed %d\n", __func__,
632			    error);
633			return (error);
634		}
635		error = bus_dmamap_create(ioat->data_tag, 0,
636                    &ring[i].dst_dmamap);
637		if (error != 0) {
638			ioat_log_message(0,
639			    "%s: bus_dmamap_create failed %d\n", __func__,
640			    error);
641			return (error);
642		}
643		error = bus_dmamap_create(ioat->data_tag, 0,
644                    &ring[i].src2_dmamap);
645		if (error != 0) {
646			ioat_log_message(0,
647			    "%s: bus_dmamap_create failed %d\n", __func__,
648			    error);
649			return (error);
650		}
651		error = bus_dmamap_create(ioat->data_tag, 0,
652                    &ring[i].dst2_dmamap);
653		if (error != 0) {
654			ioat_log_message(0,
655			    "%s: bus_dmamap_create failed %d\n", __func__,
656			    error);
657			return (error);
658		}
659	}
660
661	for (i = 0; i < num_descriptors; i++) {
662		dma_hw_desc = &ioat->hw_desc_ring[i].dma;
663		dma_hw_desc->next = RING_PHYS_ADDR(ioat, i + 1);
664	}
665
666	ioat->tail = ioat->head = 0;
667	*ioat->comp_update = ioat->last_seen =
668	    RING_PHYS_ADDR(ioat, ioat->tail - 1);
669	return (0);
670}
671
672static int
673ioat_map_pci_bar(struct ioat_softc *ioat)
674{
675
676	ioat->pci_resource_id = PCIR_BAR(0);
677	ioat->pci_resource = bus_alloc_resource_any(ioat->device,
678	    SYS_RES_MEMORY, &ioat->pci_resource_id, RF_ACTIVE);
679
680	if (ioat->pci_resource == NULL) {
681		ioat_log_message(0, "unable to allocate pci resource\n");
682		return (ENODEV);
683	}
684
685	ioat->pci_bus_tag = rman_get_bustag(ioat->pci_resource);
686	ioat->pci_bus_handle = rman_get_bushandle(ioat->pci_resource);
687	return (0);
688}
689
690static void
691ioat_comp_update_map(void *arg, bus_dma_segment_t *seg, int nseg, int error)
692{
693	struct ioat_softc *ioat = arg;
694
695	KASSERT(error == 0, ("%s: error:%d", __func__, error));
696	ioat->comp_update_bus_addr = seg[0].ds_addr;
697}
698
699static void
700ioat_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
701{
702	bus_addr_t *baddr;
703
704	KASSERT(error == 0, ("%s: error:%d", __func__, error));
705	baddr = arg;
706	*baddr = segs->ds_addr;
707}
708
709/*
710 * Interrupt setup and handlers
711 */
712static int
713ioat_setup_intr(struct ioat_softc *ioat)
714{
715	uint32_t num_vectors;
716	int error;
717	boolean_t use_msix;
718
719	use_msix = FALSE;
720
721	if (!g_force_legacy_interrupts && pci_msix_count(ioat->device) >= 1) {
722		num_vectors = 1;
723		pci_alloc_msix(ioat->device, &num_vectors);
724		if (num_vectors == 1)
725			use_msix = TRUE;
726	}
727
728	if (use_msix) {
729		ioat->rid = 1;
730		ioat->res = bus_alloc_resource_any(ioat->device, SYS_RES_IRQ,
731		    &ioat->rid, RF_ACTIVE);
732	} else {
733		ioat->rid = 0;
734		ioat->res = bus_alloc_resource_any(ioat->device, SYS_RES_IRQ,
735		    &ioat->rid, RF_SHAREABLE | RF_ACTIVE);
736	}
737	if (ioat->res == NULL) {
738		ioat_log_message(0, "bus_alloc_resource failed\n");
739		return (ENOMEM);
740	}
741
742	ioat->tag = NULL;
743	error = bus_setup_intr(ioat->device, ioat->res, INTR_MPSAFE |
744	    INTR_TYPE_MISC, NULL, ioat_interrupt_handler, ioat, &ioat->tag);
745	if (error != 0) {
746		ioat_log_message(0, "bus_setup_intr failed\n");
747		return (error);
748	}
749
750	ioat_write_intrctrl(ioat, IOAT_INTRCTRL_MASTER_INT_EN);
751	return (0);
752}
753
754static boolean_t
755ioat_model_resets_msix(struct ioat_softc *ioat)
756{
757	u_int32_t pciid;
758
759	pciid = pci_get_devid(ioat->device);
760	switch (pciid) {
761		/* BWD: */
762	case 0x0c508086:
763	case 0x0c518086:
764	case 0x0c528086:
765	case 0x0c538086:
766		/* BDXDE: */
767	case 0x6f508086:
768	case 0x6f518086:
769	case 0x6f528086:
770	case 0x6f538086:
771		return (TRUE);
772	}
773
774	return (FALSE);
775}
776
777static void
778ioat_interrupt_handler(void *arg)
779{
780	struct ioat_softc *ioat = arg;
781
782	ioat->stats.interrupts++;
783	ioat_process_events(ioat, TRUE);
784}
785
786static int
787chanerr_to_errno(uint32_t chanerr)
788{
789
790	if (chanerr == 0)
791		return (0);
792	if ((chanerr & (IOAT_CHANERR_XSADDERR | IOAT_CHANERR_XDADDERR)) != 0)
793		return (EFAULT);
794	if ((chanerr & (IOAT_CHANERR_RDERR | IOAT_CHANERR_WDERR)) != 0)
795		return (EIO);
796	/* This one is probably our fault: */
797	if ((chanerr & IOAT_CHANERR_NDADDERR) != 0)
798		return (EIO);
799	return (EIO);
800}
801
802static void
803ioat_process_events(struct ioat_softc *ioat, boolean_t intr)
804{
805	struct ioat_descriptor *desc;
806	struct bus_dmadesc *dmadesc;
807	uint64_t comp_update, status;
808	uint32_t completed, chanerr;
809	int error __diagused;
810
811	if (intr) {
812		mtx_lock(&ioat->cleanup_lock);
813	} else {
814		if (!mtx_trylock(&ioat->cleanup_lock))
815			return;
816	}
817
818	/*
819	 * Don't run while the hardware is being reset.  Reset is responsible
820	 * for blocking new work and draining & completing existing work, so
821	 * there is nothing to do until new work is queued after reset anyway.
822	 */
823	if (ioat->resetting_cleanup) {
824		mtx_unlock(&ioat->cleanup_lock);
825		return;
826	}
827
828	completed = 0;
829	comp_update = *ioat->comp_update;
830	status = comp_update & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_MASK;
831
832	if (status < ioat->hw_desc_bus_addr ||
833	    status >= ioat->hw_desc_bus_addr + (1 << ioat->ring_size_order) *
834	    sizeof(struct ioat_generic_hw_descriptor))
835		panic("Bogus completion address %jx (channel %u)",
836		    (uintmax_t)status, ioat->chan_idx);
837
838	if (status == ioat->last_seen) {
839		/*
840		 * If we landed in process_events and nothing has been
841		 * completed, check for a timeout due to channel halt.
842		 */
843		goto out;
844	}
845	CTR4(KTR_IOAT, "%s channel=%u hw_status=0x%lx last_seen=0x%lx",
846	    __func__, ioat->chan_idx, comp_update, ioat->last_seen);
847
848	while (RING_PHYS_ADDR(ioat, ioat->tail - 1) != status) {
849		desc = ioat_get_ring_entry(ioat, ioat->tail);
850		dmadesc = &desc->bus_dmadesc;
851		CTR5(KTR_IOAT, "channel=%u completing desc idx %u (%p) ok  cb %p(%p)",
852		    ioat->chan_idx, ioat->tail, dmadesc, dmadesc->callback_fn,
853		    dmadesc->callback_arg);
854
855		bus_dmamap_unload(ioat->data_tag, desc->src_dmamap);
856		bus_dmamap_unload(ioat->data_tag, desc->dst_dmamap);
857		bus_dmamap_unload(ioat->data_tag, desc->src2_dmamap);
858		bus_dmamap_unload(ioat->data_tag, desc->dst2_dmamap);
859
860		if (dmadesc->callback_fn != NULL)
861			dmadesc->callback_fn(dmadesc->callback_arg, 0);
862
863		completed++;
864		ioat->tail++;
865	}
866	CTR5(KTR_IOAT, "%s channel=%u head=%u tail=%u active=%u", __func__,
867	    ioat->chan_idx, ioat->head, ioat->tail, ioat_get_active(ioat));
868
869	if (completed != 0) {
870		ioat->last_seen = RING_PHYS_ADDR(ioat, ioat->tail - 1);
871		ioat->stats.descriptors_processed += completed;
872		wakeup(&ioat->tail);
873	}
874
875out:
876	ioat_write_chanctrl(ioat, IOAT_CHANCTRL_RUN);
877	mtx_unlock(&ioat->cleanup_lock);
878
879	/*
880	 * The device doesn't seem to reliably push suspend/halt statuses to
881	 * the channel completion memory address, so poll the device register
882	 * here.  For performance reasons skip it on interrupts, do it only
883	 * on much more rare polling events.
884	 */
885	if (!intr)
886		comp_update = ioat_get_chansts(ioat) & IOAT_CHANSTS_STATUS;
887	if (!is_ioat_halted(comp_update) && !is_ioat_suspended(comp_update))
888		return;
889
890	ioat->stats.channel_halts++;
891
892	/*
893	 * Fatal programming error on this DMA channel.  Flush any outstanding
894	 * work with error status and restart the engine.
895	 */
896	mtx_lock(&ioat->submit_lock);
897	ioat->quiescing = TRUE;
898	mtx_unlock(&ioat->submit_lock);
899
900	/*
901	 * This is safe to do here because the submit queue is quiesced.  We
902	 * know that we will drain all outstanding events, so ioat_reset_hw
903	 * can't deadlock. It is necessary to protect other ioat_process_event
904	 * threads from racing ioat_reset_hw, reading an indeterminate hw
905	 * state, and attempting to continue issuing completions.
906	 */
907	mtx_lock(&ioat->cleanup_lock);
908	ioat->resetting_cleanup = TRUE;
909
910	chanerr = ioat_read_4(ioat, IOAT_CHANERR_OFFSET);
911	if (1 <= g_ioat_debug_level)
912		ioat_halted_debug(ioat, chanerr);
913	ioat->stats.last_halt_chanerr = chanerr;
914
915	while (ioat_get_active(ioat) > 0) {
916		desc = ioat_get_ring_entry(ioat, ioat->tail);
917		dmadesc = &desc->bus_dmadesc;
918		CTR5(KTR_IOAT, "channel=%u completing desc idx %u (%p) err cb %p(%p)",
919		    ioat->chan_idx, ioat->tail, dmadesc, dmadesc->callback_fn,
920		    dmadesc->callback_arg);
921
922		if (dmadesc->callback_fn != NULL)
923			dmadesc->callback_fn(dmadesc->callback_arg,
924			    chanerr_to_errno(chanerr));
925
926		ioat->tail++;
927		ioat->stats.descriptors_processed++;
928		ioat->stats.descriptors_error++;
929	}
930	CTR5(KTR_IOAT, "%s channel=%u head=%u tail=%u active=%u", __func__,
931	    ioat->chan_idx, ioat->head, ioat->tail, ioat_get_active(ioat));
932
933	/* Clear error status */
934	ioat_write_4(ioat, IOAT_CHANERR_OFFSET, chanerr);
935
936	mtx_unlock(&ioat->cleanup_lock);
937
938	ioat_log_message(0, "Resetting channel to recover from error\n");
939	error = taskqueue_enqueue(taskqueue_thread, &ioat->reset_task);
940	KASSERT(error == 0,
941	    ("%s: taskqueue_enqueue failed: %d", __func__, error));
942}
943
944static void
945ioat_reset_hw_task(void *ctx, int pending __unused)
946{
947	struct ioat_softc *ioat;
948	int error __diagused;
949
950	ioat = ctx;
951	ioat_log_message(1, "%s: Resetting channel\n", __func__);
952
953	error = ioat_reset_hw(ioat);
954	KASSERT(error == 0, ("%s: reset failed: %d", __func__, error));
955}
956
957/*
958 * User API functions
959 */
960unsigned
961ioat_get_nchannels(void)
962{
963
964	return (ioat_channel_index);
965}
966
967bus_dmaengine_t
968ioat_get_dmaengine(uint32_t index, int flags)
969{
970	struct ioat_softc *ioat;
971
972	KASSERT((flags & ~(M_NOWAIT | M_WAITOK)) == 0,
973	    ("invalid flags: 0x%08x", flags));
974	KASSERT((flags & (M_NOWAIT | M_WAITOK)) != (M_NOWAIT | M_WAITOK),
975	    ("invalid wait | nowait"));
976
977	mtx_lock(&ioat_list_mtx);
978	if (index >= ioat_channel_index ||
979	    (ioat = ioat_channel[index]) == NULL) {
980		mtx_unlock(&ioat_list_mtx);
981		return (NULL);
982	}
983	mtx_lock(&ioat->submit_lock);
984	mtx_unlock(&ioat_list_mtx);
985
986	if (ioat->destroying) {
987		mtx_unlock(&ioat->submit_lock);
988		return (NULL);
989	}
990
991	ioat_get(ioat);
992	if (ioat->quiescing) {
993		if ((flags & M_NOWAIT) != 0) {
994			ioat_put(ioat);
995			mtx_unlock(&ioat->submit_lock);
996			return (NULL);
997		}
998
999		while (ioat->quiescing && !ioat->destroying)
1000			msleep(&ioat->quiescing, &ioat->submit_lock, 0, "getdma", 0);
1001
1002		if (ioat->destroying) {
1003			ioat_put(ioat);
1004			mtx_unlock(&ioat->submit_lock);
1005			return (NULL);
1006		}
1007	}
1008	mtx_unlock(&ioat->submit_lock);
1009	return (&ioat->dmaengine);
1010}
1011
1012void
1013ioat_put_dmaengine(bus_dmaengine_t dmaengine)
1014{
1015	struct ioat_softc *ioat;
1016
1017	ioat = to_ioat_softc(dmaengine);
1018	mtx_lock(&ioat->submit_lock);
1019	ioat_put(ioat);
1020	mtx_unlock(&ioat->submit_lock);
1021}
1022
1023int
1024ioat_get_hwversion(bus_dmaengine_t dmaengine)
1025{
1026	struct ioat_softc *ioat;
1027
1028	ioat = to_ioat_softc(dmaengine);
1029	return (ioat->version);
1030}
1031
1032size_t
1033ioat_get_max_io_size(bus_dmaengine_t dmaengine)
1034{
1035	struct ioat_softc *ioat;
1036
1037	ioat = to_ioat_softc(dmaengine);
1038	return (ioat->max_xfer_size);
1039}
1040
1041uint32_t
1042ioat_get_capabilities(bus_dmaengine_t dmaengine)
1043{
1044	struct ioat_softc *ioat;
1045
1046	ioat = to_ioat_softc(dmaengine);
1047	return (ioat->capabilities);
1048}
1049
1050int
1051ioat_get_domain(bus_dmaengine_t dmaengine, int *domain)
1052{
1053	struct ioat_softc *ioat;
1054
1055	ioat = to_ioat_softc(dmaengine);
1056	return (bus_get_domain(ioat->device, domain));
1057}
1058
1059int
1060ioat_set_interrupt_coalesce(bus_dmaengine_t dmaengine, uint16_t delay)
1061{
1062	struct ioat_softc *ioat;
1063
1064	ioat = to_ioat_softc(dmaengine);
1065	if (!ioat->intrdelay_supported)
1066		return (ENODEV);
1067	if (delay > ioat->intrdelay_max)
1068		return (ERANGE);
1069
1070	ioat_write_2(ioat, IOAT_INTRDELAY_OFFSET, delay);
1071	ioat->cached_intrdelay =
1072	    ioat_read_2(ioat, IOAT_INTRDELAY_OFFSET) & IOAT_INTRDELAY_US_MASK;
1073	return (0);
1074}
1075
1076uint16_t
1077ioat_get_max_coalesce_period(bus_dmaengine_t dmaengine)
1078{
1079	struct ioat_softc *ioat;
1080
1081	ioat = to_ioat_softc(dmaengine);
1082	return (ioat->intrdelay_max);
1083}
1084
1085void
1086ioat_acquire(bus_dmaengine_t dmaengine)
1087{
1088	struct ioat_softc *ioat;
1089
1090	ioat = to_ioat_softc(dmaengine);
1091	mtx_lock(&ioat->submit_lock);
1092	CTR2(KTR_IOAT, "%s channel=%u", __func__, ioat->chan_idx);
1093	ioat->acq_head = ioat->head;
1094}
1095
1096int
1097ioat_acquire_reserve(bus_dmaengine_t dmaengine, unsigned n, int mflags)
1098{
1099	struct ioat_softc *ioat;
1100	int error;
1101
1102	ioat = to_ioat_softc(dmaengine);
1103	ioat_acquire(dmaengine);
1104
1105	error = ioat_reserve_space(ioat, n, mflags);
1106	if (error != 0)
1107		ioat_release(dmaengine);
1108	return (error);
1109}
1110
1111void
1112ioat_release(bus_dmaengine_t dmaengine)
1113{
1114	struct ioat_softc *ioat;
1115
1116	ioat = to_ioat_softc(dmaengine);
1117	CTR3(KTR_IOAT, "%s channel=%u dispatch1 head=%u", __func__,
1118	    ioat->chan_idx, ioat->head);
1119	KFAIL_POINT_CODE(DEBUG_FP, ioat_release, /* do nothing */);
1120	CTR3(KTR_IOAT, "%s channel=%u dispatch2 head=%u", __func__,
1121	    ioat->chan_idx, ioat->head);
1122
1123	if (ioat->acq_head != ioat->head) {
1124		ioat_write_2(ioat, IOAT_DMACOUNT_OFFSET,
1125		    (uint16_t)ioat->head);
1126
1127		if (!callout_pending(&ioat->poll_timer)) {
1128			callout_reset_on(&ioat->poll_timer, 1,
1129			    ioat_poll_timer_callback, ioat, ioat->cpu);
1130		}
1131	}
1132	mtx_unlock(&ioat->submit_lock);
1133}
1134
1135static struct ioat_descriptor *
1136ioat_op_generic(struct ioat_softc *ioat, uint8_t op,
1137    uint32_t size, uint64_t src, uint64_t dst,
1138    bus_dmaengine_callback_t callback_fn, void *callback_arg,
1139    uint32_t flags)
1140{
1141	struct ioat_generic_hw_descriptor *hw_desc;
1142	struct ioat_descriptor *desc;
1143	bus_dma_segment_t seg;
1144	int mflags, nseg, error;
1145
1146	mtx_assert(&ioat->submit_lock, MA_OWNED);
1147
1148	KASSERT((flags & ~_DMA_GENERIC_FLAGS) == 0,
1149	    ("Unrecognized flag(s): %#x", flags & ~_DMA_GENERIC_FLAGS));
1150	KASSERT(size <= ioat->max_xfer_size, ("%s: size too big (%u > %u)",
1151	    __func__, (unsigned)size, ioat->max_xfer_size));
1152
1153	if ((flags & DMA_NO_WAIT) != 0)
1154		mflags = M_NOWAIT;
1155	else
1156		mflags = M_WAITOK;
1157
1158	if (ioat_reserve_space(ioat, 1, mflags) != 0)
1159		return (NULL);
1160
1161	desc = ioat_get_ring_entry(ioat, ioat->head);
1162	hw_desc = &ioat_get_descriptor(ioat, ioat->head)->generic;
1163
1164	hw_desc->u.control_raw = 0;
1165	hw_desc->u.control_generic.op = op;
1166	hw_desc->u.control_generic.completion_update = 1;
1167
1168	if ((flags & DMA_INT_EN) != 0)
1169		hw_desc->u.control_generic.int_enable = 1;
1170	if ((flags & DMA_FENCE) != 0)
1171		hw_desc->u.control_generic.fence = 1;
1172
1173	hw_desc->size = size;
1174
1175	if (src != 0) {
1176		nseg = -1;
1177		error = _bus_dmamap_load_phys(ioat->data_tag, desc->src_dmamap,
1178		    src, size, 0, &seg, &nseg);
1179		if (error != 0) {
1180			ioat_log_message(0, "%s: _bus_dmamap_load_phys"
1181			    " failed %d\n", __func__, error);
1182			return (NULL);
1183		}
1184		hw_desc->src_addr = seg.ds_addr;
1185	}
1186
1187	if (dst != 0) {
1188		nseg = -1;
1189		error = _bus_dmamap_load_phys(ioat->data_tag, desc->dst_dmamap,
1190		    dst, size, 0, &seg, &nseg);
1191		if (error != 0) {
1192			ioat_log_message(0, "%s: _bus_dmamap_load_phys"
1193			    " failed %d\n", __func__, error);
1194			return (NULL);
1195		}
1196		hw_desc->dest_addr = seg.ds_addr;
1197	}
1198
1199	desc->bus_dmadesc.callback_fn = callback_fn;
1200	desc->bus_dmadesc.callback_arg = callback_arg;
1201	return (desc);
1202}
1203
1204struct bus_dmadesc *
1205ioat_null(bus_dmaengine_t dmaengine, bus_dmaengine_callback_t callback_fn,
1206    void *callback_arg, uint32_t flags)
1207{
1208	struct ioat_dma_hw_descriptor *hw_desc;
1209	struct ioat_descriptor *desc;
1210	struct ioat_softc *ioat;
1211
1212	ioat = to_ioat_softc(dmaengine);
1213	CTR2(KTR_IOAT, "%s channel=%u", __func__, ioat->chan_idx);
1214
1215	desc = ioat_op_generic(ioat, IOAT_OP_COPY, 8, 0, 0, callback_fn,
1216	    callback_arg, flags);
1217	if (desc == NULL)
1218		return (NULL);
1219
1220	hw_desc = &ioat_get_descriptor(ioat, desc->id)->dma;
1221	hw_desc->u.control.null = 1;
1222	ioat_submit_single(ioat);
1223	return (&desc->bus_dmadesc);
1224}
1225
1226struct bus_dmadesc *
1227ioat_copy(bus_dmaengine_t dmaengine, bus_addr_t dst,
1228    bus_addr_t src, bus_size_t len, bus_dmaengine_callback_t callback_fn,
1229    void *callback_arg, uint32_t flags)
1230{
1231	struct ioat_dma_hw_descriptor *hw_desc;
1232	struct ioat_descriptor *desc;
1233	struct ioat_softc *ioat;
1234
1235	ioat = to_ioat_softc(dmaengine);
1236	desc = ioat_op_generic(ioat, IOAT_OP_COPY, len, src, dst, callback_fn,
1237	    callback_arg, flags);
1238	if (desc == NULL)
1239		return (NULL);
1240
1241	hw_desc = &ioat_get_descriptor(ioat, desc->id)->dma;
1242	if (g_ioat_debug_level >= 3)
1243		dump_descriptor(hw_desc);
1244
1245	ioat_submit_single(ioat);
1246	CTR6(KTR_IOAT, "%s channel=%u desc=%p dest=%lx src=%lx len=%lx",
1247	    __func__, ioat->chan_idx, &desc->bus_dmadesc, dst, src, len);
1248	return (&desc->bus_dmadesc);
1249}
1250
1251struct bus_dmadesc *
1252ioat_copy_8k_aligned(bus_dmaengine_t dmaengine, bus_addr_t dst1,
1253    bus_addr_t dst2, bus_addr_t src1, bus_addr_t src2,
1254    bus_dmaengine_callback_t callback_fn, void *callback_arg, uint32_t flags)
1255{
1256	struct ioat_dma_hw_descriptor *hw_desc;
1257	struct ioat_descriptor *desc;
1258	struct ioat_softc *ioat;
1259	bus_size_t src1_len, dst1_len;
1260	bus_dma_segment_t seg;
1261	int nseg, error;
1262
1263	ioat = to_ioat_softc(dmaengine);
1264	CTR2(KTR_IOAT, "%s channel=%u", __func__, ioat->chan_idx);
1265
1266	KASSERT(((src1 | src2 | dst1 | dst2) & PAGE_MASK) == 0,
1267	    ("%s: addresses are not page-aligned", __func__));
1268
1269	desc = ioat_op_generic(ioat, IOAT_OP_COPY, 2 * PAGE_SIZE, 0, 0,
1270	    callback_fn, callback_arg, flags);
1271	if (desc == NULL)
1272		return (NULL);
1273
1274	hw_desc = &ioat_get_descriptor(ioat, desc->id)->dma;
1275
1276	src1_len = (src2 != src1 + PAGE_SIZE) ? PAGE_SIZE : 2 * PAGE_SIZE;
1277	nseg = -1;
1278	error = _bus_dmamap_load_phys(ioat->data_tag,
1279	    desc->src_dmamap, src1, src1_len, 0, &seg, &nseg);
1280	if (error != 0) {
1281		ioat_log_message(0, "%s: _bus_dmamap_load_phys"
1282		    " failed %d\n", __func__, error);
1283		return (NULL);
1284	}
1285	hw_desc->src_addr = seg.ds_addr;
1286	if (src1_len != 2 * PAGE_SIZE) {
1287		hw_desc->u.control.src_page_break = 1;
1288		nseg = -1;
1289		error = _bus_dmamap_load_phys(ioat->data_tag,
1290		    desc->src2_dmamap, src2, PAGE_SIZE, 0, &seg, &nseg);
1291		if (error != 0) {
1292			ioat_log_message(0, "%s: _bus_dmamap_load_phys"
1293			    " failed %d\n", __func__, error);
1294			return (NULL);
1295		}
1296		hw_desc->next_src_addr = seg.ds_addr;
1297	}
1298
1299	dst1_len = (dst2 != dst1 + PAGE_SIZE) ? PAGE_SIZE : 2 * PAGE_SIZE;
1300	nseg = -1;
1301	error = _bus_dmamap_load_phys(ioat->data_tag,
1302	    desc->dst_dmamap, dst1, dst1_len, 0, &seg, &nseg);
1303	if (error != 0) {
1304		ioat_log_message(0, "%s: _bus_dmamap_load_phys"
1305		    " failed %d\n", __func__, error);
1306		return (NULL);
1307	}
1308	hw_desc->dest_addr = seg.ds_addr;
1309	if (dst1_len != 2 * PAGE_SIZE) {
1310		hw_desc->u.control.dest_page_break = 1;
1311		nseg = -1;
1312		error = _bus_dmamap_load_phys(ioat->data_tag,
1313		    desc->dst2_dmamap, dst2, PAGE_SIZE, 0, &seg, &nseg);
1314		if (error != 0) {
1315			ioat_log_message(0, "%s: _bus_dmamap_load_phys"
1316			    " failed %d\n", __func__, error);
1317			return (NULL);
1318		}
1319		hw_desc->next_dest_addr = seg.ds_addr;
1320	}
1321
1322	if (g_ioat_debug_level >= 3)
1323		dump_descriptor(hw_desc);
1324
1325	ioat_submit_single(ioat);
1326	return (&desc->bus_dmadesc);
1327}
1328
1329struct bus_dmadesc *
1330ioat_copy_crc(bus_dmaengine_t dmaengine, bus_addr_t dst, bus_addr_t src,
1331    bus_size_t len, uint32_t *initialseed, bus_addr_t crcptr,
1332    bus_dmaengine_callback_t callback_fn, void *callback_arg, uint32_t flags)
1333{
1334	struct ioat_crc32_hw_descriptor *hw_desc;
1335	struct ioat_descriptor *desc;
1336	struct ioat_softc *ioat;
1337	uint32_t teststore;
1338	uint8_t op;
1339	bus_dma_segment_t seg;
1340	int nseg, error;
1341
1342	ioat = to_ioat_softc(dmaengine);
1343	CTR2(KTR_IOAT, "%s channel=%u", __func__, ioat->chan_idx);
1344
1345	KASSERT((ioat->capabilities & IOAT_DMACAP_MOVECRC) != 0,
1346	    ("%s: device lacks MOVECRC capability", __func__));
1347	teststore = (flags & _DMA_CRC_TESTSTORE);
1348	KASSERT(teststore != _DMA_CRC_TESTSTORE,
1349	    ("%s: TEST and STORE invalid", __func__));
1350	KASSERT(teststore != 0 || (flags & DMA_CRC_INLINE) == 0,
1351	    ("%s: INLINE invalid without TEST or STORE", __func__));
1352
1353	switch (teststore) {
1354	case DMA_CRC_STORE:
1355		op = IOAT_OP_MOVECRC_STORE;
1356		break;
1357	case DMA_CRC_TEST:
1358		op = IOAT_OP_MOVECRC_TEST;
1359		break;
1360	default:
1361		KASSERT(teststore == 0, ("bogus"));
1362		op = IOAT_OP_MOVECRC;
1363		break;
1364	}
1365
1366	desc = ioat_op_generic(ioat, op, len, src, dst, callback_fn,
1367	    callback_arg, flags & ~_DMA_CRC_FLAGS);
1368	if (desc == NULL)
1369		return (NULL);
1370
1371	hw_desc = &ioat_get_descriptor(ioat, desc->id)->crc32;
1372
1373	if ((flags & DMA_CRC_INLINE) == 0) {
1374		nseg = -1;
1375		error = _bus_dmamap_load_phys(ioat->data_tag,
1376		    desc->dst2_dmamap, crcptr, sizeof(uint32_t), 0,
1377		    &seg, &nseg);
1378		if (error != 0) {
1379			ioat_log_message(0, "%s: _bus_dmamap_load_phys"
1380			    " failed %d\n", __func__, error);
1381			return (NULL);
1382		}
1383		hw_desc->crc_address = seg.ds_addr;
1384	} else
1385		hw_desc->u.control.crc_location = 1;
1386
1387	if (initialseed != NULL) {
1388		hw_desc->u.control.use_seed = 1;
1389		hw_desc->seed = *initialseed;
1390	}
1391
1392	if (g_ioat_debug_level >= 3)
1393		dump_descriptor(hw_desc);
1394
1395	ioat_submit_single(ioat);
1396	return (&desc->bus_dmadesc);
1397}
1398
1399struct bus_dmadesc *
1400ioat_crc(bus_dmaengine_t dmaengine, bus_addr_t src, bus_size_t len,
1401    uint32_t *initialseed, bus_addr_t crcptr,
1402    bus_dmaengine_callback_t callback_fn, void *callback_arg, uint32_t flags)
1403{
1404	struct ioat_crc32_hw_descriptor *hw_desc;
1405	struct ioat_descriptor *desc;
1406	struct ioat_softc *ioat;
1407	uint32_t teststore;
1408	uint8_t op;
1409	bus_dma_segment_t seg;
1410	int nseg, error;
1411
1412	ioat = to_ioat_softc(dmaengine);
1413	CTR2(KTR_IOAT, "%s channel=%u", __func__, ioat->chan_idx);
1414
1415	KASSERT((ioat->capabilities & IOAT_DMACAP_CRC) != 0,
1416	    ("%s: device lacks CRC capability", __func__));
1417	teststore = (flags & _DMA_CRC_TESTSTORE);
1418	KASSERT(teststore != _DMA_CRC_TESTSTORE,
1419	    ("%s: TEST and STORE invalid", __func__));
1420	KASSERT(teststore != 0 || (flags & DMA_CRC_INLINE) == 0,
1421	    ("%s: INLINE invalid without TEST or STORE", __func__));
1422
1423	switch (teststore) {
1424	case DMA_CRC_STORE:
1425		op = IOAT_OP_CRC_STORE;
1426		break;
1427	case DMA_CRC_TEST:
1428		op = IOAT_OP_CRC_TEST;
1429		break;
1430	default:
1431		KASSERT(teststore == 0, ("bogus"));
1432		op = IOAT_OP_CRC;
1433		break;
1434	}
1435
1436	desc = ioat_op_generic(ioat, op, len, src, 0, callback_fn,
1437	    callback_arg, flags & ~_DMA_CRC_FLAGS);
1438	if (desc == NULL)
1439		return (NULL);
1440
1441	hw_desc = &ioat_get_descriptor(ioat, desc->id)->crc32;
1442
1443	if ((flags & DMA_CRC_INLINE) == 0) {
1444		nseg = -1;
1445		error = _bus_dmamap_load_phys(ioat->data_tag,
1446		    desc->dst2_dmamap, crcptr, sizeof(uint32_t), 0,
1447		    &seg, &nseg);
1448		if (error != 0) {
1449			ioat_log_message(0, "%s: _bus_dmamap_load_phys"
1450			    " failed %d\n", __func__, error);
1451			return (NULL);
1452		}
1453		hw_desc->crc_address = seg.ds_addr;
1454	} else
1455		hw_desc->u.control.crc_location = 1;
1456
1457	if (initialseed != NULL) {
1458		hw_desc->u.control.use_seed = 1;
1459		hw_desc->seed = *initialseed;
1460	}
1461
1462	if (g_ioat_debug_level >= 3)
1463		dump_descriptor(hw_desc);
1464
1465	ioat_submit_single(ioat);
1466	return (&desc->bus_dmadesc);
1467}
1468
1469struct bus_dmadesc *
1470ioat_blockfill(bus_dmaengine_t dmaengine, bus_addr_t dst, uint64_t fillpattern,
1471    bus_size_t len, bus_dmaengine_callback_t callback_fn, void *callback_arg,
1472    uint32_t flags)
1473{
1474	struct ioat_fill_hw_descriptor *hw_desc;
1475	struct ioat_descriptor *desc;
1476	struct ioat_softc *ioat;
1477
1478	ioat = to_ioat_softc(dmaengine);
1479	CTR2(KTR_IOAT, "%s channel=%u", __func__, ioat->chan_idx);
1480
1481	KASSERT((ioat->capabilities & IOAT_DMACAP_BFILL) != 0,
1482	    ("%s: device lacks BFILL capability", __func__));
1483
1484	desc = ioat_op_generic(ioat, IOAT_OP_FILL, len, 0, dst,
1485	    callback_fn, callback_arg, flags);
1486	if (desc == NULL)
1487		return (NULL);
1488
1489	hw_desc = &ioat_get_descriptor(ioat, desc->id)->fill;
1490	hw_desc->src_data = fillpattern;
1491	if (g_ioat_debug_level >= 3)
1492		dump_descriptor(hw_desc);
1493
1494	ioat_submit_single(ioat);
1495	return (&desc->bus_dmadesc);
1496}
1497
1498/*
1499 * Ring Management
1500 */
1501static inline uint32_t
1502ioat_get_active(struct ioat_softc *ioat)
1503{
1504
1505	return ((ioat->head - ioat->tail) & ((1 << ioat->ring_size_order) - 1));
1506}
1507
1508static inline uint32_t
1509ioat_get_ring_space(struct ioat_softc *ioat)
1510{
1511
1512	return ((1 << ioat->ring_size_order) - ioat_get_active(ioat) - 1);
1513}
1514
1515/*
1516 * Reserves space in this IOAT descriptor ring by ensuring enough slots remain
1517 * for 'num_descs'.
1518 *
1519 * If mflags contains M_WAITOK, blocks until enough space is available.
1520 *
1521 * Returns zero on success, or an errno on error.  If num_descs is beyond the
1522 * maximum ring size, returns EINVAl; if allocation would block and mflags
1523 * contains M_NOWAIT, returns EAGAIN.
1524 *
1525 * Must be called with the submit_lock held; returns with the lock held.  The
1526 * lock may be dropped to allocate the ring.
1527 *
1528 * (The submit_lock is needed to add any entries to the ring, so callers are
1529 * assured enough room is available.)
1530 */
1531static int
1532ioat_reserve_space(struct ioat_softc *ioat, uint32_t num_descs, int mflags)
1533{
1534	boolean_t dug;
1535	int error;
1536
1537	mtx_assert(&ioat->submit_lock, MA_OWNED);
1538	error = 0;
1539	dug = FALSE;
1540
1541	if (num_descs < 1 || num_descs >= (1 << ioat->ring_size_order)) {
1542		error = EINVAL;
1543		goto out;
1544	}
1545
1546	for (;;) {
1547		if (ioat->quiescing) {
1548			error = ENXIO;
1549			goto out;
1550		}
1551
1552		if (ioat_get_ring_space(ioat) >= num_descs)
1553			goto out;
1554
1555		CTR3(KTR_IOAT, "%s channel=%u starved (%u)", __func__,
1556		    ioat->chan_idx, num_descs);
1557
1558		if (!dug && !ioat->is_submitter_processing) {
1559			ioat->is_submitter_processing = TRUE;
1560			mtx_unlock(&ioat->submit_lock);
1561
1562			CTR2(KTR_IOAT, "%s channel=%u attempting to process events",
1563			    __func__, ioat->chan_idx);
1564			ioat_process_events(ioat, FALSE);
1565
1566			mtx_lock(&ioat->submit_lock);
1567			dug = TRUE;
1568			KASSERT(ioat->is_submitter_processing == TRUE,
1569			    ("is_submitter_processing"));
1570			ioat->is_submitter_processing = FALSE;
1571			wakeup(&ioat->tail);
1572			continue;
1573		}
1574
1575		if ((mflags & M_WAITOK) == 0) {
1576			error = EAGAIN;
1577			break;
1578		}
1579		CTR2(KTR_IOAT, "%s channel=%u blocking on completions",
1580		    __func__, ioat->chan_idx);
1581		msleep(&ioat->tail, &ioat->submit_lock, 0,
1582		    "ioat_full", 0);
1583		continue;
1584	}
1585
1586out:
1587	mtx_assert(&ioat->submit_lock, MA_OWNED);
1588	KASSERT(!ioat->quiescing || error == ENXIO,
1589	    ("reserved during quiesce"));
1590	return (error);
1591}
1592
1593static void
1594ioat_free_ring(struct ioat_softc *ioat, uint32_t size,
1595    struct ioat_descriptor *ring)
1596{
1597
1598	free(ring, M_IOAT);
1599}
1600
1601static struct ioat_descriptor *
1602ioat_get_ring_entry(struct ioat_softc *ioat, uint32_t index)
1603{
1604
1605	return (&ioat->ring[index % (1 << ioat->ring_size_order)]);
1606}
1607
1608static union ioat_hw_descriptor *
1609ioat_get_descriptor(struct ioat_softc *ioat, uint32_t index)
1610{
1611
1612	return (&ioat->hw_desc_ring[index % (1 << ioat->ring_size_order)]);
1613}
1614
1615static void
1616ioat_halted_debug(struct ioat_softc *ioat, uint32_t chanerr)
1617{
1618	union ioat_hw_descriptor *desc;
1619
1620	ioat_log_message(0, "Channel halted (%b)\n", (int)chanerr,
1621	    IOAT_CHANERR_STR);
1622	if (chanerr == 0)
1623		return;
1624
1625	mtx_assert(&ioat->cleanup_lock, MA_OWNED);
1626
1627	desc = ioat_get_descriptor(ioat, ioat->tail + 0);
1628	dump_descriptor(desc);
1629
1630	desc = ioat_get_descriptor(ioat, ioat->tail + 1);
1631	dump_descriptor(desc);
1632}
1633
1634static void
1635ioat_poll_timer_callback(void *arg)
1636{
1637	struct ioat_softc *ioat;
1638
1639	ioat = arg;
1640	CTR1(KTR_IOAT, "%s", __func__);
1641
1642	ioat_process_events(ioat, FALSE);
1643
1644	mtx_lock(&ioat->submit_lock);
1645	if (ioat_get_active(ioat) > 0)
1646		callout_schedule(&ioat->poll_timer, 1);
1647	mtx_unlock(&ioat->submit_lock);
1648}
1649
1650/*
1651 * Support Functions
1652 */
1653static void
1654ioat_submit_single(struct ioat_softc *ioat)
1655{
1656
1657	mtx_assert(&ioat->submit_lock, MA_OWNED);
1658
1659	ioat->head++;
1660	CTR4(KTR_IOAT, "%s channel=%u head=%u tail=%u", __func__,
1661	    ioat->chan_idx, ioat->head, ioat->tail);
1662
1663	ioat->stats.descriptors_submitted++;
1664}
1665
1666static int
1667ioat_reset_hw(struct ioat_softc *ioat)
1668{
1669	uint64_t status;
1670	uint32_t chanerr;
1671	unsigned timeout;
1672	int error;
1673
1674	CTR2(KTR_IOAT, "%s channel=%u", __func__, ioat->chan_idx);
1675
1676	mtx_lock(&ioat->submit_lock);
1677	while (ioat->resetting && !ioat->destroying)
1678		msleep(&ioat->resetting, &ioat->submit_lock, 0, "IRH_drain", 0);
1679	if (ioat->destroying) {
1680		mtx_unlock(&ioat->submit_lock);
1681		return (ENXIO);
1682	}
1683	ioat->resetting = TRUE;
1684	ioat->quiescing = TRUE;
1685	mtx_unlock(&ioat->submit_lock);
1686	mtx_lock(&ioat->cleanup_lock);
1687	while (ioat_get_active(ioat) > 0)
1688		msleep(&ioat->tail, &ioat->cleanup_lock, 0, "ioat_drain", 1);
1689
1690	/*
1691	 * Suspend ioat_process_events while the hardware and softc are in an
1692	 * indeterminate state.
1693	 */
1694	ioat->resetting_cleanup = TRUE;
1695	mtx_unlock(&ioat->cleanup_lock);
1696
1697	CTR2(KTR_IOAT, "%s channel=%u quiesced and drained", __func__,
1698	    ioat->chan_idx);
1699
1700	status = ioat_get_chansts(ioat);
1701	if (is_ioat_active(status) || is_ioat_idle(status))
1702		ioat_suspend(ioat);
1703
1704	/* Wait at most 20 ms */
1705	for (timeout = 0; (is_ioat_active(status) || is_ioat_idle(status)) &&
1706	    timeout < 20; timeout++) {
1707		DELAY(1000);
1708		status = ioat_get_chansts(ioat);
1709	}
1710	if (timeout == 20) {
1711		error = ETIMEDOUT;
1712		goto out;
1713	}
1714
1715	KASSERT(ioat_get_active(ioat) == 0, ("active after quiesce"));
1716
1717	chanerr = ioat_read_4(ioat, IOAT_CHANERR_OFFSET);
1718	ioat_write_4(ioat, IOAT_CHANERR_OFFSET, chanerr);
1719
1720	CTR2(KTR_IOAT, "%s channel=%u hardware suspended", __func__,
1721	    ioat->chan_idx);
1722
1723	/*
1724	 * IOAT v3 workaround - CHANERRMSK_INT with 3E07h to masks out errors
1725	 *  that can cause stability issues for IOAT v3.
1726	 */
1727	pci_write_config(ioat->device, IOAT_CFG_CHANERRMASK_INT_OFFSET, 0x3e07,
1728	    4);
1729	chanerr = pci_read_config(ioat->device, IOAT_CFG_CHANERR_INT_OFFSET, 4);
1730	pci_write_config(ioat->device, IOAT_CFG_CHANERR_INT_OFFSET, chanerr, 4);
1731
1732	/*
1733	 * BDXDE and BWD models reset MSI-X registers on device reset.
1734	 * Save/restore their contents manually.
1735	 */
1736	if (ioat_model_resets_msix(ioat)) {
1737		ioat_log_message(1, "device resets MSI-X registers; saving\n");
1738		pci_save_state(ioat->device);
1739	}
1740
1741	ioat_reset(ioat);
1742	CTR2(KTR_IOAT, "%s channel=%u hardware reset", __func__,
1743	    ioat->chan_idx);
1744
1745	/* Wait at most 20 ms */
1746	for (timeout = 0; ioat_reset_pending(ioat) && timeout < 20; timeout++)
1747		DELAY(1000);
1748	if (timeout == 20) {
1749		error = ETIMEDOUT;
1750		goto out;
1751	}
1752
1753	if (ioat_model_resets_msix(ioat)) {
1754		ioat_log_message(1, "device resets registers; restored\n");
1755		pci_restore_state(ioat->device);
1756	}
1757
1758	/* Reset attempts to return the hardware to "halted." */
1759	status = ioat_get_chansts(ioat);
1760	if (is_ioat_active(status) || is_ioat_idle(status)) {
1761		/* So this really shouldn't happen... */
1762		ioat_log_message(0, "Device is active after a reset?\n");
1763		ioat_write_chanctrl(ioat, IOAT_CHANCTRL_RUN);
1764		error = 0;
1765		goto out;
1766	}
1767
1768	chanerr = ioat_read_4(ioat, IOAT_CHANERR_OFFSET);
1769	if (chanerr != 0) {
1770		mtx_lock(&ioat->cleanup_lock);
1771		ioat_halted_debug(ioat, chanerr);
1772		mtx_unlock(&ioat->cleanup_lock);
1773		error = EIO;
1774		goto out;
1775	}
1776
1777	/*
1778	 * Bring device back online after reset.  Writing CHAINADDR brings the
1779	 * device back to active.
1780	 *
1781	 * The internal ring counter resets to zero, so we have to start over
1782	 * at zero as well.
1783	 */
1784	ioat->tail = ioat->head = 0;
1785	*ioat->comp_update = ioat->last_seen =
1786	    RING_PHYS_ADDR(ioat, ioat->tail - 1);
1787
1788	ioat_write_chanctrl(ioat, IOAT_CHANCTRL_RUN);
1789	ioat_write_chancmp(ioat, ioat->comp_update_bus_addr);
1790	ioat_write_chainaddr(ioat, RING_PHYS_ADDR(ioat, 0));
1791	error = 0;
1792	CTR2(KTR_IOAT, "%s channel=%u configured channel", __func__,
1793	    ioat->chan_idx);
1794
1795out:
1796	/* Enqueues a null operation and ensures it completes. */
1797	if (error == 0) {
1798		error = ioat_start_channel(ioat);
1799		CTR2(KTR_IOAT, "%s channel=%u started channel", __func__,
1800		    ioat->chan_idx);
1801	}
1802
1803	/*
1804	 * Resume completions now that ring state is consistent.
1805	 */
1806	mtx_lock(&ioat->cleanup_lock);
1807	ioat->resetting_cleanup = FALSE;
1808	mtx_unlock(&ioat->cleanup_lock);
1809
1810	/* Unblock submission of new work */
1811	mtx_lock(&ioat->submit_lock);
1812	ioat->quiescing = FALSE;
1813	wakeup(&ioat->quiescing);
1814
1815	ioat->resetting = FALSE;
1816	wakeup(&ioat->resetting);
1817
1818	CTR2(KTR_IOAT, "%s channel=%u reset done", __func__, ioat->chan_idx);
1819	mtx_unlock(&ioat->submit_lock);
1820
1821	return (error);
1822}
1823
1824static int
1825sysctl_handle_chansts(SYSCTL_HANDLER_ARGS)
1826{
1827	struct ioat_softc *ioat;
1828	struct sbuf sb;
1829	uint64_t status;
1830	int error;
1831
1832	ioat = arg1;
1833
1834	status = ioat_get_chansts(ioat) & IOAT_CHANSTS_STATUS;
1835
1836	sbuf_new_for_sysctl(&sb, NULL, 256, req);
1837	switch (status) {
1838	case IOAT_CHANSTS_ACTIVE:
1839		sbuf_printf(&sb, "ACTIVE");
1840		break;
1841	case IOAT_CHANSTS_IDLE:
1842		sbuf_printf(&sb, "IDLE");
1843		break;
1844	case IOAT_CHANSTS_SUSPENDED:
1845		sbuf_printf(&sb, "SUSPENDED");
1846		break;
1847	case IOAT_CHANSTS_HALTED:
1848		sbuf_printf(&sb, "HALTED");
1849		break;
1850	case IOAT_CHANSTS_ARMED:
1851		sbuf_printf(&sb, "ARMED");
1852		break;
1853	default:
1854		sbuf_printf(&sb, "UNKNOWN");
1855		break;
1856	}
1857	error = sbuf_finish(&sb);
1858	sbuf_delete(&sb);
1859
1860	if (error != 0 || req->newptr == NULL)
1861		return (error);
1862	return (EINVAL);
1863}
1864
1865static int
1866sysctl_handle_dpi(SYSCTL_HANDLER_ARGS)
1867{
1868	struct ioat_softc *ioat;
1869	struct sbuf sb;
1870#define	PRECISION	"1"
1871	const uintmax_t factor = 10;
1872	uintmax_t rate;
1873	int error;
1874
1875	ioat = arg1;
1876	sbuf_new_for_sysctl(&sb, NULL, 16, req);
1877
1878	if (ioat->stats.interrupts == 0) {
1879		sbuf_printf(&sb, "NaN");
1880		goto out;
1881	}
1882	rate = ioat->stats.descriptors_processed * factor /
1883	    ioat->stats.interrupts;
1884	sbuf_printf(&sb, "%ju.%." PRECISION "ju", rate / factor,
1885	    rate % factor);
1886#undef	PRECISION
1887out:
1888	error = sbuf_finish(&sb);
1889	sbuf_delete(&sb);
1890	if (error != 0 || req->newptr == NULL)
1891		return (error);
1892	return (EINVAL);
1893}
1894
1895static int
1896sysctl_handle_reset(SYSCTL_HANDLER_ARGS)
1897{
1898	struct ioat_softc *ioat;
1899	int error, arg;
1900
1901	ioat = arg1;
1902
1903	arg = 0;
1904	error = SYSCTL_OUT(req, &arg, sizeof(arg));
1905	if (error != 0 || req->newptr == NULL)
1906		return (error);
1907
1908	error = SYSCTL_IN(req, &arg, sizeof(arg));
1909	if (error != 0)
1910		return (error);
1911
1912	if (arg != 0)
1913		error = ioat_reset_hw(ioat);
1914
1915	return (error);
1916}
1917
static void
dump_descriptor(void *hw_desc)
{
	int i, j;

	for (i = 0; i < 2; i++) {
		for (j = 0; j < 8; j++)
			printf("%08x ", ((uint32_t *)hw_desc)[i * 8 + j]);
		printf("\n");
	}
}

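/*
 * Publish per-channel sysctl nodes under the device's sysctl tree:
 * read-only hardware attributes at the top level, plus "state", "hammer"
 * and "stats" subtrees.  With the driver attached as ioat<unit>, the
 * nodes should appear under dev.ioat.<unit>, e.g. (unit 0 shown for
 * illustration):
 *
 *	sysctl dev.ioat.0.state.chansts
 *	sysctl dev.ioat.0.hammer.force_hw_reset=1
 */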
static void
ioat_setup_sysctl(device_t device)
{
	struct sysctl_oid_list *par, *statpar, *state, *hammer;
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid *tree, *tmp;
	struct ioat_softc *ioat;

	ioat = DEVICE2SOFTC(device);
	ctx = device_get_sysctl_ctx(device);
	tree = device_get_sysctl_tree(device);
	par = SYSCTL_CHILDREN(tree);

	SYSCTL_ADD_INT(ctx, par, OID_AUTO, "version", CTLFLAG_RD,
	    &ioat->version, 0, "HW version (0xMM form)");
	SYSCTL_ADD_UINT(ctx, par, OID_AUTO, "max_xfer_size", CTLFLAG_RD,
	    &ioat->max_xfer_size, 0, "HW maximum transfer size");
	SYSCTL_ADD_INT(ctx, par, OID_AUTO, "intrdelay_supported", CTLFLAG_RD,
	    &ioat->intrdelay_supported, 0, "Is INTRDELAY supported");
	SYSCTL_ADD_U16(ctx, par, OID_AUTO, "intrdelay_max", CTLFLAG_RD,
	    &ioat->intrdelay_max, 0,
	    "Maximum configurable INTRDELAY on this channel (microseconds)");

	tmp = SYSCTL_ADD_NODE(ctx, par, OID_AUTO, "state",
	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "IOAT channel internal state");
	state = SYSCTL_CHILDREN(tmp);

	SYSCTL_ADD_UINT(ctx, state, OID_AUTO, "ring_size_order", CTLFLAG_RD,
	    &ioat->ring_size_order, 0, "SW descriptor ring size order");
	SYSCTL_ADD_UINT(ctx, state, OID_AUTO, "head", CTLFLAG_RD, &ioat->head,
	    0, "SW descriptor head pointer index");
	SYSCTL_ADD_UINT(ctx, state, OID_AUTO, "tail", CTLFLAG_RD, &ioat->tail,
	    0, "SW descriptor tail pointer index");

	SYSCTL_ADD_UQUAD(ctx, state, OID_AUTO, "last_completion", CTLFLAG_RD,
	    ioat->comp_update, "HW addr of last completion");

	SYSCTL_ADD_INT(ctx, state, OID_AUTO, "is_submitter_processing",
	    CTLFLAG_RD, &ioat->is_submitter_processing, 0,
	    "submitter processing");

	SYSCTL_ADD_PROC(ctx, state, OID_AUTO, "chansts",
	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, ioat, 0,
	    sysctl_handle_chansts, "A", "String of the channel status");

	SYSCTL_ADD_U16(ctx, state, OID_AUTO, "intrdelay", CTLFLAG_RD,
	    &ioat->cached_intrdelay, 0,
	    "Current INTRDELAY on this channel (cached, microseconds)");

	tmp = SYSCTL_ADD_NODE(ctx, par, OID_AUTO, "hammer",
	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
	    "Big hammers (mostly for testing)");
	hammer = SYSCTL_CHILDREN(tmp);

	SYSCTL_ADD_PROC(ctx, hammer, OID_AUTO, "force_hw_reset",
	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, ioat, 0,
	    sysctl_handle_reset, "I", "Set to non-zero to reset the hardware");

	tmp = SYSCTL_ADD_NODE(ctx, par, OID_AUTO, "stats",
	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "IOAT channel statistics");
	statpar = SYSCTL_CHILDREN(tmp);

	SYSCTL_ADD_UQUAD(ctx, statpar, OID_AUTO, "interrupts",
	    CTLFLAG_RW | CTLFLAG_STATS, &ioat->stats.interrupts,
	    "Number of interrupts processed on this channel");
	SYSCTL_ADD_UQUAD(ctx, statpar, OID_AUTO, "descriptors",
	    CTLFLAG_RW | CTLFLAG_STATS, &ioat->stats.descriptors_processed,
	    "Number of descriptors processed on this channel");
	SYSCTL_ADD_UQUAD(ctx, statpar, OID_AUTO, "submitted",
	    CTLFLAG_RW | CTLFLAG_STATS, &ioat->stats.descriptors_submitted,
	    "Number of descriptors submitted to this channel");
	SYSCTL_ADD_UQUAD(ctx, statpar, OID_AUTO, "errored",
	    CTLFLAG_RW | CTLFLAG_STATS, &ioat->stats.descriptors_error,
	    "Number of descriptors failed by channel errors");
	SYSCTL_ADD_U32(ctx, statpar, OID_AUTO, "halts",
	    CTLFLAG_RW | CTLFLAG_STATS, &ioat->stats.channel_halts, 0,
	    "Number of times the channel has halted");
	SYSCTL_ADD_U32(ctx, statpar, OID_AUTO, "last_halt_chanerr",
	    CTLFLAG_RW | CTLFLAG_STATS, &ioat->stats.last_halt_chanerr, 0,
	    "The raw CHANERR when the channel was last halted");

	SYSCTL_ADD_PROC(ctx, statpar, OID_AUTO, "desc_per_interrupt",
	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, ioat, 0,
	    sysctl_handle_dpi, "A", "Descriptors per interrupt");
}

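/*
 * Channel reference counting.  ioat_get()/ioat_put() take and release a
 * reference while holding the submit lock; ioat_drain_locked() sleeps
 * until all outstanding references have been released, e.g. before the
 * channel is torn down.
 */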
static void
ioat_get(struct ioat_softc *ioat)
{

	mtx_assert(&ioat->submit_lock, MA_OWNED);
	KASSERT(ioat->refcnt < UINT32_MAX, ("refcnt overflow"));

	ioat->refcnt++;
}

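/* Drop a channel reference; wake up any drainer once it reaches zero. */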
static void
ioat_put(struct ioat_softc *ioat)
{

	mtx_assert(&ioat->submit_lock, MA_OWNED);
	KASSERT(ioat->refcnt >= 1, ("refcnt error"));

	if (--ioat->refcnt == 0)
		wakeup(&ioat->refcnt);
}

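/* Sleep until all channel references have been released. */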
static void
ioat_drain_locked(struct ioat_softc *ioat)
{

	mtx_assert(&ioat->submit_lock, MA_OWNED);

	while (ioat->refcnt > 0)
		msleep(&ioat->refcnt, &ioat->submit_lock, 0, "ioat_drain", 0);
}

#ifdef DDB
#define	_db_show_lock(lo)	LOCK_CLASS(lo)->lc_ddb_show(lo)
#define	db_show_lock(lk)	_db_show_lock(&(lk)->lock_object)
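/*
 * DDB command: dump the software and hardware state of one channel.
 * From the debugger prompt:
 *
 *	show ioat <channel index>
 */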
DB_SHOW_COMMAND(ioat, db_show_ioat)
{
	struct ioat_softc *sc;
	unsigned idx;

	if (!have_addr)
		goto usage;
	idx = (unsigned)addr;
	if (idx >= ioat_channel_index)
		goto usage;

	sc = ioat_channel[idx];
	db_printf("ioat softc at %p\n", sc);
	if (sc == NULL)
		return;

	db_printf(" version: %d\n", sc->version);
	db_printf(" chan_idx: %u\n", sc->chan_idx);
	db_printf(" submit_lock: ");
	db_show_lock(&sc->submit_lock);

	db_printf(" capabilities: %b\n", (int)sc->capabilities,
	    IOAT_DMACAP_STR);
	db_printf(" cached_intrdelay: %u\n", sc->cached_intrdelay);
	db_printf(" *comp_update: 0x%jx\n", (uintmax_t)*sc->comp_update);

	db_printf(" poll_timer:\n");
	db_printf("  c_time: %ju\n", (uintmax_t)sc->poll_timer.c_time);
	db_printf("  c_arg: %p\n", sc->poll_timer.c_arg);
	db_printf("  c_func: %p\n", sc->poll_timer.c_func);
	db_printf("  c_lock: %p\n", sc->poll_timer.c_lock);
	db_printf("  c_flags: 0x%x\n", (unsigned)sc->poll_timer.c_flags);

	db_printf(" quiescing: %d\n", (int)sc->quiescing);
	db_printf(" destroying: %d\n", (int)sc->destroying);
	db_printf(" is_submitter_processing: %d\n",
	    (int)sc->is_submitter_processing);
	db_printf(" intrdelay_supported: %d\n", (int)sc->intrdelay_supported);
	db_printf(" resetting: %d\n", (int)sc->resetting);

	db_printf(" head: %u\n", sc->head);
	db_printf(" tail: %u\n", sc->tail);
	db_printf(" ring_size_order: %u\n", sc->ring_size_order);
	db_printf(" last_seen: 0x%lx\n", sc->last_seen);
	db_printf(" ring: %p\n", sc->ring);
	db_printf(" descriptors: %p\n", sc->hw_desc_ring);
	db_printf(" descriptors (phys): 0x%jx\n",
	    (uintmax_t)sc->hw_desc_bus_addr);

	db_printf("  ring[%u] (tail):\n", sc->tail %
	    (1 << sc->ring_size_order));
	db_printf("   id: %u\n", ioat_get_ring_entry(sc, sc->tail)->id);
	db_printf("   addr: 0x%lx\n",
	    RING_PHYS_ADDR(sc, sc->tail));
	db_printf("   next: 0x%lx\n",
	    ioat_get_descriptor(sc, sc->tail)->generic.next);

	db_printf("  ring[%u] (head - 1):\n", (sc->head - 1) %
	    (1 << sc->ring_size_order));
	db_printf("   id: %u\n", ioat_get_ring_entry(sc, sc->head - 1)->id);
	db_printf("   addr: 0x%lx\n",
	    RING_PHYS_ADDR(sc, sc->head - 1));
	db_printf("   next: 0x%lx\n",
	    ioat_get_descriptor(sc, sc->head - 1)->generic.next);

	db_printf("  ring[%u] (head):\n", (sc->head) %
	    (1 << sc->ring_size_order));
	db_printf("   id: %u\n", ioat_get_ring_entry(sc, sc->head)->id);
	db_printf("   addr: 0x%lx\n",
	    RING_PHYS_ADDR(sc, sc->head));
	db_printf("   next: 0x%lx\n",
	    ioat_get_descriptor(sc, sc->head)->generic.next);

	for (idx = 0; idx < (1 << sc->ring_size_order); idx++)
		if ((*sc->comp_update & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_MASK)
		    == RING_PHYS_ADDR(sc, idx))
			db_printf("  ring[%u] == hardware tail\n", idx);

	db_printf(" cleanup_lock: ");
	db_show_lock(&sc->cleanup_lock);

	db_printf(" refcnt: %u\n", sc->refcnt);
	db_printf(" stats:\n");
	db_printf("  interrupts: %lu\n", sc->stats.interrupts);
	db_printf("  descriptors_processed: %lu\n",
	    sc->stats.descriptors_processed);
	db_printf("  descriptors_error: %lu\n", sc->stats.descriptors_error);
	db_printf("  descriptors_submitted: %lu\n",
	    sc->stats.descriptors_submitted);

	db_printf("  channel_halts: %u\n", sc->stats.channel_halts);
	db_printf("  last_halt_chanerr: %u\n", sc->stats.last_halt_chanerr);

	if (db_pager_quit)
		return;

	db_printf(" hw status:\n");
	db_printf("  status: 0x%lx\n", ioat_get_chansts(sc));
	db_printf("  chanctrl: 0x%x\n",
	    (unsigned)ioat_read_2(sc, IOAT_CHANCTRL_OFFSET));
	db_printf("  chancmd: 0x%x\n",
	    (unsigned)ioat_read_1(sc, IOAT_CHANCMD_OFFSET));
	db_printf("  dmacount: 0x%x\n",
	    (unsigned)ioat_read_2(sc, IOAT_DMACOUNT_OFFSET));
	db_printf("  chainaddr: 0x%lx\n",
	    ioat_read_double_4(sc, IOAT_CHAINADDR_OFFSET_LOW));
	db_printf("  chancmp: 0x%lx\n",
	    ioat_read_double_4(sc, IOAT_CHANCMP_OFFSET_LOW));
	db_printf("  chanerr: %b\n",
	    (int)ioat_read_4(sc, IOAT_CHANERR_OFFSET), IOAT_CHANERR_STR);
	return;
usage:
	db_printf("usage: show ioat <0-%u>\n", ioat_channel_index - 1);
	return;
}
#endif /* DDB */
