1/*-
2 * Copyright (C) 2012 Intel Corporation
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD: head/sys/dev/ioat/ioat.c 289982 2015-10-26 03:30:38Z cem $");
29
30#include <sys/param.h>
31#include <sys/systm.h>
32#include <sys/bus.h>
33#include <sys/conf.h>
34#include <sys/ioccom.h>
35#include <sys/kernel.h>
36#include <sys/lock.h>
37#include <sys/malloc.h>
38#include <sys/module.h>
39#include <sys/mutex.h>
40#include <sys/rman.h>
41#include <sys/sysctl.h>
42#include <sys/time.h>
43#include <dev/pci/pcireg.h>
44#include <dev/pci/pcivar.h>
45#include <machine/bus.h>
46#include <machine/resource.h>
47#include <machine/stdarg.h>
48
49#include "ioat.h"
50#include "ioat_hw.h"
51#include "ioat_internal.h"
52
53#define	IOAT_INTR_TIMO	(hz / 10)
54#define	IOAT_REFLK	(&ioat->submit_lock)
55
56static int ioat_probe(device_t device);
57static int ioat_attach(device_t device);
58static int ioat_detach(device_t device);
59static int ioat_setup_intr(struct ioat_softc *ioat);
60static int ioat_teardown_intr(struct ioat_softc *ioat);
61static int ioat3_attach(device_t device);
62static int ioat_start_channel(struct ioat_softc *ioat);
63static int ioat_map_pci_bar(struct ioat_softc *ioat);
64static void ioat_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg,
65    int error);
66static void ioat_interrupt_handler(void *arg);
67static boolean_t ioat_model_resets_msix(struct ioat_softc *ioat);
68static void ioat_process_events(struct ioat_softc *ioat);
69static inline uint32_t ioat_get_active(struct ioat_softc *ioat);
70static inline uint32_t ioat_get_ring_space(struct ioat_softc *ioat);
71static void ioat_free_ring(struct ioat_softc *, uint32_t size,
72    struct ioat_descriptor **);
73static void ioat_free_ring_entry(struct ioat_softc *ioat,
74    struct ioat_descriptor *desc);
75static struct ioat_descriptor *ioat_alloc_ring_entry(struct ioat_softc *,
76    int mflags);
77static int ioat_reserve_space(struct ioat_softc *, uint32_t, int mflags);
78static struct ioat_descriptor *ioat_get_ring_entry(struct ioat_softc *ioat,
79    uint32_t index);
80static struct ioat_descriptor **ioat_prealloc_ring(struct ioat_softc *,
81    uint32_t size, boolean_t need_dscr, int mflags);
82static int ring_grow(struct ioat_softc *, uint32_t oldorder,
83    struct ioat_descriptor **);
84static int ring_shrink(struct ioat_softc *, uint32_t oldorder,
85    struct ioat_descriptor **);
86static void ioat_timer_callback(void *arg);
87static void dump_descriptor(void *hw_desc);
88static void ioat_submit_single(struct ioat_softc *ioat);
89static void ioat_comp_update_map(void *arg, bus_dma_segment_t *seg, int nseg,
90    int error);
91static int ioat_reset_hw(struct ioat_softc *ioat);
92static void ioat_setup_sysctl(device_t device);
93static int sysctl_handle_reset(SYSCTL_HANDLER_ARGS);
94static inline struct ioat_softc *ioat_get(struct ioat_softc *,
95    enum ioat_ref_kind);
96static inline void ioat_put(struct ioat_softc *, enum ioat_ref_kind);
97static inline void ioat_putn(struct ioat_softc *, uint32_t,
98    enum ioat_ref_kind);
99static void ioat_drain(struct ioat_softc *);
100
101#define	ioat_log_message(v, ...) do {					\
102	if ((v) <= g_ioat_debug_level) {				\
103		device_printf(ioat->device, __VA_ARGS__);		\
104	}								\
105} while (0)
106
107MALLOC_DEFINE(M_IOAT, "ioat", "ioat driver memory allocations");
108SYSCTL_NODE(_hw, OID_AUTO, ioat, CTLFLAG_RD, 0, "ioat node");
109
110static int g_force_legacy_interrupts;
111SYSCTL_INT(_hw_ioat, OID_AUTO, force_legacy_interrupts, CTLFLAG_RDTUN,
112    &g_force_legacy_interrupts, 0, "Set to non-zero to force legacy interrupts (disable MSI-X)");
113
114int g_ioat_debug_level = 0;
115SYSCTL_INT(_hw_ioat, OID_AUTO, debug_level, CTLFLAG_RWTUN, &g_ioat_debug_level,
116    0, "Set log level (0-3) for ioat(4). Higher is more verbose.");
117
118/*
119 * OS <-> Driver interface structures
120 */
121static device_method_t ioat_pci_methods[] = {
122	/* Device interface */
123	DEVMETHOD(device_probe,     ioat_probe),
124	DEVMETHOD(device_attach,    ioat_attach),
125	DEVMETHOD(device_detach,    ioat_detach),
126	{ 0, 0 }
127};
128
129static driver_t ioat_pci_driver = {
130	"ioat",
131	ioat_pci_methods,
132	sizeof(struct ioat_softc),
133};
134
135static devclass_t ioat_devclass;
136DRIVER_MODULE(ioat, pci, ioat_pci_driver, ioat_devclass, 0, 0);
137
138/*
139 * Private data structures
140 */
141static struct ioat_softc *ioat_channel[IOAT_MAX_CHANNELS];
142static int ioat_channel_index = 0;
143SYSCTL_INT(_hw_ioat, OID_AUTO, channels, CTLFLAG_RD, &ioat_channel_index, 0,
144    "Number of IOAT channels attached");
145
146static struct _pcsid
147{
148	u_int32_t   type;
149	const char  *desc;
150} pci_ids[] = {
151	{ 0x34308086, "TBG IOAT Ch0" },
152	{ 0x34318086, "TBG IOAT Ch1" },
153	{ 0x34328086, "TBG IOAT Ch2" },
154	{ 0x34338086, "TBG IOAT Ch3" },
155	{ 0x34298086, "TBG IOAT Ch4" },
156	{ 0x342a8086, "TBG IOAT Ch5" },
157	{ 0x342b8086, "TBG IOAT Ch6" },
158	{ 0x342c8086, "TBG IOAT Ch7" },
159
160	{ 0x37108086, "JSF IOAT Ch0" },
161	{ 0x37118086, "JSF IOAT Ch1" },
162	{ 0x37128086, "JSF IOAT Ch2" },
163	{ 0x37138086, "JSF IOAT Ch3" },
164	{ 0x37148086, "JSF IOAT Ch4" },
165	{ 0x37158086, "JSF IOAT Ch5" },
166	{ 0x37168086, "JSF IOAT Ch6" },
167	{ 0x37178086, "JSF IOAT Ch7" },
168	{ 0x37188086, "JSF IOAT Ch0 (RAID)" },
169	{ 0x37198086, "JSF IOAT Ch1 (RAID)" },
170
171	{ 0x3c208086, "SNB IOAT Ch0" },
172	{ 0x3c218086, "SNB IOAT Ch1" },
173	{ 0x3c228086, "SNB IOAT Ch2" },
174	{ 0x3c238086, "SNB IOAT Ch3" },
175	{ 0x3c248086, "SNB IOAT Ch4" },
176	{ 0x3c258086, "SNB IOAT Ch5" },
177	{ 0x3c268086, "SNB IOAT Ch6" },
178	{ 0x3c278086, "SNB IOAT Ch7" },
179	{ 0x3c2e8086, "SNB IOAT Ch0 (RAID)" },
180	{ 0x3c2f8086, "SNB IOAT Ch1 (RAID)" },
181
182	{ 0x0e208086, "IVB IOAT Ch0" },
183	{ 0x0e218086, "IVB IOAT Ch1" },
184	{ 0x0e228086, "IVB IOAT Ch2" },
185	{ 0x0e238086, "IVB IOAT Ch3" },
186	{ 0x0e248086, "IVB IOAT Ch4" },
187	{ 0x0e258086, "IVB IOAT Ch5" },
188	{ 0x0e268086, "IVB IOAT Ch6" },
189	{ 0x0e278086, "IVB IOAT Ch7" },
190	{ 0x0e2e8086, "IVB IOAT Ch0 (RAID)" },
191	{ 0x0e2f8086, "IVB IOAT Ch1 (RAID)" },
192
193	{ 0x2f208086, "HSW IOAT Ch0" },
194	{ 0x2f218086, "HSW IOAT Ch1" },
195	{ 0x2f228086, "HSW IOAT Ch2" },
196	{ 0x2f238086, "HSW IOAT Ch3" },
197	{ 0x2f248086, "HSW IOAT Ch4" },
198	{ 0x2f258086, "HSW IOAT Ch5" },
199	{ 0x2f268086, "HSW IOAT Ch6" },
200	{ 0x2f278086, "HSW IOAT Ch7" },
201	{ 0x2f2e8086, "HSW IOAT Ch0 (RAID)" },
202	{ 0x2f2f8086, "HSW IOAT Ch1 (RAID)" },
203
204	{ 0x0c508086, "BWD IOAT Ch0" },
205	{ 0x0c518086, "BWD IOAT Ch1" },
206	{ 0x0c528086, "BWD IOAT Ch2" },
207	{ 0x0c538086, "BWD IOAT Ch3" },
208
209	{ 0x6f508086, "BDXDE IOAT Ch0" },
210	{ 0x6f518086, "BDXDE IOAT Ch1" },
211	{ 0x6f528086, "BDXDE IOAT Ch2" },
212	{ 0x6f538086, "BDXDE IOAT Ch3" },
213
214	{ 0x00000000, NULL           }
215};
216
217/*
218 * OS <-> Driver linkage functions
219 */
220static int
221ioat_probe(device_t device)
222{
223	struct _pcsid *ep;
224	u_int32_t type;
225
226	type = pci_get_devid(device);
227	for (ep = pci_ids; ep->type; ep++) {
228		if (ep->type == type) {
229			device_set_desc(device, ep->desc);
230			return (0);
231		}
232	}
233	return (ENXIO);
234}
235
236static int
237ioat_attach(device_t device)
238{
239	struct ioat_softc *ioat;
240	int error;
241
242	ioat = DEVICE2SOFTC(device);
243	ioat->device = device;
244
245	error = ioat_map_pci_bar(ioat);
246	if (error != 0)
247		goto err;
248
249	ioat->version = ioat_read_cbver(ioat);
250	if (ioat->version < IOAT_VER_3_0) {
251		error = ENODEV;
252		goto err;
253	}
254
255	error = ioat3_attach(device);
256	if (error != 0)
257		goto err;
258
259	error = pci_enable_busmaster(device);
260	if (error != 0)
261		goto err;
262
263	error = ioat_setup_intr(ioat);
264	if (error != 0)
265		goto err;
266
267	error = ioat_reset_hw(ioat);
268	if (error != 0)
269		goto err;
270
271	ioat_process_events(ioat);
272	ioat_setup_sysctl(device);
273
274	ioat_channel[ioat_channel_index++] = ioat;
275	ioat_test_attach();
276
277err:
278	if (error != 0)
279		ioat_detach(device);
280	return (error);
281}
282
283static int
284ioat_detach(device_t device)
285{
286	struct ioat_softc *ioat;
287
288	ioat = DEVICE2SOFTC(device);
289
290	ioat_test_detach();
291	ioat_drain(ioat);
292
293	ioat_teardown_intr(ioat);
294	callout_drain(&ioat->timer);
295
296	pci_disable_busmaster(device);
297
298	if (ioat->pci_resource != NULL)
299		bus_release_resource(device, SYS_RES_MEMORY,
300		    ioat->pci_resource_id, ioat->pci_resource);
301
302	if (ioat->ring != NULL)
303		ioat_free_ring(ioat, 1 << ioat->ring_size_order, ioat->ring);
304
305	if (ioat->comp_update != NULL) {
306		bus_dmamap_unload(ioat->comp_update_tag, ioat->comp_update_map);
307		bus_dmamem_free(ioat->comp_update_tag, ioat->comp_update,
308		    ioat->comp_update_map);
309		bus_dma_tag_destroy(ioat->comp_update_tag);
310	}
311
312	bus_dma_tag_destroy(ioat->hw_desc_tag);
313
314	return (0);
315}
316
317static int
318ioat_teardown_intr(struct ioat_softc *ioat)
319{
320
321	if (ioat->tag != NULL)
322		bus_teardown_intr(ioat->device, ioat->res, ioat->tag);
323
324	if (ioat->res != NULL)
325		bus_release_resource(ioat->device, SYS_RES_IRQ,
326		    rman_get_rid(ioat->res), ioat->res);
327
328	pci_release_msi(ioat->device);
329	return (0);
330}
331
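/*
 * Submit a NULL descriptor and poll briefly (up to ~100us) for the channel to
 * report an idle status, confirming that it is fetching and completing
 * descriptors.  Logs CHANERR and returns ENXIO if the channel does not start.
 */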
332static int
333ioat_start_channel(struct ioat_softc *ioat)
334{
335	uint64_t status;
336	uint32_t chanerr;
337	int i;
338
339	ioat_acquire(&ioat->dmaengine);
340	ioat_null(&ioat->dmaengine, NULL, NULL, 0);
341	ioat_release(&ioat->dmaengine);
342
343	for (i = 0; i < 100; i++) {
344		DELAY(1);
345		status = ioat_get_chansts(ioat);
346		if (is_ioat_idle(status))
347			return (0);
348	}
349
350	chanerr = ioat_read_4(ioat, IOAT_CHANERR_OFFSET);
351	ioat_log_message(0, "could not start channel: "
352	    "status = %#jx error = %x\n", (uintmax_t)status, chanerr);
353	return (ENXIO);
354}
355
356/*
357 * Initialize Hardware
358 */
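/*
 * Per-channel initialization: transfer size limit from XFERCAP, locks and the
 * housekeeping callout, DMA-able memory for the 8-byte completion update, and
 * a descriptor ring of 1 << IOAT_MIN_ORDER entries whose hardware descriptors
 * are linked into a circular chain through their 'next' pointers.
 */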
359static int
360ioat3_attach(device_t device)
361{
362	struct ioat_softc *ioat;
363	struct ioat_descriptor **ring;
364	struct ioat_descriptor *next;
365	struct ioat_dma_hw_descriptor *dma_hw_desc;
366	uint32_t capabilities;
367	int i, num_descriptors;
368	int error;
369	uint8_t xfercap;
370
371	error = 0;
372	ioat = DEVICE2SOFTC(device);
373	capabilities = ioat_read_dmacapability(ioat);
374
375	xfercap = ioat_read_xfercap(ioat);
376	ioat->max_xfer_size = 1 << xfercap;
377
378	/* TODO: need to check DCA here if we ever do XOR/PQ */
379
380	mtx_init(&ioat->submit_lock, "ioat_submit", NULL, MTX_DEF);
381	mtx_init(&ioat->cleanup_lock, "ioat_process_events", NULL, MTX_DEF);
382	callout_init(&ioat->timer, 1);
383
384	ioat->is_resize_pending = FALSE;
385	ioat->is_completion_pending = FALSE;
386	ioat->is_reset_pending = FALSE;
387	ioat->is_channel_running = FALSE;
388
389	bus_dma_tag_create(bus_get_dma_tag(ioat->device), sizeof(uint64_t), 0x0,
390	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
391	    sizeof(uint64_t), 1, sizeof(uint64_t), 0, NULL, NULL,
392	    &ioat->comp_update_tag);
393
394	error = bus_dmamem_alloc(ioat->comp_update_tag,
395	    (void **)&ioat->comp_update, BUS_DMA_ZERO, &ioat->comp_update_map);
396	if (ioat->comp_update == NULL)
397		return (ENOMEM);
398
399	error = bus_dmamap_load(ioat->comp_update_tag, ioat->comp_update_map,
400	    ioat->comp_update, sizeof(uint64_t), ioat_comp_update_map, ioat,
401	    0);
402	if (error != 0)
403		return (error);
404
405	ioat->ring_size_order = IOAT_MIN_ORDER;
406
407	num_descriptors = 1 << ioat->ring_size_order;
408
409	bus_dma_tag_create(bus_get_dma_tag(ioat->device), 0x40, 0x0,
410	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
411	    sizeof(struct ioat_dma_hw_descriptor), 1,
412	    sizeof(struct ioat_dma_hw_descriptor), 0, NULL, NULL,
413	    &ioat->hw_desc_tag);
414
415	ioat->ring = malloc(num_descriptors * sizeof(*ring), M_IOAT,
416	    M_ZERO | M_WAITOK);
417	if (ioat->ring == NULL)
418		return (ENOMEM);
419
420	ring = ioat->ring;
421	for (i = 0; i < num_descriptors; i++) {
422		ring[i] = ioat_alloc_ring_entry(ioat, M_WAITOK);
423		if (ring[i] == NULL)
424			return (ENOMEM);
425
426		ring[i]->id = i;
427	}
428
429	for (i = 0; i < num_descriptors - 1; i++) {
430		next = ring[i + 1];
431		dma_hw_desc = ring[i]->u.dma;
432
433		dma_hw_desc->next = next->hw_desc_bus_addr;
434	}
435
436	ring[i]->u.dma->next = ring[0]->hw_desc_bus_addr;
437
438	ioat->head = ioat->hw_head = 0;
439	ioat->tail = 0;
440	ioat->last_seen = 0;
441	return (0);
442}
443
444static int
445ioat_map_pci_bar(struct ioat_softc *ioat)
446{
447
448	ioat->pci_resource_id = PCIR_BAR(0);
449	ioat->pci_resource = bus_alloc_resource_any(ioat->device,
450	    SYS_RES_MEMORY, &ioat->pci_resource_id, RF_ACTIVE);
451
452	if (ioat->pci_resource == NULL) {
453		ioat_log_message(0, "unable to allocate pci resource\n");
454		return (ENODEV);
455	}
456
457	ioat->pci_bus_tag = rman_get_bustag(ioat->pci_resource);
458	ioat->pci_bus_handle = rman_get_bushandle(ioat->pci_resource);
459	return (0);
460}
461
462static void
463ioat_comp_update_map(void *arg, bus_dma_segment_t *seg, int nseg, int error)
464{
465	struct ioat_softc *ioat = arg;
466
467	KASSERT(error == 0, ("%s: error:%d", __func__, error));
468	ioat->comp_update_bus_addr = seg[0].ds_addr;
469}
470
471static void
472ioat_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
473{
474	bus_addr_t *baddr;
475
476	KASSERT(error == 0, ("%s: error:%d", __func__, error));
477	baddr = arg;
478	*baddr = segs->ds_addr;
479}
480
481/*
482 * Interrupt setup and handlers
483 */
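/*
 * Allocate and wire up the channel interrupt.  A single MSI-X vector is
 * preferred; if MSI-X is unavailable or disabled via the
 * hw.ioat.force_legacy_interrupts tunable, fall back to a shared legacy
 * (INTx) interrupt.  Finally, enable the master interrupt in INTRCTRL.
 */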
484static int
485ioat_setup_intr(struct ioat_softc *ioat)
486{
487	uint32_t num_vectors;
488	int error;
489	boolean_t use_msix;
490	boolean_t force_legacy_interrupts;
491
492	use_msix = FALSE;
493	force_legacy_interrupts = FALSE;
494
495	if (!g_force_legacy_interrupts && pci_msix_count(ioat->device) >= 1) {
496		num_vectors = 1;
497		pci_alloc_msix(ioat->device, &num_vectors);
498		if (num_vectors == 1)
499			use_msix = TRUE;
500	}
501
502	if (use_msix) {
503		ioat->rid = 1;
504		ioat->res = bus_alloc_resource_any(ioat->device, SYS_RES_IRQ,
505		    &ioat->rid, RF_ACTIVE);
506	} else {
507		ioat->rid = 0;
508		ioat->res = bus_alloc_resource_any(ioat->device, SYS_RES_IRQ,
509		    &ioat->rid, RF_SHAREABLE | RF_ACTIVE);
510	}
511	if (ioat->res == NULL) {
512		ioat_log_message(0, "bus_alloc_resource failed\n");
513		return (ENOMEM);
514	}
515
516	ioat->tag = NULL;
517	error = bus_setup_intr(ioat->device, ioat->res, INTR_MPSAFE |
518	    INTR_TYPE_MISC, NULL, ioat_interrupt_handler, ioat, &ioat->tag);
519	if (error != 0) {
520		ioat_log_message(0, "bus_setup_intr failed\n");
521		return (error);
522	}
523
524	ioat_write_intrctrl(ioat, IOAT_INTRCTRL_MASTER_INT_EN);
525	return (0);
526}
527
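/*
 * BWD and BDXDE devices lose their MSI-X configuration across a device reset;
 * ioat_reset_hw() uses this to decide whether to save and restore the PCI
 * state around the reset.
 */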
528static boolean_t
529ioat_model_resets_msix(struct ioat_softc *ioat)
530{
531	u_int32_t pciid;
532
533	pciid = pci_get_devid(ioat->device);
534	switch (pciid) {
535		/* BWD: */
536	case 0x0c508086:
537	case 0x0c518086:
538	case 0x0c528086:
539	case 0x0c538086:
540		/* BDXDE: */
541	case 0x6f508086:
542	case 0x6f518086:
543	case 0x6f528086:
544	case 0x6f538086:
545		return (TRUE);
546	}
547
548	return (FALSE);
549}
550
551static void
552ioat_interrupt_handler(void *arg)
553{
554	struct ioat_softc *ioat = arg;
555
556	ioat_process_events(ioat);
557}
558
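/*
 * Completion processing: read the completion address the hardware wrote back
 * to *comp_update, then walk the ring from 'tail', invoking each descriptor's
 * callback, until reaching the descriptor whose bus address matches that
 * completion.  Drops one IOAT_ACTIVE_DESCR_REF per completed descriptor and
 * wakes any thread sleeping on 'tail' for ring space.
 */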
559static void
560ioat_process_events(struct ioat_softc *ioat)
561{
562	struct ioat_descriptor *desc;
563	struct bus_dmadesc *dmadesc;
564	uint64_t comp_update, status;
565	uint32_t completed;
566
567	mtx_lock(&ioat->cleanup_lock);
568
569	completed = 0;
570	comp_update = *ioat->comp_update;
571	status = comp_update & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_MASK;
572
573	CTR0(KTR_IOAT, __func__);
574
575	if (status == ioat->last_seen)
576		goto out;
577
578	while (1) {
579		desc = ioat_get_ring_entry(ioat, ioat->tail);
580		dmadesc = &desc->bus_dmadesc;
581		CTR1(KTR_IOAT, "completing desc %d", ioat->tail);
582
583		if (dmadesc->callback_fn)
584			(*dmadesc->callback_fn)(dmadesc->callback_arg);
585
586		completed++;
587		ioat->tail++;
588		if (desc->hw_desc_bus_addr == status)
589			break;
590	}
591
592	ioat->last_seen = desc->hw_desc_bus_addr;
593
594	if (ioat->head == ioat->tail) {
595		ioat->is_completion_pending = FALSE;
596		callout_reset(&ioat->timer, IOAT_INTR_TIMO,
597		    ioat_timer_callback, ioat);
598	}
599
600out:
601	ioat_write_chanctrl(ioat, IOAT_CHANCTRL_RUN);
602	mtx_unlock(&ioat->cleanup_lock);
603
604	ioat_putn(ioat, completed, IOAT_ACTIVE_DESCR_REF);
605	wakeup(&ioat->tail);
606}
607
608/*
609 * User API functions
610 */
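/*
 * Typical consumer usage, as a minimal illustrative sketch only.  Error
 * handling is omitted, and the bus addresses, callback, and callback argument
 * are placeholder names supplied by the caller:
 *
 *	bus_dmaengine_t dma;
 *
 *	dma = ioat_get_dmaengine(0);
 *	if (dma == NULL)
 *		return;
 *	ioat_acquire(dma);
 *	(void)ioat_copy(dma, dst_busaddr, src_busaddr, len, my_callback,
 *	    my_arg, DMA_INT_EN);
 *	ioat_release(dma);		(writes DMACOUNT; starts the copy)
 *	...
 *	ioat_put_dmaengine(dma);	(drop the reference when done)
 */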
611bus_dmaengine_t
612ioat_get_dmaengine(uint32_t index)
613{
614
615	if (index >= ioat_channel_index)
616		return (NULL);
617	return (&ioat_get(ioat_channel[index], IOAT_DMAENGINE_REF)->dmaengine);
618}
619
620void
621ioat_put_dmaengine(bus_dmaengine_t dmaengine)
622{
623	struct ioat_softc *ioat;
624
625	ioat = to_ioat_softc(dmaengine);
626	ioat_put(ioat, IOAT_DMAENGINE_REF);
627}
628
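/*
 * Callers bracket a batch of submissions with ioat_acquire()/ioat_release().
 * Acquire takes the submit lock; release writes the updated hw_head to the
 * DMACOUNT register, which is what actually hands the queued descriptors to
 * the hardware, and then drops the lock.
 */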
629void
630ioat_acquire(bus_dmaengine_t dmaengine)
631{
632	struct ioat_softc *ioat;
633
634	ioat = to_ioat_softc(dmaengine);
635	mtx_lock(&ioat->submit_lock);
636	CTR0(KTR_IOAT, __func__);
637}
638
639void
640ioat_release(bus_dmaengine_t dmaengine)
641{
642	struct ioat_softc *ioat;
643
644	ioat = to_ioat_softc(dmaengine);
645	CTR0(KTR_IOAT, __func__);
646	ioat_write_2(ioat, IOAT_DMACOUNT_OFFSET, (uint16_t)ioat->hw_head);
647	mtx_unlock(&ioat->submit_lock);
648}
649
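/*
 * Queue a no-op descriptor.  Used internally to start the channel, and by
 * consumers that only want a completion notification.
 */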
650struct bus_dmadesc *
651ioat_null(bus_dmaengine_t dmaengine, bus_dmaengine_callback_t callback_fn,
652    void *callback_arg, uint32_t flags)
653{
654	struct ioat_softc *ioat;
655	struct ioat_descriptor *desc;
656	struct ioat_dma_hw_descriptor *hw_desc;
657	int mflags;
658
659	KASSERT((flags & ~DMA_ALL_FLAGS) == 0, ("Unrecognized flag(s): %#x",
660		flags & ~DMA_ALL_FLAGS));
661	if ((flags & DMA_NO_WAIT) != 0)
662		mflags = M_NOWAIT;
663	else
664		mflags = M_WAITOK;
665
666	ioat = to_ioat_softc(dmaengine);
667	mtx_assert(&ioat->submit_lock, MA_OWNED);
668
669	if (ioat_reserve_space(ioat, 1, mflags) != 0)
670		return (NULL);
671
672	CTR0(KTR_IOAT, __func__);
673
674	desc = ioat_get_ring_entry(ioat, ioat->head);
675	hw_desc = desc->u.dma;
676
677	hw_desc->u.control_raw = 0;
678	hw_desc->u.control.null = 1;
679	hw_desc->u.control.completion_update = 1;
680
681	if ((flags & DMA_INT_EN) != 0)
682		hw_desc->u.control.int_enable = 1;
683
684	hw_desc->size = 8;
685	hw_desc->src_addr = 0;
686	hw_desc->dest_addr = 0;
687
688	desc->bus_dmadesc.callback_fn = callback_fn;
689	desc->bus_dmadesc.callback_arg = callback_arg;
690
691	ioat_submit_single(ioat);
692	return (&desc->bus_dmadesc);
693}
694
695struct bus_dmadesc *
696ioat_copy(bus_dmaengine_t dmaengine, bus_addr_t dst,
697    bus_addr_t src, bus_size_t len, bus_dmaengine_callback_t callback_fn,
698    void *callback_arg, uint32_t flags)
699{
700	struct ioat_descriptor *desc;
701	struct ioat_dma_hw_descriptor *hw_desc;
702	struct ioat_softc *ioat;
703	int mflags;
704
705	KASSERT((flags & ~DMA_ALL_FLAGS) == 0, ("Unrecognized flag(s): %#x",
706		flags & ~DMA_ALL_FLAGS));
707	if ((flags & DMA_NO_WAIT) != 0)
708		mflags = M_NOWAIT;
709	else
710		mflags = M_WAITOK;
711
712	ioat = to_ioat_softc(dmaengine);
713	mtx_assert(&ioat->submit_lock, MA_OWNED);
714
715	if (len > ioat->max_xfer_size) {
716		ioat_log_message(0, "%s: max_xfer_size = %d, requested = %d\n",
717		    __func__, ioat->max_xfer_size, (int)len);
718		return (NULL);
719	}
720
721	if (ioat_reserve_space(ioat, 1, mflags) != 0)
722		return (NULL);
723
724	CTR0(KTR_IOAT, __func__);
725
726	desc = ioat_get_ring_entry(ioat, ioat->head);
727	hw_desc = desc->u.dma;
728
729	hw_desc->u.control_raw = 0;
730	hw_desc->u.control.completion_update = 1;
731
732	if ((flags & DMA_INT_EN) != 0)
733		hw_desc->u.control.int_enable = 1;
734
735	hw_desc->size = len;
736	hw_desc->src_addr = src;
737	hw_desc->dest_addr = dst;
738
739	if (g_ioat_debug_level >= 3)
740		dump_descriptor(hw_desc);
741
742	desc->bus_dmadesc.callback_fn = callback_fn;
743	desc->bus_dmadesc.callback_arg = callback_arg;
744
745	ioat_submit_single(ioat);
746	return (&desc->bus_dmadesc);
747}
748
749/*
750 * Ring Management
751 */
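/*
 * 'head' and 'tail' are free-running counters; masking with the ring size
 * (always a power of two) yields ring indices.  The active count is
 * head - tail, and one slot is always left unused so a full ring can be
 * distinguished from an empty one.
 */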
752static inline uint32_t
753ioat_get_active(struct ioat_softc *ioat)
754{
755
756	return ((ioat->head - ioat->tail) & ((1 << ioat->ring_size_order) - 1));
757}
758
759static inline uint32_t
760ioat_get_ring_space(struct ioat_softc *ioat)
761{
762
763	return ((1 << ioat->ring_size_order) - ioat_get_active(ioat) - 1);
764}
765
766static struct ioat_descriptor *
767ioat_alloc_ring_entry(struct ioat_softc *ioat, int mflags)
768{
769	struct ioat_dma_hw_descriptor *hw_desc;
770	struct ioat_descriptor *desc;
771	int error, busdmaflag;
772
773	error = ENOMEM;
774	hw_desc = NULL;
775
776	if ((mflags & M_WAITOK) != 0)
777		busdmaflag = BUS_DMA_WAITOK;
778	else
779		busdmaflag = BUS_DMA_NOWAIT;
780
781	desc = malloc(sizeof(*desc), M_IOAT, mflags);
782	if (desc == NULL)
783		goto out;
784
785	bus_dmamem_alloc(ioat->hw_desc_tag, (void **)&hw_desc,
786	    BUS_DMA_ZERO | busdmaflag, &ioat->hw_desc_map);
787	if (hw_desc == NULL)
788		goto out;
789
790	desc->u.dma = hw_desc;
791
792	error = bus_dmamap_load(ioat->hw_desc_tag, ioat->hw_desc_map, hw_desc,
793	    sizeof(*hw_desc), ioat_dmamap_cb, &desc->hw_desc_bus_addr,
794	    busdmaflag);
795	if (error)
796		goto out;
797
798out:
799	if (error) {
800		ioat_free_ring_entry(ioat, desc);
801		return (NULL);
802	}
803	return (desc);
804}
805
806static void
807ioat_free_ring_entry(struct ioat_softc *ioat, struct ioat_descriptor *desc)
808{
809
810	if (desc == NULL)
811		return;
812
813	if (desc->u.dma)
814		bus_dmamem_free(ioat->hw_desc_tag, desc->u.dma,
815		    ioat->hw_desc_map);
816	free(desc, M_IOAT);
817}
818
819/*
820 * Reserves space in this IOAT descriptor ring by ensuring enough slots remain
821 * for 'num_descs'.
822 *
823 * If mflags contains M_WAITOK, blocks until enough space is available.
824 *
825 * Returns zero on success, or an errno on error.  If num_descs is beyond the
826 * maximum ring size, returns EINVAL; if allocation would block and mflags
827 * contains M_NOWAIT, returns EAGAIN.
828 *
829 * Must be called with the submit_lock held; returns with the lock held.  The
830 * lock may be dropped to allocate the ring.
831 *
832 * (The submit_lock is needed to add any entries to the ring, so callers are
833 * assured enough room is available.)
834 */
835static int
836ioat_reserve_space(struct ioat_softc *ioat, uint32_t num_descs, int mflags)
837{
838	struct ioat_descriptor **new_ring;
839	uint32_t order;
840	int error;
841
842	mtx_assert(&ioat->submit_lock, MA_OWNED);
843	error = 0;
844
845	if (num_descs < 1 || num_descs > (1 << IOAT_MAX_ORDER)) {
846		error = EINVAL;
847		goto out;
848	}
849
850	for (;;) {
851		if (ioat_get_ring_space(ioat) >= num_descs)
852			goto out;
853
854		order = ioat->ring_size_order;
855		if (ioat->is_resize_pending || order == IOAT_MAX_ORDER) {
856			if ((mflags & M_WAITOK) != 0) {
857				msleep(&ioat->tail, &ioat->submit_lock, 0,
858				    "ioat_rsz", 0);
859				continue;
860			}
861
862			error = EAGAIN;
863			break;
864		}
865
866		ioat->is_resize_pending = TRUE;
867		for (;;) {
868			mtx_unlock(&ioat->submit_lock);
869
870			new_ring = ioat_prealloc_ring(ioat, 1 << (order + 1),
871			    TRUE, mflags);
872
873			mtx_lock(&ioat->submit_lock);
874			KASSERT(ioat->ring_size_order == order,
875			    ("is_resize_pending should protect order"));
876
877			if (new_ring == NULL) {
878				KASSERT((mflags & M_WAITOK) == 0,
879				    ("allocation failed"));
880				error = EAGAIN;
881				break;
882			}
883
884			error = ring_grow(ioat, order, new_ring);
885			if (error == 0)
886				break;
887		}
888		ioat->is_resize_pending = FALSE;
889		wakeup(&ioat->tail);
890		if (error)
891			break;
892	}
893
894out:
895	mtx_assert(&ioat->submit_lock, MA_OWNED);
896	return (error);
897}
898
899static struct ioat_descriptor **
900ioat_prealloc_ring(struct ioat_softc *ioat, uint32_t size, boolean_t need_dscr,
901    int mflags)
902{
903	struct ioat_descriptor **ring;
904	uint32_t i;
905	int error;
906
907	KASSERT(size > 0 && powerof2(size), ("bogus size"));
908
909	ring = malloc(size * sizeof(*ring), M_IOAT, M_ZERO | mflags);
910	if (ring == NULL)
911		return (NULL);
912
913	if (need_dscr) {
914		error = ENOMEM;
915		for (i = size / 2; i < size; i++) {
916			ring[i] = ioat_alloc_ring_entry(ioat, mflags);
917			if (ring[i] == NULL)
918				goto out;
919			ring[i]->id = i;
920		}
921	}
922	error = 0;
923
924out:
925	if (error != 0 && ring != NULL) {
926		ioat_free_ring(ioat, size, ring);
927		ring = NULL;
928	}
929	return (ring);
930}
931
932static void
933ioat_free_ring(struct ioat_softc *ioat, uint32_t size,
934    struct ioat_descriptor **ring)
935{
936	uint32_t i;
937
938	for (i = 0; i < size; i++) {
939		if (ring[i] != NULL)
940			ioat_free_ring_entry(ioat, ring[i]);
941	}
942	free(ring, M_IOAT);
943}
944
945static struct ioat_descriptor *
946ioat_get_ring_entry(struct ioat_softc *ioat, uint32_t index)
947{
948
949	return (ioat->ring[index % (1 << ioat->ring_size_order)]);
950}
951
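/*
 * Double the descriptor ring.  Existing entries are copied into the new ring;
 * if 'head' has wrapped while 'tail' has not, the wrapped portion is relocated
 * so the active region stays contiguous.  The hardware 'next' pointers are
 * then relinked, skipping the active tail..head window, and the new ring
 * replaces the old one under the cleanup lock.
 */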
952static int
953ring_grow(struct ioat_softc *ioat, uint32_t oldorder,
954    struct ioat_descriptor **newring)
955{
956	struct ioat_descriptor *tmp, *next;
957	struct ioat_dma_hw_descriptor *hw;
958	uint32_t oldsize, newsize, head, tail, i, end;
959	int error;
960
961	CTR0(KTR_IOAT, __func__);
962
963	mtx_assert(&ioat->submit_lock, MA_OWNED);
964
965	if (oldorder != ioat->ring_size_order || oldorder >= IOAT_MAX_ORDER) {
966		error = EINVAL;
967		goto out;
968	}
969
970	oldsize = (1 << oldorder);
971	newsize = (1 << (oldorder + 1));
972
973	mtx_lock(&ioat->cleanup_lock);
974
975	head = ioat->head & (oldsize - 1);
976	tail = ioat->tail & (oldsize - 1);
977
978	/* Copy old descriptors to new ring */
979	for (i = 0; i < oldsize; i++)
980		newring[i] = ioat->ring[i];
981
982	/*
983	 * If head has wrapped but tail hasn't, we must swap some descriptors
984	 * around so that tail can increment directly to head.
985	 */
986	if (head < tail) {
987		for (i = 0; i <= head; i++) {
988			tmp = newring[oldsize + i];
989
990			newring[oldsize + i] = newring[i];
991			newring[oldsize + i]->id = oldsize + i;
992
993			newring[i] = tmp;
994			newring[i]->id = i;
995		}
996		head += oldsize;
997	}
998
999	KASSERT(head >= tail, ("invariants"));
1000
1001	/* Head didn't wrap; we only need to link in oldsize..newsize */
1002	if (head < oldsize) {
1003		i = oldsize - 1;
1004		end = newsize;
1005	} else {
1006		/* Head did wrap; link newhead..newsize and 0..oldhead */
1007		i = head;
1008		end = newsize + (head - oldsize) + 1;
1009	}
1010
1011	/*
1012	 * Fix up hardware ring, being careful not to trample the active
1013	 * section (tail -> head).
1014	 */
1015	for (; i < end; i++) {
1016		KASSERT((i & (newsize - 1)) < tail ||
1017		    (i & (newsize - 1)) >= head, ("trampling snake"));
1018
1019		next = newring[(i + 1) & (newsize - 1)];
1020		hw = newring[i & (newsize - 1)]->u.dma;
1021		hw->next = next->hw_desc_bus_addr;
1022	}
1023
1024	free(ioat->ring, M_IOAT);
1025	ioat->ring = newring;
1026	ioat->ring_size_order = oldorder + 1;
1027	ioat->tail = tail;
1028	ioat->head = head;
1029	error = 0;
1030
1031	mtx_unlock(&ioat->cleanup_lock);
1032out:
1033	if (error)
1034		ioat_free_ring(ioat, (1 << (oldorder + 1)), newring);
1035	return (error);
1036}
1037
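/*
 * Halve the descriptor ring.  Fails with ENOMEM if the active descriptors no
 * longer fit.  The active entries (starting at 'tail') are compacted into the
 * new ring, the entries that fall off are freed, and the hardware chain is
 * closed back into a circle.
 */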
1038static int
1039ring_shrink(struct ioat_softc *ioat, uint32_t oldorder,
1040    struct ioat_descriptor **newring)
1041{
1042	struct ioat_dma_hw_descriptor *hw;
1043	struct ioat_descriptor *ent, *next;
1044	uint32_t oldsize, newsize, current_idx, new_idx, i;
1045	int error;
1046
1047	CTR0(KTR_IOAT, __func__);
1048
1049	mtx_assert(&ioat->submit_lock, MA_OWNED);
1050
1051	if (oldorder != ioat->ring_size_order || oldorder <= IOAT_MIN_ORDER) {
1052		error = EINVAL;
1053		goto out_unlocked;
1054	}
1055
1056	oldsize = (1 << oldorder);
1057	newsize = (1 << (oldorder - 1));
1058
1059	mtx_lock(&ioat->cleanup_lock);
1060
1061	/* Can't shrink below current active set! */
1062	if (ioat_get_active(ioat) >= newsize) {
1063		error = ENOMEM;
1064		goto out;
1065	}
1066
1067	/*
1068	 * Copy current descriptors to the new ring, dropping the removed
1069	 * descriptors.
1070	 */
1071	for (i = 0; i < newsize; i++) {
1072		current_idx = (ioat->tail + i) & (oldsize - 1);
1073		new_idx = (ioat->tail + i) & (newsize - 1);
1074
1075		newring[new_idx] = ioat->ring[current_idx];
1076		newring[new_idx]->id = new_idx;
1077	}
1078
1079	/* Free deleted descriptors */
1080	for (i = newsize; i < oldsize; i++) {
1081		ent = ioat_get_ring_entry(ioat, ioat->tail + i);
1082		ioat_free_ring_entry(ioat, ent);
1083	}
1084
1085	/* Fix up hardware ring. */
1086	hw = newring[(ioat->tail + newsize - 1) & (newsize - 1)]->u.dma;
1087	next = newring[(ioat->tail + newsize) & (newsize - 1)];
1088	hw->next = next->hw_desc_bus_addr;
1089
1090	free(ioat->ring, M_IOAT);
1091	ioat->ring = newring;
1092	ioat->ring_size_order = oldorder - 1;
1093	error = 0;
1094
1095out:
1096	mtx_unlock(&ioat->cleanup_lock);
1097out_unlocked:
1098	if (error)
1099		ioat_free_ring(ioat, (1 << (oldorder - 1)), newring);
1100	return (error);
1101}
1102
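/*
 * Log the CHANERR value and dump the two descriptors at the current tail to
 * help diagnose a halted channel.
 */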
1103static void
1104ioat_halted_debug(struct ioat_softc *ioat, uint32_t chanerr)
1105{
1106	struct ioat_descriptor *desc;
1107
1108	ioat_log_message(0, "Channel halted (%x)\n", chanerr);
1109	if (chanerr == 0)
1110		return;
1111
1112	mtx_lock(&ioat->submit_lock);
1113	desc = ioat_get_ring_entry(ioat, ioat->tail + 0);
1114	dump_descriptor(desc->u.raw);
1115
1116	desc = ioat_get_ring_entry(ioat, ioat->tail + 1);
1117	dump_descriptor(desc->u.raw);
1118	mtx_unlock(&ioat->submit_lock);
1119}
1120
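/*
 * Periodic housekeeping.  While work is outstanding, poll for completions and
 * dump diagnostics if the channel halted on an error; when the channel is
 * idle, try to shrink the ring by one order, rescheduling until it reaches
 * IOAT_MIN_ORDER.
 */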
1121static void
1122ioat_timer_callback(void *arg)
1123{
1124	struct ioat_descriptor **newring;
1125	struct ioat_softc *ioat;
1126	uint64_t status;
1127	uint32_t chanerr, order;
1128
1129	ioat = arg;
1130	ioat_log_message(1, "%s\n", __func__);
1131
1132	if (ioat->is_completion_pending) {
1133		status = ioat_get_chansts(ioat);
1134
1135		/*
1136		 * When halted due to errors, check for channel programming
1137		 * errors before advancing the completion state.
1138		 */
1139		if (is_ioat_halted(status)) {
1140			chanerr = ioat_read_4(ioat, IOAT_CHANERR_OFFSET);
1141			ioat_halted_debug(ioat, chanerr);
1142		}
1143		ioat_process_events(ioat);
1144	} else {
1145		mtx_lock(&ioat->submit_lock);
1146		order = ioat->ring_size_order;
1147		if (ioat->is_resize_pending || order == IOAT_MIN_ORDER) {
1148			mtx_unlock(&ioat->submit_lock);
1149			goto out;
1150		}
1151		ioat->is_resize_pending = TRUE;
1152		mtx_unlock(&ioat->submit_lock);
1153
1154		newring = ioat_prealloc_ring(ioat, 1 << (order - 1), FALSE,
1155		    M_NOWAIT);
1156
1157		mtx_lock(&ioat->submit_lock);
1158		KASSERT(ioat->ring_size_order == order,
1159		    ("resize_pending protects order"));
1160
1161		if (newring != NULL)
1162			ring_shrink(ioat, order, newring);
1163
1164		ioat->is_resize_pending = FALSE;
1165		mtx_unlock(&ioat->submit_lock);
1166
1167out:
1168		/* Slowly scale the ring down if idle. */
1169		if (ioat->ring_size_order > IOAT_MIN_ORDER)
1170			callout_reset(&ioat->timer, 10 * hz,
1171			    ioat_timer_callback, ioat);
1172	}
1173}
1174
1175/*
1176 * Support Functions
1177 */
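/*
 * Account for a single descriptor prepared by the caller: take an
 * active-descriptor reference, advance the software and hardware head
 * counters, and arm the completion timer if it is not already running.  The
 * doorbell write itself is deferred to ioat_release().
 */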
1178static void
1179ioat_submit_single(struct ioat_softc *ioat)
1180{
1181
1182	ioat_get(ioat, IOAT_ACTIVE_DESCR_REF);
1183	atomic_add_rel_int(&ioat->head, 1);
1184	atomic_add_rel_int(&ioat->hw_head, 1);
1185
1186	if (!ioat->is_completion_pending) {
1187		ioat->is_completion_pending = TRUE;
1188		callout_reset(&ioat->timer, IOAT_INTR_TIMO,
1189		    ioat_timer_callback, ioat);
1190	}
1191}
1192
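/*
 * Full channel reset: quiesce the channel (waiting up to 20 ms), clear
 * CHANERR, apply the IOAT v3 CHANERRMASK_INT workaround, preserve MSI-X state
 * on models that lose it across reset, then reprogram CHANCMP and CHAINADDR
 * and restart the channel with a NULL descriptor.
 */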
1193static int
1194ioat_reset_hw(struct ioat_softc *ioat)
1195{
1196	uint64_t status;
1197	uint32_t chanerr;
1198	unsigned timeout;
1199
1200	status = ioat_get_chansts(ioat);
1201	if (is_ioat_active(status) || is_ioat_idle(status))
1202		ioat_suspend(ioat);
1203
1204	/* Wait at most 20 ms */
1205	for (timeout = 0; (is_ioat_active(status) || is_ioat_idle(status)) &&
1206	    timeout < 20; timeout++) {
1207		DELAY(1000);
1208		status = ioat_get_chansts(ioat);
1209	}
1210	if (timeout == 20)
1211		return (ETIMEDOUT);
1212
1213	KASSERT(ioat_get_active(ioat) == 0, ("active after quiesce"));
1214
1215	chanerr = ioat_read_4(ioat, IOAT_CHANERR_OFFSET);
1216	ioat_write_4(ioat, IOAT_CHANERR_OFFSET, chanerr);
1217
1218	/*
1219	 * IOAT v3 workaround - write CHANERRMSK_INT with 3E07h to mask out
1220	 * errors that can cause stability issues for IOAT v3.
1221	 */
1222	pci_write_config(ioat->device, IOAT_CFG_CHANERRMASK_INT_OFFSET, 0x3e07,
1223	    4);
1224	chanerr = pci_read_config(ioat->device, IOAT_CFG_CHANERR_INT_OFFSET, 4);
1225	pci_write_config(ioat->device, IOAT_CFG_CHANERR_INT_OFFSET, chanerr, 4);
1226
1227	/*
1228	 * BDXDE and BWD models reset MSI-X registers on device reset.
1229	 * Save/restore their contents manually.
1230	 */
1231	if (ioat_model_resets_msix(ioat)) {
1232		ioat_log_message(1, "device resets MSI-X registers; saving\n");
1233		pci_save_state(ioat->device);
1234	}
1235
1236	ioat_reset(ioat);
1237
1238	/* Wait at most 20 ms */
1239	for (timeout = 0; ioat_reset_pending(ioat) && timeout < 20; timeout++)
1240		DELAY(1000);
1241	if (timeout == 20)
1242		return (ETIMEDOUT);
1243
1244	if (ioat_model_resets_msix(ioat)) {
1245		ioat_log_message(1, "device resets MSI-X registers; restoring\n");
1246		pci_restore_state(ioat->device);
1247	}
1248
1249	/* Reset attempts to return the hardware to "halted." */
1250	status = ioat_get_chansts(ioat);
1251	if (is_ioat_active(status) || is_ioat_idle(status)) {
1252		/* So this really shouldn't happen... */
1253		ioat_log_message(0, "Device is active after a reset?\n");
1254		ioat_write_chanctrl(ioat, IOAT_CHANCTRL_RUN);
1255		return (0);
1256	}
1257
1258	chanerr = ioat_read_4(ioat, IOAT_CHANERR_OFFSET);
1259	ioat_halted_debug(ioat, chanerr);
1260	if (chanerr != 0)
1261		return (EIO);
1262
1263	/*
1264	 * Bring device back online after reset.  Writing CHAINADDR brings the
1265	 * device back to active.
1266	 *
1267	 * The internal ring counter resets to zero, so we have to start over
1268	 * at zero as well.
1269	 */
1270	ioat->tail = ioat->head = ioat->hw_head = 0;
1271	ioat->last_seen = 0;
1272
1273	ioat_write_chanctrl(ioat, IOAT_CHANCTRL_RUN);
1274	ioat_write_chancmp(ioat, ioat->comp_update_bus_addr);
1275	ioat_write_chainaddr(ioat, ioat->ring[0]->hw_desc_bus_addr);
1276	return (ioat_start_channel(ioat));
1277}
1278
1279static int
1280sysctl_handle_reset(SYSCTL_HANDLER_ARGS)
1281{
1282	struct ioat_softc *ioat;
1283	int error, arg;
1284
1285	ioat = arg1;
1286
1287	arg = 0;
1288	error = SYSCTL_OUT(req, &arg, sizeof(arg));
1289	if (error != 0 || req->newptr == NULL)
1290		return (error);
1291
1292	error = SYSCTL_IN(req, &arg, sizeof(arg));
1293	if (error != 0)
1294		return (error);
1295
1296	if (arg != 0)
1297		error = ioat_reset_hw(ioat);
1298
1299	return (error);
1300}
1301
1302static void
1303dump_descriptor(void *hw_desc)
1304{
1305	int i, j;
1306
1307	for (i = 0; i < 2; i++) {
1308		for (j = 0; j < 8; j++)
1309			printf("%08x ", ((uint32_t *)hw_desc)[i * 8 + j]);
1310		printf("\n");
1311	}
1312}
1313
1314static void
1315ioat_setup_sysctl(device_t device)
1316{
1317	struct sysctl_oid_list *par;
1318	struct sysctl_ctx_list *ctx;
1319	struct sysctl_oid *tree;
1320	struct ioat_softc *ioat;
1321
1322	ioat = DEVICE2SOFTC(device);
1323	ctx = device_get_sysctl_ctx(device);
1324	tree = device_get_sysctl_tree(device);
1325	par = SYSCTL_CHILDREN(tree);
1326
1327	SYSCTL_ADD_INT(ctx, par, OID_AUTO, "version", CTLFLAG_RD,
1328	    &ioat->version, 0, "HW version (0xMM form)");
1329	SYSCTL_ADD_UINT(ctx, par, OID_AUTO, "max_xfer_size", CTLFLAG_RD,
1330	    &ioat->max_xfer_size, 0, "HW maximum transfer size");
1331
1332	SYSCTL_ADD_UINT(ctx, par, OID_AUTO, "ring_size_order", CTLFLAG_RD,
1333	    &ioat->ring_size_order, 0, "SW descriptor ring size order");
1334	SYSCTL_ADD_UINT(ctx, par, OID_AUTO, "head", CTLFLAG_RD, &ioat->head, 0,
1335	    "SW descriptor head pointer index");
1336	SYSCTL_ADD_UINT(ctx, par, OID_AUTO, "tail", CTLFLAG_RD, &ioat->tail, 0,
1337	    "SW descriptor tail pointer index");
1338	SYSCTL_ADD_UINT(ctx, par, OID_AUTO, "hw_head", CTLFLAG_RD,
1339	    &ioat->hw_head, 0, "HW DMACOUNT");
1340
1341	SYSCTL_ADD_UQUAD(ctx, par, OID_AUTO, "last_completion", CTLFLAG_RD,
1342	    ioat->comp_update, "HW addr of last completion");
1343
1344	SYSCTL_ADD_INT(ctx, par, OID_AUTO, "is_resize_pending", CTLFLAG_RD,
1345	    &ioat->is_resize_pending, 0, "resize pending");
1346	SYSCTL_ADD_INT(ctx, par, OID_AUTO, "is_completion_pending", CTLFLAG_RD,
1347	    &ioat->is_completion_pending, 0, "completion pending");
1348	SYSCTL_ADD_INT(ctx, par, OID_AUTO, "is_reset_pending", CTLFLAG_RD,
1349	    &ioat->is_reset_pending, 0, "reset pending");
1350	SYSCTL_ADD_INT(ctx, par, OID_AUTO, "is_channel_running", CTLFLAG_RD,
1351	    &ioat->is_channel_running, 0, "channel running");
1352
1353	SYSCTL_ADD_PROC(ctx, par, OID_AUTO, "force_hw_reset",
1354	    CTLTYPE_INT | CTLFLAG_RW, ioat, 0, sysctl_handle_reset, "I",
1355	    "Set to non-zero to reset the hardware");
1356}
1357
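/*
 * Reference counting: 'refcnt' tracks outstanding references to the channel
 * (DMA engine handles and in-flight descriptors), with per-kind counters kept
 * under INVARIANTS.  ioat_putn() only takes IOAT_REFLK (the submit lock) when
 * the count may drop to zero, so that ioat_drain() can sleep on it during
 * detach until the channel is idle.
 */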
1358static inline struct ioat_softc *
1359ioat_get(struct ioat_softc *ioat, enum ioat_ref_kind kind)
1360{
1361	uint32_t old;
1362
1363	KASSERT(kind < IOAT_NUM_REF_KINDS, ("bogus"));
1364
1365	old = atomic_fetchadd_32(&ioat->refcnt, 1);
1366	KASSERT(old < UINT32_MAX, ("refcnt overflow"));
1367
1368#ifdef INVARIANTS
1369	old = atomic_fetchadd_32(&ioat->refkinds[kind], 1);
1370	KASSERT(old < UINT32_MAX, ("refcnt kind overflow"));
1371#endif
1372
1373	return (ioat);
1374}
1375
1376static inline void
1377ioat_putn(struct ioat_softc *ioat, uint32_t n, enum ioat_ref_kind kind)
1378{
1379	uint32_t old;
1380
1381	KASSERT(kind < IOAT_NUM_REF_KINDS, ("bogus"));
1382
1383	if (n == 0)
1384		return;
1385
1386#ifdef INVARIANTS
1387	old = atomic_fetchadd_32(&ioat->refkinds[kind], -n);
1388	KASSERT(old >= n, ("refcnt kind underflow"));
1389#endif
1390
1391	/* Skip acquiring the lock if resulting refcnt > 0. */
1392	for (;;) {
1393		old = ioat->refcnt;
1394		if (old <= n)
1395			break;
1396		if (atomic_cmpset_32(&ioat->refcnt, old, old - n))
1397			return;
1398	}
1399
1400	mtx_lock(IOAT_REFLK);
1401	old = atomic_fetchadd_32(&ioat->refcnt, -n);
1402	KASSERT(old >= n, ("refcnt error"));
1403
1404	if (old == n)
1405		wakeup(IOAT_REFLK);
1406	mtx_unlock(IOAT_REFLK);
1407}
1408
1409static inline void
1410ioat_put(struct ioat_softc *ioat, enum ioat_ref_kind kind)
1411{
1412
1413	ioat_putn(ioat, 1, kind);
1414}
1415
1416static void
1417ioat_drain(struct ioat_softc *ioat)
1418{
1419
1420	mtx_lock(IOAT_REFLK);
1421	while (ioat->refcnt > 0)
1422		msleep(IOAT_REFLK, IOAT_REFLK, 0, "ioat_drain", 0);
1423	mtx_unlock(IOAT_REFLK);
1424}
1425