1/*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (c) 2012 Adrian Chadd <adrian@FreeBSD.org>
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer,
12 *    without modification.
13 * 2. Redistributions in binary form must reproduce at minimum a disclaimer
14 *    similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any
15 *    redistribution must be conditioned upon including a substantially
16 *    similar Disclaimer requirement for further binary redistribution.
17 *
18 * NO WARRANTY
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 * LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY
22 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
23 * THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY,
24 * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
27 * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
29 * THE POSSIBILITY OF SUCH DAMAGES.
30 */
31
32#include <sys/cdefs.h>
33__FBSDID("$FreeBSD: releng/12.0/sys/dev/ath/if_ath_tx_edma.c 326255 2017-11-27 14:52:40Z pfg $");
34
35/*
36 * Driver for the Atheros Wireless LAN controller.
37 *
38 * This software is derived from work of Atsushi Onoe; his contribution
39 * is greatly appreciated.
40 */
41
42#include "opt_inet.h"
43#include "opt_ath.h"
44/*
45 * This is needed for register operations which are performed
46 * by the driver - eg, calls to ath_hal_gettsf32().
47 *
48 * It's also required for any AH_DEBUG checks in here, eg the
49 * module dependencies.
50 */
51#include "opt_ah.h"
52#include "opt_wlan.h"
53
54#include <sys/param.h>
55#include <sys/systm.h>
56#include <sys/sysctl.h>
57#include <sys/mbuf.h>
58#include <sys/malloc.h>
59#include <sys/lock.h>
60#include <sys/mutex.h>
61#include <sys/kernel.h>
62#include <sys/socket.h>
63#include <sys/sockio.h>
64#include <sys/errno.h>
65#include <sys/callout.h>
66#include <sys/bus.h>
67#include <sys/endian.h>
68#include <sys/kthread.h>
69#include <sys/taskqueue.h>
70#include <sys/priv.h>
71#include <sys/module.h>
72#include <sys/ktr.h>
73#include <sys/smp.h>	/* for mp_ncpus */
74
75#include <machine/bus.h>
76
77#include <net/if.h>
78#include <net/if_var.h>
79#include <net/if_dl.h>
80#include <net/if_media.h>
81#include <net/if_types.h>
82#include <net/if_arp.h>
83#include <net/ethernet.h>
84#include <net/if_llc.h>
85
86#include <net80211/ieee80211_var.h>
87#include <net80211/ieee80211_regdomain.h>
88#ifdef IEEE80211_SUPPORT_SUPERG
89#include <net80211/ieee80211_superg.h>
90#endif
91#ifdef IEEE80211_SUPPORT_TDMA
92#include <net80211/ieee80211_tdma.h>
93#endif
94
95#include <net/bpf.h>
96
97#ifdef INET
98#include <netinet/in.h>
99#include <netinet/if_ether.h>
100#endif
101
102#include <dev/ath/if_athvar.h>
103#include <dev/ath/ath_hal/ah_devid.h>		/* XXX for softled */
104#include <dev/ath/ath_hal/ah_diagcodes.h>
105
106#include <dev/ath/if_ath_debug.h>
107#include <dev/ath/if_ath_misc.h>
108#include <dev/ath/if_ath_tsf.h>
109#include <dev/ath/if_ath_tx.h>
110#include <dev/ath/if_ath_sysctl.h>
111#include <dev/ath/if_ath_led.h>
112#include <dev/ath/if_ath_keycache.h>
113#include <dev/ath/if_ath_rx.h>
114#include <dev/ath/if_ath_beacon.h>
115#include <dev/ath/if_athdfs.h>
116#include <dev/ath/if_ath_descdma.h>
117
118#ifdef ATH_TX99_DIAG
119#include <dev/ath/ath_tx99/ath_tx99.h>
120#endif
121
122#include <dev/ath/if_ath_tx_edma.h>
123
124#ifdef	ATH_DEBUG_ALQ
125#include <dev/ath/if_ath_alq.h>
126#endif
127
/*
 * Ring-index helpers.
 *
 * INCR()/DECR() step the index _l forwards/backwards by one and then wrap
 * it with a mask of (_sz - 1), so _sz has to be a power of two.
 *
 * Each macro expands to exactly one statement (do { } while (0)) so that
 * both the step and the wrap stay inside an unbraced if/else or loop body.
 * _l must be a modifiable lvalue; it is evaluated more than once.
 */
#define	INCR(_l, _sz)		do {					\
	(_l)++;								\
	(_l) &= ((_sz) - 1);						\
} while (0)
#define	DECR(_l, _sz)		do {					\
	(_l)--;								\
	(_l) &= ((_sz) - 1);						\
} while (0)
133
/*
 * Number of entries in the TX completion status ring.  The same value is
 * handed to the descriptor allocator and to the HAL when the ring is set
 * up.
 *
 * XXX doesn't belong here, and should be tunable
 */
#define	ATH_TXSTATUS_RING_SIZE	512

/* malloc(9) type used for the per-queue FIFO arrays allocated in this file. */
MALLOC_DECLARE(M_ATHDEV);

/*
 * Forward declaration: the drain path calls the completion processor,
 * which is defined further down in this file.
 */
static void ath_edma_tx_processq(struct ath_softc *sc, int dosched);
142
#ifdef	ATH_DEBUG_ALQ
/*
 * Post a "TX FIFO push" record to the ALQ debug log.
 *
 * The caller in this file passes the hardware queue number as 'txq', the
 * number of frames in the set just pushed as 'nframes', the number of
 * FIFO entries now in use as 'fifo_depth' and the number of frames on the
 * FIFO tracking list as 'frame_cnt'.  Every field is stored as a 32-bit
 * big-endian value.
 */
static void
ath_tx_alq_edma_push(struct ath_softc *sc, int txq, int nframes,
    int fifo_depth, int frame_cnt)
{
	struct if_ath_alq_tx_fifo_push rec;

	rec.frame_cnt = htobe32(frame_cnt);
	rec.fifo_depth = htobe32(fifo_depth);
	rec.nframes = htobe32(nframes);
	rec.txq = htobe32(txq);

	if_ath_alq_post(&sc->sc_alq, ATH_ALQ_TX_FIFO_PUSH, sizeof(rec),
	    (const char *) &rec);
}
#endif	/* ATH_DEBUG_ALQ */
160
161/*
162 * XXX TODO: push an aggregate as a single FIFO slot, even though
163 * it may not meet the TXOP for say, DBA-gated traffic in TDMA mode.
164 *
165 * The TX completion code handles a TX FIFO slot having multiple frames,
166 * aggregate or otherwise, but it may just make things easier to deal
167 * with.
168 *
169 * XXX TODO: track the number of aggregate subframes and put that in the
170 * push alq message.
171 */
172static void
173ath_tx_edma_push_staging_list(struct ath_softc *sc, struct ath_txq *txq,
174    int limit)
175{
176	struct ath_buf *bf, *bf_last;
177	struct ath_buf *bfi, *bfp;
178	int i, sqdepth;
179	TAILQ_HEAD(axq_q_f_s, ath_buf)  sq;
180
181	ATH_TXQ_LOCK_ASSERT(txq);
182
183	DPRINTF(sc, ATH_DEBUG_XMIT | ATH_DEBUG_TX_PROC,
184	    "%s: called; TXQ=%d, fifo.depth=%d, axq_q empty=%d\n",
185	    __func__,
186	    txq->axq_qnum,
187	    txq->axq_fifo_depth,
188	    !! (TAILQ_EMPTY(&txq->axq_q)));
189
190	/*
191	 * Don't bother doing any work if it's full.
192	 */
193	if (txq->axq_fifo_depth >= HAL_TXFIFO_DEPTH)
194		return;
195
196	if (TAILQ_EMPTY(&txq->axq_q))
197		return;
198
199	TAILQ_INIT(&sq);
200
201	/*
202	 * First pass - walk sq, queue up to 'limit' entries,
203	 * subtract them from the staging queue.
204	 */
205	sqdepth = 0;
206	for (i = 0; i < limit; i++) {
207		/* Grab the head entry */
208		bf = ATH_TXQ_FIRST(txq);
209		if (bf == NULL)
210			break;
211		ATH_TXQ_REMOVE(txq, bf, bf_list);
212
213		/* Queue it into our staging list */
214		TAILQ_INSERT_TAIL(&sq, bf, bf_list);
215
216		/* Ensure the flags are cleared */
217		bf->bf_flags &= ~(ATH_BUF_FIFOPTR | ATH_BUF_FIFOEND);
218		sqdepth++;
219	}
220
221	/*
222	 * Ok, so now we have a staging list of up to 'limit'
223	 * frames from the txq.  Now let's wrap that up
224	 * into its own list and pass that to the hardware
225	 * as one FIFO entry.
226	 */
227
228	bf = TAILQ_FIRST(&sq);
229	bf_last = TAILQ_LAST(&sq, axq_q_s);
230
231	/*
232	 * Ok, so here's the gymnastics reqiured to make this
233	 * all sensible.
234	 */
235
236	/*
237	 * Tag the first/last buffer appropriately.
238	 */
239	bf->bf_flags |= ATH_BUF_FIFOPTR;
240	bf_last->bf_flags |= ATH_BUF_FIFOEND;
241
242	/*
243	 * Walk the descriptor list and link them appropriately.
244	 */
245	bfp = NULL;
246	TAILQ_FOREACH(bfi, &sq, bf_list) {
247		if (bfp != NULL) {
248			ath_hal_settxdesclink(sc->sc_ah, bfp->bf_lastds,
249			    bfi->bf_daddr);
250		}
251		bfp = bfi;
252	}
253
254	i = 0;
255	TAILQ_FOREACH(bfi, &sq, bf_list) {
256#ifdef	ATH_DEBUG
257		if (sc->sc_debug & ATH_DEBUG_XMIT_DESC)
258			ath_printtxbuf(sc, bfi, txq->axq_qnum, i, 0);
259#endif/* ATH_DEBUG */
260#ifdef	ATH_DEBUG_ALQ
261		if (if_ath_alq_checkdebug(&sc->sc_alq, ATH_ALQ_EDMA_TXDESC))
262			ath_tx_alq_post(sc, bfi);
263#endif /* ATH_DEBUG_ALQ */
264		i++;
265	}
266
267	/*
268	 * We now need to push this set of frames onto the tail
269	 * of the FIFO queue.  We don't adjust the aggregate
270	 * count, only the queue depth counter(s).
271	 * We also need to blank the link pointer now.
272	 */
273
274	TAILQ_CONCAT(&txq->fifo.axq_q, &sq, bf_list);
275	/* Bump total queue tracking in FIFO queue */
276	txq->fifo.axq_depth += sqdepth;
277
278	/* Bump FIFO queue */
279	txq->axq_fifo_depth++;
280	DPRINTF(sc, ATH_DEBUG_XMIT | ATH_DEBUG_TX_PROC,
281	    "%s: queued %d packets; depth=%d, fifo depth=%d\n",
282	    __func__, sqdepth, txq->fifo.axq_depth, txq->axq_fifo_depth);
283
284	/* Push the first entry into the hardware */
285	ath_hal_puttxbuf(sc->sc_ah, txq->axq_qnum, bf->bf_daddr);
286
287	/* Push start on the DMA if it's not already started */
288	ath_hal_txstart(sc->sc_ah, txq->axq_qnum);
289
290#ifdef	ATH_DEBUG_ALQ
291	ath_tx_alq_edma_push(sc, txq->axq_qnum, sqdepth,
292	    txq->axq_fifo_depth,
293	    txq->fifo.axq_depth);
294#endif /* ATH_DEBUG_ALQ */
295}
296
297#define	TX_BATCH_SIZE	32
298
299/*
300 * Push some frames into the TX FIFO if we have space.
301 */
302static void
303ath_edma_tx_fifo_fill(struct ath_softc *sc, struct ath_txq *txq)
304{
305
306	ATH_TXQ_LOCK_ASSERT(txq);
307
308	DPRINTF(sc, ATH_DEBUG_TX_PROC,
309	    "%s: Q%d: called; fifo.depth=%d, fifo depth=%d, depth=%d, aggr_depth=%d\n",
310	    __func__,
311	    txq->axq_qnum,
312	    txq->fifo.axq_depth,
313	    txq->axq_fifo_depth,
314	    txq->axq_depth,
315	    txq->axq_aggr_depth);
316
317	/*
318	 * For now, push up to 32 frames per TX FIFO slot.
319	 * If more are in the hardware queue then they'll
320	 * get populated when we try to send another frame
321	 * or complete a frame - so at most there'll be
322	 * 32 non-AMPDU frames per node/TID anyway.
323	 *
324	 * Note that the hardware staging queue will limit
325	 * how many frames in total we will have pushed into
326	 * here.
327	 *
328	 * Later on, we'll want to push less frames into
329	 * the TX FIFO since we don't want to necessarily
330	 * fill tens or hundreds of milliseconds of potential
331	 * frames.
332	 *
333	 * However, we need more frames right now because of
334	 * how the MAC implements the frame scheduling policy.
335	 * It only ungates a single FIFO entry at a time,
336	 * and will run that until CHNTIME expires or the
337	 * end of that FIFO entry descriptor list is reached.
338	 * So for TDMA we suffer a big performance penalty -
339	 * single TX FIFO entries mean the MAC only sends out
340	 * one frame per DBA event, which turned out on average
341	 * 6ms per TX frame.
342	 *
343	 * So, for aggregates it's okay - it'll push two at a
344	 * time and this will just do them more efficiently.
345	 * For non-aggregates it'll do 4 at a time, up to the
346	 * non-aggr limit (non_aggr, which is 32.)  They should
347	 * be time based rather than a hard count, but I also
348	 * do need sleep.
349	 */
350
351	/*
352	 * Do some basic, basic batching to the hardware
353	 * queue.
354	 *
355	 * If we have TX_BATCH_SIZE entries in the staging
356	 * queue, then let's try to send them all in one hit.
357	 *
358	 * Ensure we don't push more than TX_BATCH_SIZE worth
359	 * in, otherwise we end up draining 8 slots worth of
360	 * 32 frames into the hardware queue and then we don't
361	 * attempt to push more frames in until we empty the
362	 * FIFO.
363	 */
364	if (txq->axq_depth >= TX_BATCH_SIZE / 2 &&
365	    txq->fifo.axq_depth <= TX_BATCH_SIZE) {
366		ath_tx_edma_push_staging_list(sc, txq, TX_BATCH_SIZE);
367	}
368
369	/*
370	 * Aggregate check: if we have less than two FIFO slots
371	 * busy and we have some aggregate frames, queue it.
372	 *
373	 * Now, ideally we'd just check to see if the scheduler
374	 * has given us aggregate frames and push them into the FIFO
375	 * as individual slots, as honestly we should just be pushing
376	 * a single aggregate in as one FIFO slot.
377	 *
378	 * Let's do that next once I know this works.
379	 */
380	else if (txq->axq_aggr_depth > 0 && txq->axq_fifo_depth < 2)
381		ath_tx_edma_push_staging_list(sc, txq, TX_BATCH_SIZE);
382
383	/*
384	 *
385	 * If we have less, and the TXFIFO isn't empty, let's
386	 * wait until we've finished sending the FIFO.
387	 *
388	 * If we have less, and the TXFIFO is empty, then
389	 * send them.
390	 */
391	else if (txq->axq_fifo_depth == 0) {
392		ath_tx_edma_push_staging_list(sc, txq, TX_BATCH_SIZE);
393	}
394}
395
396/*
397 * Re-initialise the DMA FIFO with the current contents of
398 * said TXQ.
399 *
400 * This should only be called as part of the chip reset path, as it
401 * assumes the FIFO is currently empty.
402 */
403static void
404ath_edma_dma_restart(struct ath_softc *sc, struct ath_txq *txq)
405{
406	struct ath_buf *bf;
407	int i = 0;
408	int fifostart = 1;
409	int old_fifo_depth;
410
411	DPRINTF(sc, ATH_DEBUG_RESET, "%s: Q%d: called\n",
412	    __func__,
413	    txq->axq_qnum);
414
415	ATH_TXQ_LOCK_ASSERT(txq);
416
417	/*
418	 * Let's log if the tracked FIFO depth doesn't match
419	 * what we actually push in.
420	 */
421	old_fifo_depth = txq->axq_fifo_depth;
422	txq->axq_fifo_depth = 0;
423
424	/*
425	 * Walk the FIFO staging list, looking for "head" entries.
426	 * Since we may have a partially completed list of frames,
427	 * we push the first frame we see into the FIFO and re-mark
428	 * it as the head entry.  We then skip entries until we see
429	 * FIFO end, at which point we get ready to push another
430	 * entry into the FIFO.
431	 */
432	TAILQ_FOREACH(bf, &txq->fifo.axq_q, bf_list) {
433		/*
434		 * If we're looking for FIFOEND and we haven't found
435		 * it, skip.
436		 *
437		 * If we're looking for FIFOEND and we've found it,
438		 * reset for another descriptor.
439		 */
440#ifdef	ATH_DEBUG
441		if (sc->sc_debug & ATH_DEBUG_XMIT_DESC)
442			ath_printtxbuf(sc, bf, txq->axq_qnum, i, 0);
443#endif/* ATH_DEBUG */
444#ifdef	ATH_DEBUG_ALQ
445		if (if_ath_alq_checkdebug(&sc->sc_alq, ATH_ALQ_EDMA_TXDESC))
446			ath_tx_alq_post(sc, bf);
447#endif /* ATH_DEBUG_ALQ */
448
449		if (fifostart == 0) {
450			if (bf->bf_flags & ATH_BUF_FIFOEND)
451				fifostart = 1;
452			continue;
453		}
454
455		/* Make sure we're not overflowing the FIFO! */
456		if (txq->axq_fifo_depth >= HAL_TXFIFO_DEPTH) {
457			device_printf(sc->sc_dev,
458			    "%s: Q%d: more frames in the queue; FIFO depth=%d?!\n",
459			    __func__,
460			    txq->axq_qnum,
461			    txq->axq_fifo_depth);
462		}
463
464#if 0
465		DPRINTF(sc, ATH_DEBUG_RESET,
466		    "%s: Q%d: depth=%d: pushing bf=%p; start=%d, end=%d\n",
467		    __func__,
468		    txq->axq_qnum,
469		    txq->axq_fifo_depth,
470		    bf,
471		    !! (bf->bf_flags & ATH_BUF_FIFOPTR),
472		    !! (bf->bf_flags & ATH_BUF_FIFOEND));
473#endif
474
475		/*
476		 * Set this to be the first buffer in the FIFO
477		 * list - even if it's also the last buffer in
478		 * a FIFO list!
479		 */
480		bf->bf_flags |= ATH_BUF_FIFOPTR;
481
482		/* Push it into the FIFO and bump the FIFO count */
483		ath_hal_puttxbuf(sc->sc_ah, txq->axq_qnum, bf->bf_daddr);
484		txq->axq_fifo_depth++;
485
486		/*
487		 * If this isn't the last entry either, let's
488		 * clear fifostart so we continue looking for
489		 * said last entry.
490		 */
491		if (! (bf->bf_flags & ATH_BUF_FIFOEND))
492			fifostart = 0;
493		i++;
494	}
495
496	/* Only bother starting the queue if there's something in it */
497	if (i > 0)
498		ath_hal_txstart(sc->sc_ah, txq->axq_qnum);
499
500	DPRINTF(sc, ATH_DEBUG_RESET, "%s: Q%d: FIFO depth was %d, is %d\n",
501	    __func__,
502	    txq->axq_qnum,
503	    old_fifo_depth,
504	    txq->axq_fifo_depth);
505
506	/* And now, let's check! */
507	if (txq->axq_fifo_depth != old_fifo_depth) {
508		device_printf(sc->sc_dev,
509		    "%s: Q%d: FIFO depth should be %d, is %d\n",
510		    __func__,
511		    txq->axq_qnum,
512		    old_fifo_depth,
513		    txq->axq_fifo_depth);
514	}
515}
516
517/*
518 * Hand off this frame to a hardware queue.
519 *
520 * Things are a bit hairy in the EDMA world.  The TX FIFO is only
521 * 8 entries deep, so we need to keep track of exactly what we've
522 * pushed into the FIFO and what's just sitting in the TX queue,
523 * waiting to go out.
524 *
525 * So this is split into two halves - frames get appended to the
526 * TXQ; then a scheduler is called to push some frames into the
527 * actual TX FIFO.
528 */
529static void
530ath_edma_xmit_handoff_hw(struct ath_softc *sc, struct ath_txq *txq,
531    struct ath_buf *bf)
532{
533
534	ATH_TXQ_LOCK(txq);
535
536	KASSERT((bf->bf_flags & ATH_BUF_BUSY) == 0,
537	    ("%s: busy status 0x%x", __func__, bf->bf_flags));
538
539	/*
540	 * XXX TODO: write a hard-coded check to ensure that
541	 * the queue id in the TX descriptor matches txq->axq_qnum.
542	 */
543
544	/* Update aggr stats */
545	if (bf->bf_state.bfs_aggr)
546		txq->axq_aggr_depth++;
547
548	/* Push and update frame stats */
549	ATH_TXQ_INSERT_TAIL(txq, bf, bf_list);
550
551	/*
552	 * Finally, call the FIFO schedule routine to schedule some
553	 * frames to the FIFO.
554	 */
555	ath_edma_tx_fifo_fill(sc, txq);
556	ATH_TXQ_UNLOCK(txq);
557}
558
559/*
560 * Hand off this frame to a multicast software queue.
561 *
562 * The EDMA TX CABQ will get a list of chained frames, chained
563 * together using the next pointer.  The single head of that
564 * particular queue is pushed to the hardware CABQ.
565 */
566static void
567ath_edma_xmit_handoff_mcast(struct ath_softc *sc, struct ath_txq *txq,
568    struct ath_buf *bf)
569{
570
571	ATH_TX_LOCK_ASSERT(sc);
572	KASSERT((bf->bf_flags & ATH_BUF_BUSY) == 0,
573	    ("%s: busy status 0x%x", __func__, bf->bf_flags));
574
575	ATH_TXQ_LOCK(txq);
576	/*
577	 * XXX this is mostly duplicated in ath_tx_handoff_mcast().
578	 */
579	if (ATH_TXQ_LAST(txq, axq_q_s) != NULL) {
580		struct ath_buf *bf_last = ATH_TXQ_LAST(txq, axq_q_s);
581		struct ieee80211_frame *wh;
582
583		/* mark previous frame */
584		wh = mtod(bf_last->bf_m, struct ieee80211_frame *);
585		wh->i_fc[1] |= IEEE80211_FC1_MORE_DATA;
586
587		/* re-sync buffer to memory */
588		bus_dmamap_sync(sc->sc_dmat, bf_last->bf_dmamap,
589		   BUS_DMASYNC_PREWRITE);
590
591		/* link descriptor */
592		ath_hal_settxdesclink(sc->sc_ah,
593		    bf_last->bf_lastds,
594		    bf->bf_daddr);
595	}
596#ifdef	ATH_DEBUG_ALQ
597	if (if_ath_alq_checkdebug(&sc->sc_alq, ATH_ALQ_EDMA_TXDESC))
598		ath_tx_alq_post(sc, bf);
599#endif	/* ATH_DEBUG_ALQ */
600	ATH_TXQ_INSERT_TAIL(txq, bf, bf_list);
601	ATH_TXQ_UNLOCK(txq);
602}
603
604/*
605 * Handoff this frame to the hardware.
606 *
607 * For the multicast queue, this will treat it as a software queue
608 * and append it to the list, after updating the MORE_DATA flag
609 * in the previous frame.  The cabq processing code will ensure
610 * that the queue contents gets transferred over.
611 *
612 * For the hardware queues, this will queue a frame to the queue
613 * like before, then populate the FIFO from that.  Since the
614 * EDMA hardware has 8 FIFO slots per TXQ, this ensures that
615 * frames such as management frames don't get prematurely dropped.
616 *
617 * This does imply that a similar flush-hwq-to-fifoq method will
618 * need to be called from the processq function, before the
619 * per-node software scheduler is called.
620 */
621static void
622ath_edma_xmit_handoff(struct ath_softc *sc, struct ath_txq *txq,
623    struct ath_buf *bf)
624{
625
626	DPRINTF(sc, ATH_DEBUG_XMIT_DESC,
627	    "%s: called; bf=%p, txq=%p, qnum=%d\n",
628	    __func__,
629	    bf,
630	    txq,
631	    txq->axq_qnum);
632
633	if (txq->axq_qnum == ATH_TXQ_SWQ)
634		ath_edma_xmit_handoff_mcast(sc, txq, bf);
635	else
636		ath_edma_xmit_handoff_hw(sc, txq, bf);
637}
638
639static int
640ath_edma_setup_txfifo(struct ath_softc *sc, int qnum)
641{
642	struct ath_tx_edma_fifo *te = &sc->sc_txedma[qnum];
643
644	te->m_fifo = malloc(sizeof(struct ath_buf *) * HAL_TXFIFO_DEPTH,
645	    M_ATHDEV,
646	    M_NOWAIT | M_ZERO);
647	if (te->m_fifo == NULL) {
648		device_printf(sc->sc_dev, "%s: malloc failed\n",
649		    __func__);
650		return (-ENOMEM);
651	}
652
653	/*
654	 * Set initial "empty" state.
655	 */
656	te->m_fifo_head = te->m_fifo_tail = te->m_fifo_depth = 0;
657
658	return (0);
659}
660
661static int
662ath_edma_free_txfifo(struct ath_softc *sc, int qnum)
663{
664	struct ath_tx_edma_fifo *te = &sc->sc_txedma[qnum];
665
666	/* XXX TODO: actually deref the ath_buf entries? */
667	free(te->m_fifo, M_ATHDEV);
668	return (0);
669}
670
671static int
672ath_edma_dma_txsetup(struct ath_softc *sc)
673{
674	int error;
675	int i;
676
677	error = ath_descdma_alloc_desc(sc, &sc->sc_txsdma,
678	    NULL, "txcomp", sc->sc_tx_statuslen, ATH_TXSTATUS_RING_SIZE);
679	if (error != 0)
680		return (error);
681
682	ath_hal_setuptxstatusring(sc->sc_ah,
683	    (void *) sc->sc_txsdma.dd_desc,
684	    sc->sc_txsdma.dd_desc_paddr,
685	    ATH_TXSTATUS_RING_SIZE);
686
687	for (i = 0; i < HAL_NUM_TX_QUEUES; i++) {
688		ath_edma_setup_txfifo(sc, i);
689	}
690
691	return (0);
692}
693
694static int
695ath_edma_dma_txteardown(struct ath_softc *sc)
696{
697	int i;
698
699	for (i = 0; i < HAL_NUM_TX_QUEUES; i++) {
700		ath_edma_free_txfifo(sc, i);
701	}
702
703	ath_descdma_cleanup(sc, &sc->sc_txsdma, NULL);
704	return (0);
705}
706
707/*
708 * Drain all TXQs, potentially after completing the existing completed
709 * frames.
710 */
711static void
712ath_edma_tx_drain(struct ath_softc *sc, ATH_RESET_TYPE reset_type)
713{
714	int i;
715
716	DPRINTF(sc, ATH_DEBUG_RESET, "%s: called\n", __func__);
717
718	(void) ath_stoptxdma(sc);
719
720	/*
721	 * If reset type is noloss, the TX FIFO needs to be serviced
722	 * and those frames need to be handled.
723	 *
724	 * Otherwise, just toss everything in each TX queue.
725	 */
726	if (reset_type == ATH_RESET_NOLOSS) {
727		ath_edma_tx_processq(sc, 0);
728		for (i = 0; i < HAL_NUM_TX_QUEUES; i++) {
729			if (ATH_TXQ_SETUP(sc, i)) {
730				ATH_TXQ_LOCK(&sc->sc_txq[i]);
731				/*
732				 * Free the holding buffer; DMA is now
733				 * stopped.
734				 */
735				ath_txq_freeholdingbuf(sc, &sc->sc_txq[i]);
736				/*
737				 * Reset the link pointer to NULL; there's
738				 * no frames to chain DMA to.
739				 */
740				sc->sc_txq[i].axq_link = NULL;
741				ATH_TXQ_UNLOCK(&sc->sc_txq[i]);
742			}
743		}
744	} else {
745		for (i = 0; i < HAL_NUM_TX_QUEUES; i++) {
746			if (ATH_TXQ_SETUP(sc, i))
747				ath_tx_draintxq(sc, &sc->sc_txq[i]);
748		}
749	}
750
751	/* XXX dump out the TX completion FIFO contents */
752
753	/* XXX dump out the frames */
754
755	sc->sc_wd_timer = 0;
756}
757
758/*
759 * TX completion tasklet.
760 */
761
762static void
763ath_edma_tx_proc(void *arg, int npending)
764{
765	struct ath_softc *sc = (struct ath_softc *) arg;
766
767	ATH_PCU_LOCK(sc);
768	sc->sc_txproc_cnt++;
769	ATH_PCU_UNLOCK(sc);
770
771	ATH_LOCK(sc);
772	ath_power_set_power_state(sc, HAL_PM_AWAKE);
773	ATH_UNLOCK(sc);
774
775#if 0
776	DPRINTF(sc, ATH_DEBUG_TX_PROC, "%s: called, npending=%d\n",
777	    __func__, npending);
778#endif
779	ath_edma_tx_processq(sc, 1);
780
781	ATH_PCU_LOCK(sc);
782	sc->sc_txproc_cnt--;
783	ATH_PCU_UNLOCK(sc);
784
785	ATH_LOCK(sc);
786	ath_power_restore_power_state(sc);
787	ATH_UNLOCK(sc);
788
789	ath_tx_kick(sc);
790}
791
792/*
793 * Process the TX status queue.
794 */
795static void
796ath_edma_tx_processq(struct ath_softc *sc, int dosched)
797{
798	struct ath_hal *ah = sc->sc_ah;
799	HAL_STATUS status;
800	struct ath_tx_status ts;
801	struct ath_txq *txq;
802	struct ath_buf *bf;
803	struct ieee80211_node *ni;
804	int nacked = 0;
805	int idx;
806	int i;
807
808#ifdef	ATH_DEBUG
809	/* XXX */
810	uint32_t txstatus[32];
811#endif
812
813	DPRINTF(sc, ATH_DEBUG_TX_PROC, "%s: called\n", __func__);
814
815	for (idx = 0; ; idx++) {
816		bzero(&ts, sizeof(ts));
817
818		ATH_TXSTATUS_LOCK(sc);
819#ifdef	ATH_DEBUG
820		ath_hal_gettxrawtxdesc(ah, txstatus);
821#endif
822		status = ath_hal_txprocdesc(ah, NULL, (void *) &ts);
823		ATH_TXSTATUS_UNLOCK(sc);
824
825		if (status == HAL_EINPROGRESS) {
826			DPRINTF(sc, ATH_DEBUG_TX_PROC,
827			    "%s: (%d): EINPROGRESS\n",
828			    __func__, idx);
829			break;
830		}
831
832#ifdef	ATH_DEBUG
833		if (sc->sc_debug & ATH_DEBUG_TX_PROC)
834			if (ts.ts_queue_id != sc->sc_bhalq)
835			ath_printtxstatbuf(sc, NULL, txstatus, ts.ts_queue_id,
836			    idx, (status == HAL_OK));
837#endif
838
839		/*
840		 * If there is an error with this descriptor, continue
841		 * processing.
842		 *
843		 * XXX TBD: log some statistics?
844		 */
845		if (status == HAL_EIO) {
846			device_printf(sc->sc_dev, "%s: invalid TX status?\n",
847			    __func__);
848			break;
849		}
850
851#if defined(ATH_DEBUG_ALQ) && defined(ATH_DEBUG)
852		if (if_ath_alq_checkdebug(&sc->sc_alq, ATH_ALQ_EDMA_TXSTATUS)) {
853			if_ath_alq_post(&sc->sc_alq, ATH_ALQ_EDMA_TXSTATUS,
854			    sc->sc_tx_statuslen,
855			    (char *) txstatus);
856		}
857#endif /* ATH_DEBUG_ALQ */
858
859		/*
860		 * At this point we have a valid status descriptor.
861		 * The QID and descriptor ID (which currently isn't set)
862		 * is part of the status.
863		 *
864		 * We then assume that the descriptor in question is the
865		 * -head- of the given QID.  Eventually we should verify
866		 * this by using the descriptor ID.
867		 */
868
869		/*
870		 * The beacon queue is not currently a "real" queue.
871		 * Frames aren't pushed onto it and the lock isn't setup.
872		 * So skip it for now; the beacon handling code will
873		 * free and alloc more beacon buffers as appropriate.
874		 */
875		if (ts.ts_queue_id == sc->sc_bhalq)
876			continue;
877
878		txq = &sc->sc_txq[ts.ts_queue_id];
879
880		ATH_TXQ_LOCK(txq);
881		bf = ATH_TXQ_FIRST(&txq->fifo);
882
883		/*
884		 * Work around the situation where I'm seeing notifications
885		 * for Q1 when no frames are available.  That needs to be
886		 * debugged but not by crashing _here_.
887		 */
888		if (bf == NULL) {
889			device_printf(sc->sc_dev, "%s: Q%d: empty?\n",
890			    __func__,
891			    ts.ts_queue_id);
892			ATH_TXQ_UNLOCK(txq);
893			continue;
894		}
895
896		DPRINTF(sc, ATH_DEBUG_TX_PROC, "%s: Q%d, bf=%p, start=%d, end=%d\n",
897		    __func__,
898		    ts.ts_queue_id, bf,
899		    !! (bf->bf_flags & ATH_BUF_FIFOPTR),
900		    !! (bf->bf_flags & ATH_BUF_FIFOEND));
901
902		/* XXX TODO: actually output debugging info about this */
903
904#if 0
905		/* XXX assert the buffer/descriptor matches the status descid */
906		if (ts.ts_desc_id != bf->bf_descid) {
907			device_printf(sc->sc_dev,
908			    "%s: mismatched descid (qid=%d, tsdescid=%d, "
909			    "bfdescid=%d\n",
910			    __func__,
911			    ts.ts_queue_id,
912			    ts.ts_desc_id,
913			    bf->bf_descid);
914		}
915#endif
916
917		/* This removes the buffer and decrements the queue depth */
918		ATH_TXQ_REMOVE(&txq->fifo, bf, bf_list);
919		if (bf->bf_state.bfs_aggr)
920			txq->axq_aggr_depth--;
921
922		/*
923		 * If this was the end of a FIFO set, decrement FIFO depth
924		 */
925		if (bf->bf_flags & ATH_BUF_FIFOEND)
926			txq->axq_fifo_depth--;
927
928		/*
929		 * If this isn't the final buffer in a FIFO set, mark
930		 * the buffer as busy so it goes onto the holding queue.
931		 */
932		if (! (bf->bf_flags & ATH_BUF_FIFOEND))
933			bf->bf_flags |= ATH_BUF_BUSY;
934
935		DPRINTF(sc, ATH_DEBUG_TX_PROC, "%s: Q%d: FIFO depth is now %d (%d)\n",
936		    __func__,
937		    txq->axq_qnum,
938		    txq->axq_fifo_depth,
939		    txq->fifo.axq_depth);
940
941		/* XXX assert FIFO depth >= 0 */
942		ATH_TXQ_UNLOCK(txq);
943
944		/*
945		 * Outside of the TX lock - if the buffer is end
946		 * end buffer in this FIFO, we don't need a holding
947		 * buffer any longer.
948		 */
949		if (bf->bf_flags & ATH_BUF_FIFOEND) {
950			ATH_TXQ_LOCK(txq);
951			ath_txq_freeholdingbuf(sc, txq);
952			ATH_TXQ_UNLOCK(txq);
953		}
954
955		/*
956		 * First we need to make sure ts_rate is valid.
957		 *
958		 * Pre-EDMA chips pass the whole TX descriptor to
959		 * the proctxdesc function which will then fill out
960		 * ts_rate based on the ts_finaltsi (final TX index)
961		 * in the TX descriptor.  However the TX completion
962		 * FIFO doesn't have this information.  So here we
963		 * do a separate HAL call to populate that information.
964		 *
965		 * The same problem exists with ts_longretry.
966		 * The FreeBSD HAL corrects ts_longretry in the HAL layer;
967		 * the AR9380 HAL currently doesn't.  So until the HAL
968		 * is imported and this can be added, we correct for it
969		 * here.
970		 */
971		/* XXX TODO */
972		/* XXX faked for now. Ew. */
973		if (ts.ts_finaltsi < 4) {
974			ts.ts_rate =
975			    bf->bf_state.bfs_rc[ts.ts_finaltsi].ratecode;
976			switch (ts.ts_finaltsi) {
977			case 3: ts.ts_longretry +=
978			    bf->bf_state.bfs_rc[2].tries;
979			case 2: ts.ts_longretry +=
980			    bf->bf_state.bfs_rc[1].tries;
981			case 1: ts.ts_longretry +=
982			    bf->bf_state.bfs_rc[0].tries;
983			}
984		} else {
985			device_printf(sc->sc_dev, "%s: finaltsi=%d\n",
986			    __func__,
987			    ts.ts_finaltsi);
988			ts.ts_rate = bf->bf_state.bfs_rc[0].ratecode;
989		}
990
991		/*
992		 * XXX This is terrible.
993		 *
994		 * Right now, some code uses the TX status that is
995		 * passed in here, but the completion handlers in the
996		 * software TX path also use bf_status.ds_txstat.
997		 * Ew.  That should all go away.
998		 *
999		 * XXX It's also possible the rate control completion
1000		 * routine is called twice.
1001		 */
1002		memcpy(&bf->bf_status, &ts, sizeof(ts));
1003
1004		ni = bf->bf_node;
1005
1006		/* Update RSSI */
1007		/* XXX duplicate from ath_tx_processq */
1008		if (ni != NULL && ts.ts_status == 0 &&
1009		    ((bf->bf_state.bfs_txflags & HAL_TXDESC_NOACK) == 0)) {
1010			nacked++;
1011			sc->sc_stats.ast_tx_rssi = ts.ts_rssi;
1012			ATH_RSSI_LPF(sc->sc_halstats.ns_avgtxrssi,
1013			    ts.ts_rssi);
1014			ATH_RSSI_LPF(ATH_NODE(ni)->an_node_stats.ns_avgtxrssi,
1015			    ts.ts_rssi);
1016		}
1017
1018		/* Handle frame completion and rate control update */
1019		ath_tx_process_buf_completion(sc, txq, &ts, bf);
1020
1021		/* NB: bf is invalid at this point */
1022	}
1023
1024	sc->sc_wd_timer = 0;
1025
1026	/*
1027	 * XXX It's inefficient to do this if the FIFO queue is full,
1028	 * but there's no easy way right now to only populate
1029	 * the txq task for _one_ TXQ.  This should be fixed.
1030	 */
1031	if (dosched) {
1032		/* Attempt to schedule more hardware frames to the TX FIFO */
1033		for (i = 0; i < HAL_NUM_TX_QUEUES; i++) {
1034			if (ATH_TXQ_SETUP(sc, i)) {
1035				ATH_TX_LOCK(sc);
1036				ath_txq_sched(sc, &sc->sc_txq[i]);
1037				ATH_TX_UNLOCK(sc);
1038
1039				ATH_TXQ_LOCK(&sc->sc_txq[i]);
1040				ath_edma_tx_fifo_fill(sc, &sc->sc_txq[i]);
1041				ATH_TXQ_UNLOCK(&sc->sc_txq[i]);
1042			}
1043		}
1044		/* Kick software scheduler */
1045		ath_tx_swq_kick(sc);
1046	}
1047
1048	DPRINTF(sc, ATH_DEBUG_TX_PROC, "%s: end\n", __func__);
1049}
1050
1051static void
1052ath_edma_attach_comp_func(struct ath_softc *sc)
1053{
1054
1055	TASK_INIT(&sc->sc_txtask, 0, ath_edma_tx_proc, sc);
1056}
1057
1058void
1059ath_xmit_setup_edma(struct ath_softc *sc)
1060{
1061
1062	/* Fetch EDMA field and buffer sizes */
1063	(void) ath_hal_gettxdesclen(sc->sc_ah, &sc->sc_tx_desclen);
1064	(void) ath_hal_gettxstatuslen(sc->sc_ah, &sc->sc_tx_statuslen);
1065	(void) ath_hal_getntxmaps(sc->sc_ah, &sc->sc_tx_nmaps);
1066
1067	if (bootverbose) {
1068		device_printf(sc->sc_dev, "TX descriptor length: %d\n",
1069		    sc->sc_tx_desclen);
1070		device_printf(sc->sc_dev, "TX status length: %d\n",
1071		    sc->sc_tx_statuslen);
1072		device_printf(sc->sc_dev, "TX buffers per descriptor: %d\n",
1073		    sc->sc_tx_nmaps);
1074	}
1075
1076	sc->sc_tx.xmit_setup = ath_edma_dma_txsetup;
1077	sc->sc_tx.xmit_teardown = ath_edma_dma_txteardown;
1078	sc->sc_tx.xmit_attach_comp_func = ath_edma_attach_comp_func;
1079
1080	sc->sc_tx.xmit_dma_restart = ath_edma_dma_restart;
1081	sc->sc_tx.xmit_handoff = ath_edma_xmit_handoff;
1082	sc->sc_tx.xmit_drain = ath_edma_tx_drain;
1083}
1084