/* mlx4_en_tx.c, FreeBSD revision 246581 */
/*
 * Copyright (c) 2007 Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */

#include "mlx4_en.h"

#include <linux/mlx4/cq.h>
#include <linux/mlx4/qp.h>
#include <linux/vmalloc.h>

#include <net/ethernet.h>
#include <net/if_vlan_var.h>
#include <sys/mbuf.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/tcp_lro.h>
#include <netinet/udp.h>

enum {
	MAX_INLINE = 104, /* 128 - 16 - 4 - 4 */
	MAX_BF = 256,
};

static int inline_thold = MAX_INLINE;

module_param_named(inline_thold, inline_thold, int, 0444);
MODULE_PARM_DESC(inline_thold, "threshold for using inline data");

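/*
 * mlx4_en_create_tx_ring() allocates the per-ring software state: the
 * drbr buffer ring used by the if_transmit path, the tx_info array
 * that shadows each descriptor, a bounce buffer for descriptors that
 * wrap past the end of the ring, the HW work queue (wqres) with its
 * QP, and, when available, a BlueFlame register for low-latency
 * doorbells.  Resources are released in reverse order on failure.
 */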
int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
			   struct mlx4_en_tx_ring *ring, u32 size,
			   u16 stride)
{
	struct mlx4_en_dev *mdev = priv->mdev;
	int tmp;
	int err;

	ring->size = size;
	ring->size_mask = size - 1;
	ring->stride = stride;

	inline_thold = min(inline_thold, MAX_INLINE);

	mtx_init(&ring->tx_lock.m, "mlx4 tx", NULL, MTX_DEF);
	mtx_init(&ring->comp_lock.m, "mlx4 comp", NULL, MTX_DEF);

	/* Allocate the buf ring */
	ring->br = buf_ring_alloc(MLX4_EN_DEF_TX_QUEUE_SIZE, M_DEVBUF,
	    M_WAITOK, &ring->tx_lock.m);
	if (ring->br == NULL) {
		en_err(priv, "Failed allocating tx buf ring\n");
		return -ENOMEM;
	}

	tmp = size * sizeof(struct mlx4_en_tx_info);
	ring->tx_info = kmalloc(tmp, GFP_KERNEL);
	if (!ring->tx_info) {
		en_err(priv, "Failed allocating tx_info ring\n");
		err = -ENOMEM;
		goto err_tx;
	}
	en_dbg(DRV, priv, "Allocated tx_info ring at addr:%p size:%d\n",
		 ring->tx_info, tmp);

	ring->bounce_buf = kmalloc(MAX_DESC_SIZE, GFP_KERNEL);
	if (!ring->bounce_buf) {
		en_err(priv, "Failed allocating bounce buffer\n");
		err = -ENOMEM;
		goto err_tx;
	}
	ring->buf_size = ALIGN(size * ring->stride, MLX4_EN_PAGE_SIZE);

	err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres, ring->buf_size,
				 2 * PAGE_SIZE);
	if (err) {
		en_err(priv, "Failed allocating hwq resources\n");
		goto err_bounce;
	}

	err = mlx4_en_map_buffer(&ring->wqres.buf);
	if (err) {
		en_err(priv, "Failed to map TX buffer\n");
		goto err_hwq_res;
	}

	ring->buf = ring->wqres.buf.direct.buf;

	en_dbg(DRV, priv, "Allocated TX ring (addr:%p) - buf:%p size:%d "
	       "buf_size:%d dma:%llx\n", ring, ring->buf, ring->size,
	       ring->buf_size, (unsigned long long) ring->wqres.buf.direct.map);

	err = mlx4_qp_reserve_range(mdev->dev, 1, 256, &ring->qpn);
	if (err) {
		en_err(priv, "Failed reserving qp for tx ring.\n");
		goto err_map;
	}

	err = mlx4_qp_alloc(mdev->dev, ring->qpn, &ring->qp);
	if (err) {
		en_err(priv, "Failed allocating qp %d\n", ring->qpn);
		goto err_reserve;
	}
	ring->qp.event = mlx4_en_sqp_event;

	err = mlx4_bf_alloc(mdev->dev, &ring->bf);
	if (err) {
		ring->bf.uar = &mdev->priv_uar;
		ring->bf.uar->map = mdev->uar_map;
		ring->bf_enabled = false;
	} else
		ring->bf_enabled = true;

	return 0;

err_reserve:
	mlx4_qp_release_range(mdev->dev, ring->qpn, 1);
err_map:
	mlx4_en_unmap_buffer(&ring->wqres.buf);
err_hwq_res:
	mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size);
err_bounce:
	kfree(ring->bounce_buf);
	ring->bounce_buf = NULL;
err_tx:
	buf_ring_free(ring->br, M_DEVBUF);
	kfree(ring->tx_info);
	ring->tx_info = NULL;
	return err;
}

void mlx4_en_destroy_tx_ring(struct mlx4_en_priv *priv,
			     struct mlx4_en_tx_ring *ring)
{
	struct mlx4_en_dev *mdev = priv->mdev;
	en_dbg(DRV, priv, "Destroying tx ring, qpn: %d\n", ring->qpn);

	buf_ring_free(ring->br, M_DEVBUF);
	if (ring->bf_enabled)
		mlx4_bf_free(mdev->dev, &ring->bf);
	mlx4_qp_remove(mdev->dev, &ring->qp);
	mlx4_qp_free(mdev->dev, &ring->qp);
	mlx4_qp_release_range(mdev->dev, ring->qpn, 1);
	mlx4_en_unmap_buffer(&ring->wqres.buf);
	mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size);
	kfree(ring->bounce_buf);
	ring->bounce_buf = NULL;
	kfree(ring->tx_info);
	ring->tx_info = NULL;
	mtx_destroy(&ring->tx_lock.m);
	mtx_destroy(&ring->comp_lock.m);
}

int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv,
			     struct mlx4_en_tx_ring *ring,
			     int cq)
{
	struct mlx4_en_dev *mdev = priv->mdev;
	int err;

	ring->cqn = cq;
	ring->prod = 0;
	ring->cons = 0xffffffff;
	ring->last_nr_txbb = 1;
	ring->poll_cnt = 0;
	ring->blocked = 0;
	memset(ring->tx_info, 0, ring->size * sizeof(struct mlx4_en_tx_info));
	memset(ring->buf, 0, ring->buf_size);

	ring->qp_state = MLX4_QP_STATE_RST;
	ring->doorbell_qpn = swab32(ring->qp.qpn << 8);

	mlx4_en_fill_qp_context(priv, ring->size, ring->stride, 1, 0, ring->qpn,
				ring->cqn, &ring->context);
	if (ring->bf_enabled)
		ring->context.usr_page = cpu_to_be32(ring->bf.uar->index);

	err = mlx4_qp_to_ready(mdev->dev, &ring->wqres.mtt, &ring->context,
			       &ring->qp, &ring->qp_state);

	return err;
}

void mlx4_en_deactivate_tx_ring(struct mlx4_en_priv *priv,
				struct mlx4_en_tx_ring *ring)
{
	struct mlx4_en_dev *mdev = priv->mdev;

	mlx4_qp_modify(mdev->dev, NULL, ring->qp_state,
		       MLX4_QP_STATE_RST, NULL, 0, 0, &ring->qp);
}

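/*
 * mlx4_en_free_tx_desc() releases one completed (or aborted) send
 * descriptor: it DMA-unmaps each data segment unless the packet was
 * inlined, stamps every TXBB of the descriptor with the software
 * ownership pattern for the current pass over the ring, and frees the
 * mbuf.  The second branch handles descriptors that wrap around the
 * end of the ring buffer.  Returns the number of TXBBs consumed so
 * the caller can advance the consumer index.
 */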
static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
				struct mlx4_en_tx_ring *ring,
				int index, u8 owner)
{
	struct mlx4_en_dev *mdev = priv->mdev;
	struct mlx4_en_tx_info *tx_info = &ring->tx_info[index];
	struct mlx4_en_tx_desc *tx_desc = ring->buf + index * TXBB_SIZE;
	struct mlx4_wqe_data_seg *data = (void *) tx_desc + tx_info->data_offset;
	struct mbuf *mb = tx_info->mb;
	void *end = ring->buf + ring->buf_size;
	int frags = tx_info->nr_segs;
	int i;
	__be32 *ptr = (__be32 *)tx_desc;
	__be32 stamp = cpu_to_be32(STAMP_VAL | (!!owner << STAMP_SHIFT));

	/* Optimize the common case when there are no wraparounds */
	if (likely((void *) tx_desc + tx_info->nr_txbb * TXBB_SIZE <= end)) {
		if (!tx_info->inl) {
			for (i = 0; i < frags; i++) {
				pci_unmap_single(mdev->pdev,
					(dma_addr_t) be64_to_cpu(data[i].addr),
					data[i].byte_count, PCI_DMA_TODEVICE);
			}
		}
		/* Stamp the freed descriptor */
		for (i = 0; i < tx_info->nr_txbb * TXBB_SIZE; i += STAMP_STRIDE) {
			*ptr = stamp;
			ptr += STAMP_DWORDS;
		}

	} else {
		if (!tx_info->inl) {
			for (i = 0; i < frags; i++) {
				/* Check for wraparound before unmapping */
				if ((void *) data >= end)
					data = (struct mlx4_wqe_data_seg *) ring->buf;
				pci_unmap_single(mdev->pdev,
					(dma_addr_t) be64_to_cpu(data->addr),
					data->byte_count, PCI_DMA_TODEVICE);
				++data;
			}
		}
		/* Stamp the freed descriptor */
		for (i = 0; i < tx_info->nr_txbb * TXBB_SIZE; i += STAMP_STRIDE) {
			*ptr = stamp;
			ptr += STAMP_DWORDS;
			if ((void *) ptr >= end) {
				ptr = ring->buf;
				stamp ^= cpu_to_be32(0x80000000);
			}
		}

	}
	m_freem(mb);
	return tx_info->nr_txbb;
}

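/*
 * mlx4_en_free_tx_buf() reclaims every descriptor still outstanding
 * on the ring, typically when the port is brought down.  It walks
 * from the consumer to the producer index, freeing descriptors (and
 * their mbufs) that never saw a completion, and returns how many
 * were dropped.
 */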
int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring)
{
	struct mlx4_en_priv *priv = netdev_priv(dev);
	int cnt = 0;

	/* Skip last polled descriptor */
	ring->cons += ring->last_nr_txbb;
	en_dbg(DRV, priv, "Freeing Tx buf - cons:0x%x prod:0x%x\n",
		 ring->cons, ring->prod);

	if ((u32) (ring->prod - ring->cons) > ring->size) {
		en_warn(priv, "Tx consumer passed producer!\n");
		return 0;
	}

	while (ring->cons != ring->prod) {
		ring->last_nr_txbb = mlx4_en_free_tx_desc(priv, ring,
						ring->cons & ring->size_mask,
						!!(ring->cons & ring->size));
		ring->cons += ring->last_nr_txbb;
		cnt++;
	}

	if (cnt)
		en_dbg(DRV, priv, "Freed %d uncompleted tx descriptors\n", cnt);

	return cnt;
}

void mlx4_en_set_prio_map(struct mlx4_en_priv *priv, u16 *prio_map, u32 ring_num)
{
	int block = 8 / ring_num;
	int extra = 8 - (block * ring_num);
	int num = 0;
	u16 ring = 1;
	int prio;

	if (ring_num == 1) {
		for (prio = 0; prio < 8; prio++)
			prio_map[prio] = 0;
		return;
	}

	for (prio = 0; prio < 8; prio++) {
		if (extra && (num == block + 1)) {
			ring++;
			num = 0;
			extra--;
		} else if (!extra && (num == block)) {
			ring++;
			num = 0;
		}
		prio_map[prio] = ring;
		en_dbg(DRV, priv, " prio:%d --> ring:%d\n", prio, ring);
		num++;
	}
}

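/*
 * mlx4_en_process_tx_cq() drains the TX completion queue: it frees
 * the descriptors covered by the newest reported wqe_index, updates
 * the CQ consumer index before exposing the new ring consumer (to
 * avoid CQ overflow), and re-enables the interface queue if this
 * ring had previously filled up and stopped it.
 */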
static void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq)
{
	struct mlx4_en_priv *priv = netdev_priv(dev);
	struct mlx4_cq *mcq = &cq->mcq;
	struct mlx4_en_tx_ring *ring = &priv->tx_ring[cq->ring];
	struct mlx4_cqe *cqe = cq->buf;
	u16 index;
	u16 new_index;
	u32 txbbs_skipped = 0;
	u32 cq_last_sav;

	/* index always points to the first TXBB of the last polled descriptor */
	index = ring->cons & ring->size_mask;
	new_index = be16_to_cpu(cqe->wqe_index) & ring->size_mask;
	if (index == new_index)
		return;

	if (!priv->port_up)
		return;

	/*
	 * We use a two-stage loop:
	 * - the first samples the HW-updated CQE
	 * - the second frees TXBBs until the last sample
	 * This lets us amortize CQE cache misses, while still polling the CQ
	 * until it is quiescent.
	 */
	cq_last_sav = mcq->cons_index;
	do {
		do {
			/* Skip over last polled CQE */
			index = (index + ring->last_nr_txbb) & ring->size_mask;
			txbbs_skipped += ring->last_nr_txbb;

			/* Poll next CQE */
			ring->last_nr_txbb = mlx4_en_free_tx_desc(
						priv, ring, index,
						!!((ring->cons + txbbs_skipped) &
						   ring->size));
			++mcq->cons_index;

		} while (index != new_index);

		new_index = be16_to_cpu(cqe->wqe_index) & ring->size_mask;
	} while (index != new_index);
	AVG_PERF_COUNTER(priv->pstats.tx_coal_avg,
			 (u32) (mcq->cons_index - cq_last_sav));

	/*
	 * To prevent CQ overflow we first update CQ consumer and only then
	 * the ring consumer.
	 */
	mlx4_cq_set_ci(mcq);
	wmb();
	ring->cons += txbbs_skipped;

	/* Wakeup Tx queue if this ring stopped it */
	if (unlikely(ring->blocked)) {
		if ((u32) (ring->prod - ring->cons) <=
		     ring->size - HEADROOM - MAX_DESC_TXBBS) {
			ring->blocked = 0;
			if (atomic_fetchadd_int(&priv->blocked, -1) == 1)
				atomic_clear_int(&dev->if_drv_flags,
				    IFF_DRV_OACTIVE);
			priv->port_stats.wake_queue++;
		}
	}
}

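/*
 * Interrupt handler for a TX completion queue.  Completions are
 * processed only if comp_lock can be taken without spinning (another
 * context already holding it will do the work), and the poll timer is
 * re-armed one tick out as a backstop.
 */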
void mlx4_en_tx_irq(struct mlx4_cq *mcq)
{
	struct mlx4_en_cq *cq = container_of(mcq, struct mlx4_en_cq, mcq);
	struct mlx4_en_priv *priv = netdev_priv(cq->dev);
	struct mlx4_en_tx_ring *ring = &priv->tx_ring[cq->ring];

	if (!spin_trylock(&ring->comp_lock))
		return;
	mlx4_en_process_tx_cq(cq->dev, cq);
	mod_timer(&cq->timer, jiffies + 1);
	spin_unlock(&ring->comp_lock);
}


void mlx4_en_poll_tx_cq(unsigned long data)
{
	struct mlx4_en_cq *cq = (struct mlx4_en_cq *) data;
	struct mlx4_en_priv *priv = netdev_priv(cq->dev);
	struct mlx4_en_tx_ring *ring = &priv->tx_ring[cq->ring];
	u32 inflight;

	INC_PERF_COUNTER(priv->pstats.tx_poll);

	if (!spin_trylock(&ring->comp_lock)) {
		mod_timer(&cq->timer, jiffies + MLX4_EN_TX_POLL_TIMEOUT);
		return;
	}
	mlx4_en_process_tx_cq(cq->dev, cq);
	inflight = (u32) (ring->prod - ring->cons - ring->last_nr_txbb);

	/* If there are still packets in flight and the timer has not already
	 * been scheduled by the Tx routine then schedule it here to guarantee
	 * completion processing of these packets */
	if (inflight && priv->port_up)
		mod_timer(&cq->timer, jiffies + MLX4_EN_TX_POLL_TIMEOUT);

	spin_unlock(&ring->comp_lock);
}

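/*
 * mlx4_en_bounce_to_desc() copies a descriptor that was built in the
 * bounce buffer (because it would wrap past the end of the ring) back
 * into the ring in two pieces: first the tail that lands at the start
 * of the ring, then the head at the original index.  The copies run
 * backwards, with a write barrier at each TXBB boundary, so a TXBB's
 * first (ownership) dword is written only after the rest of it.
 */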
static struct mlx4_en_tx_desc *mlx4_en_bounce_to_desc(struct mlx4_en_priv *priv,
						      struct mlx4_en_tx_ring *ring,
						      u32 index,
						      unsigned int desc_size)
{
	u32 copy = (ring->size - index) * TXBB_SIZE;
	int i;

	for (i = desc_size - copy - 4; i >= 0; i -= 4) {
		if ((i & (TXBB_SIZE - 1)) == 0)
			wmb();

		*((u32 *) (ring->buf + i)) =
			*((u32 *) (ring->bounce_buf + copy + i));
	}

	for (i = copy - 4; i >= 4; i -= 4) {
		if ((i & (TXBB_SIZE - 1)) == 0)
			wmb();

		*((u32 *) (ring->buf + index * TXBB_SIZE + i)) =
			*((u32 *) (ring->bounce_buf + i));
	}

	/* Return real descriptor location */
	return ring->buf + index * TXBB_SIZE;
}

static inline void mlx4_en_xmit_poll(struct mlx4_en_priv *priv, int tx_ind)
{
	struct mlx4_en_cq *cq = &priv->tx_cq[tx_ind];
	struct mlx4_en_tx_ring *ring = &priv->tx_ring[tx_ind];

	/* If we don't have a pending timer, set one up to catch our recent
	   post in case the interface becomes idle */
	if (!timer_pending(&cq->timer))
		mod_timer(&cq->timer, jiffies + MLX4_EN_TX_POLL_TIMEOUT);

	/* Poll the CQ every MLX4_EN_TX_POLL_MODER packets */
	if ((++ring->poll_cnt & (MLX4_EN_TX_POLL_MODER - 1)) == 0)
		if (spin_trylock(&ring->comp_lock)) {
			mlx4_en_process_tx_cq(priv->dev, cq);
			spin_unlock(&ring->comp_lock);
		}
}

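/*
 * Small packets (up to inline_thold bytes, never TSO) are copied
 * directly into the descriptor as inline data instead of being DMA
 * mapped.  inline_size() returns the resulting descriptor size, using
 * one inline segment when the data fits before the MLX4_INLINE_ALIGN
 * boundary and two segments otherwise.
 */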
static int is_inline(struct mbuf *mb)
{

	if (inline_thold && mb->m_pkthdr.len <= inline_thold &&
	    (mb->m_pkthdr.csum_flags & CSUM_TSO) == 0)
		return 1;

	return 0;
}

static int inline_size(struct mbuf *mb)
{
	int len;

	len = mb->m_pkthdr.len;
	if (len + CTRL_SIZE + sizeof(struct mlx4_wqe_inline_seg)
	    <= MLX4_INLINE_ALIGN)
		return ALIGN(len + CTRL_SIZE +
			     sizeof(struct mlx4_wqe_inline_seg), 16);
	else
		return ALIGN(len + CTRL_SIZE + 2 *
			     sizeof(struct mlx4_wqe_inline_seg), 16);
}

static int get_head_size(struct mbuf *mb)
{
	struct tcphdr *th;
	struct ip *ip;
	int ip_hlen, tcp_hlen;
	int len;

	len = ETHER_HDR_LEN;
	if (mb->m_len < len + sizeof(struct ip))
		return (0);
	ip = (struct ip *)(mtod(mb, char *) + len);
	if (ip->ip_p != IPPROTO_TCP)
		return (0);
	ip_hlen = ip->ip_hl << 2;
	len += ip_hlen;
	if (mb->m_len < len + sizeof(struct tcphdr))
		return (0);
	th = (struct tcphdr *)(mtod(mb, char *) + len);
	tcp_hlen = th->th_off << 2;
	len += tcp_hlen;
	if (mb->m_len < len)
		return (0);
	return (len);
}

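/*
 * get_real_size() computes the un-padded descriptor size for an
 * outgoing mbuf chain and reports the number of non-empty segments
 * through *segsp.  For TSO packets, get_head_size() requires the
 * Ethernet/IP/TCP headers to be contiguous in the first mbuf; the
 * header bytes are then carried in the LSO segment rather than in a
 * data segment.  mb_copy() below is the helper used to gather an mbuf
 * chain into the inline portion of a descriptor.
 */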
static int get_real_size(struct mbuf *mb, struct net_device *dev, int *segsp,
    int *lso_header_size)
{
	struct mbuf *m;
	int nr_segs;

	nr_segs = 0;
	for (m = mb; m != NULL; m = m->m_next)
		if (m->m_len)
			nr_segs++;

	if (mb->m_pkthdr.csum_flags & CSUM_TSO) {
		*lso_header_size = get_head_size(mb);
		if (*lso_header_size) {
			if (mb->m_len == *lso_header_size)
				nr_segs--;
			*segsp = nr_segs;
			return CTRL_SIZE + nr_segs * DS_SIZE +
			    ALIGN(*lso_header_size + 4, DS_SIZE);
		}
	} else
		*lso_header_size = 0;
	*segsp = nr_segs;
	if (is_inline(mb))
		return inline_size(mb);
	return (CTRL_SIZE + nr_segs * DS_SIZE);
}

static struct mbuf *mb_copy(struct mbuf *mb, int *offp, char *data, int len)
{
	int bytes;
	int off;

	off = *offp;
	while (len) {
		bytes = min(mb->m_len - off, len);
		if (bytes)
			memcpy(data, mb->m_data + off, bytes);
		len -= bytes;
		data += bytes;
		off += bytes;
		if (off == mb->m_len) {
			off = 0;
			mb = mb->m_next;
		}
	}
	*offp = off;
	return (mb);
}

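/*
 * build_inline_wqe() copies the whole packet into the descriptor's
 * inline segment(s).  When the payload does not fit in the first
 * MLX4_INLINE_ALIGN block it is split across two inline segments; the
 * second segment's byte_count is written only after a write barrier
 * so that its data is globally visible first.
 */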
static void build_inline_wqe(struct mlx4_en_tx_desc *tx_desc, struct mbuf *mb,
			     int real_size, u16 *vlan_tag, int tx_ind)
{
	struct mlx4_wqe_inline_seg *inl = &tx_desc->inl;
	int spc = MLX4_INLINE_ALIGN - CTRL_SIZE - sizeof *inl;
	int len;
	int off;

	off = 0;
	len = mb->m_pkthdr.len;
	if (len <= spc) {
		inl->byte_count = cpu_to_be32(1 << 31 | len);
		mb_copy(mb, &off, (void *)(inl + 1), len);
	} else {
		inl->byte_count = cpu_to_be32(1 << 31 | spc);
		mb = mb_copy(mb, &off, (void *)(inl + 1), spc);
		inl = (void *) (inl + 1) + spc;
		mb_copy(mb, &off, (void *)(inl + 1), len - spc);
		wmb();
		inl->byte_count = cpu_to_be32(1 << 31 | (len - spc));
	}
	tx_desc->ctrl.vlan_tag = cpu_to_be16(*vlan_tag);
	tx_desc->ctrl.ins_vlan = MLX4_WQE_CTRL_INS_VLAN * !!(*vlan_tag);
	tx_desc->ctrl.fence_size = (real_size / 16) & 0x3f;
}

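/*
 * mlx4_en_select_queue() picks a TX ring for a packet.  VLAN-tagged
 * traffic is steered by priority through tx_prio_map; TCP/UDP over
 * IPv4 is hashed on the destination address (and, for TCP, the ports)
 * into the per-flow hash table, which also tends to separate small
 * and large packets between the two halves of the hash rings;
 * everything else falls back to ring MLX4_EN_NUM_HASH_RINGS.
 */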
u16 mlx4_en_select_queue(struct net_device *dev, struct mbuf *mb)
{
	struct mlx4_en_priv *priv = netdev_priv(dev);
	struct mlx4_en_tx_hash_entry *entry;
	struct ether_header *eth;
	struct tcphdr *th;
	struct ip *iph;
	u32 hash_index;
	int tx_ind = 0;
	u16 vlan_tag = 0;
	int len;

	/* Obtain VLAN information if present */
	if (mb->m_flags & M_VLANTAG) {
		vlan_tag = mb->m_pkthdr.ether_vtag;
		/* Set the Tx ring to use according to vlan priority */
		tx_ind = priv->tx_prio_map[vlan_tag >> 13];
		if (tx_ind)
			return tx_ind;
	}
	if (mb->m_len <
	    ETHER_HDR_LEN + sizeof(struct ip) + sizeof(struct tcphdr))
		return MLX4_EN_NUM_HASH_RINGS;
	eth = mtod(mb, struct ether_header *);
	/* Hashing is only done for TCP/IP or UDP/IP packets */
	if (be16_to_cpu(eth->ether_type) != ETHERTYPE_IP)
		return MLX4_EN_NUM_HASH_RINGS;
	len = ETHER_HDR_LEN;
	iph = (struct ip *)(mtod(mb, char *) + len);
	len += iph->ip_hl << 2;
	th = (struct tcphdr *)(mtod(mb, char *) + len);
	hash_index = be32_to_cpu(iph->ip_dst.s_addr) & MLX4_EN_TX_HASH_MASK;
	switch (iph->ip_p) {
	case IPPROTO_UDP:
		break;
	case IPPROTO_TCP:
		if (mb->m_len < len + sizeof(struct tcphdr))
			return MLX4_EN_NUM_HASH_RINGS;
		hash_index =
		    (hash_index ^ be16_to_cpu(th->th_dport ^ th->th_sport)) &
		    MLX4_EN_TX_HASH_MASK;
		break;
	default:
		return MLX4_EN_NUM_HASH_RINGS;
	}

	entry = &priv->tx_hash[hash_index];
	if (unlikely(!entry->cnt)) {
		tx_ind = hash_index & (MLX4_EN_NUM_HASH_RINGS / 2 - 1);
		if (2 * entry->small_pkts > entry->big_pkts)
			tx_ind += MLX4_EN_NUM_HASH_RINGS / 2;
		entry->small_pkts = entry->big_pkts = 0;
		entry->ring = tx_ind;
	}

	entry->cnt++;
	if (mb->m_pkthdr.len > MLX4_EN_SMALL_PKT_SIZE)
		entry->big_pkts++;
	else
		entry->small_pkts++;
	return entry->ring;
}

static void mlx4_bf_copy(unsigned long *dst, unsigned long *src, unsigned bytecnt)
{
	__iowrite64_copy(dst, src, bytecnt / 8);
}

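/*
 * mlx4_en_xmit() builds and posts one send descriptor.  It sizes the
 * descriptor (defragmenting the mbuf chain once if it needs too many
 * TXBBs), stops the queue when the ring lacks room, then fills the
 * control segment, optional LSO header, and either inline data or
 * DMA-mapped data segments.  Ownership is handed to the HW with a
 * doorbell write, or via a BlueFlame copy for small, untagged
 * descriptors when BlueFlame is enabled.
 */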
static int mlx4_en_xmit(struct net_device *dev, int tx_ind, struct mbuf **mbp)
{
	struct mlx4_en_priv *priv = netdev_priv(dev);
	struct mlx4_en_dev *mdev = priv->mdev;
	struct mlx4_en_tx_ring *ring;
	struct mlx4_en_cq *cq;
	struct mlx4_en_tx_desc *tx_desc;
	struct mlx4_wqe_data_seg *data;
	struct mlx4_en_tx_info *tx_info;
	struct mbuf *m;
	int nr_txbb;
	int nr_segs;
	int desc_size;
	int real_size;
	dma_addr_t dma;
	u32 index, bf_index;
	__be32 op_own;
	u16 vlan_tag = 0;
	int i;
	int lso_header_size;
	bool bounce = false;
	struct mbuf *mb;
	int defrag = 1;

	ring = &priv->tx_ring[tx_ind];
	mb = *mbp;
	if (!priv->port_up)
		goto tx_drop;

retry:
	real_size = get_real_size(mb, dev, &nr_segs, &lso_header_size);
	if (unlikely(!real_size))
		goto tx_drop;

	/* Align descriptor to TXBB size */
	desc_size = ALIGN(real_size, TXBB_SIZE);
	nr_txbb = desc_size / TXBB_SIZE;
	if (unlikely(nr_txbb > MAX_DESC_TXBBS)) {
		if (defrag) {
			mb = m_defrag(*mbp, M_NOWAIT);
			if (mb == NULL) {
				mb = *mbp;
				goto tx_drop;
			}
			*mbp = mb;
			defrag = 0;
			goto retry;
		}
		goto tx_drop;
	}

	/* Check available TXBBs and 2K spare for prefetch */
	if (unlikely(((int)(ring->prod - ring->cons)) >
		     ring->size - HEADROOM - MAX_DESC_TXBBS)) {
		/* Every full Tx ring stops the queue */
		if (ring->blocked == 0)
			atomic_add_int(&priv->blocked, 1);
		atomic_set_int(&dev->if_drv_flags, IFF_DRV_OACTIVE);
		ring->blocked = 1;
		priv->port_stats.queue_stopped++;

		/* Use interrupts to find out when queue opened */
		cq = &priv->tx_cq[tx_ind];
		mlx4_en_arm_cq(priv, cq);
		return EBUSY;
	}

	/* Track current inflight packets for performance analysis */
	AVG_PERF_COUNTER(priv->pstats.inflight_avg,
			 (u32) (ring->prod - ring->cons - 1));

	/* Packet is good - grab an index and transmit it */
	index = ring->prod & ring->size_mask;
	bf_index = ring->prod;

	/* See if we have enough space for whole descriptor TXBB for setting
	 * SW ownership on next descriptor; if not, use a bounce buffer. */
	if (likely(index + nr_txbb <= ring->size))
		tx_desc = ring->buf + index * TXBB_SIZE;
	else {
		tx_desc = (struct mlx4_en_tx_desc *) ring->bounce_buf;
		bounce = true;
	}

	/* Prepare ctrl segment apart from opcode+ownership, which depends on
	 * whether LSO is used */
	if (mb->m_flags & M_VLANTAG)
		vlan_tag = mb->m_pkthdr.ether_vtag;
	tx_desc->ctrl.vlan_tag = cpu_to_be16(vlan_tag);
	tx_desc->ctrl.ins_vlan = MLX4_WQE_CTRL_INS_VLAN * !!vlan_tag;
	tx_desc->ctrl.fence_size = (real_size / 16) & 0x3f;
	tx_desc->ctrl.srcrb_flags = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE |
						MLX4_WQE_CTRL_SOLICITED);
	if (mb->m_pkthdr.csum_flags & (CSUM_IP|CSUM_TCP|CSUM_UDP)) {
		tx_desc->ctrl.srcrb_flags |= cpu_to_be32(MLX4_WQE_CTRL_IP_CSUM |
							 MLX4_WQE_CTRL_TCP_UDP_CSUM);
		priv->port_stats.tx_chksum_offload++;
	}

	if (unlikely(priv->validate_loopback)) {
		/* Copy dst mac address to wqe */
		struct ether_header *ethh;
		u64 mac;
		u32 mac_l, mac_h;

		ethh = mtod(mb, struct ether_header *);
		mac = mlx4_en_mac_to_u64(ethh->ether_dhost);
		if (mac) {
			mac_h = (u32) ((mac & 0xffff00000000ULL) >> 16);
			mac_l = (u32) (mac & 0xffffffff);
			tx_desc->ctrl.srcrb_flags |= cpu_to_be32(mac_h);
			tx_desc->ctrl.imm = cpu_to_be32(mac_l);
		}
	}

	/* Handle LSO (TSO) packets */
	if (lso_header_size) {
		int segsz;

		/* Mark opcode as LSO */
		op_own = cpu_to_be32(MLX4_OPCODE_LSO | (1 << 6)) |
			((ring->prod & ring->size) ?
				cpu_to_be32(MLX4_EN_BIT_DESC_OWN) : 0);

		/* Fill in the LSO prefix */
		tx_desc->lso.mss_hdr_size = cpu_to_be32(
			mb->m_pkthdr.tso_segsz << 16 | lso_header_size);

		/* Copy headers;
		 * note that we already verified that it is linear */
		memcpy(tx_desc->lso.header, mb->m_data, lso_header_size);
		data = ((void *) &tx_desc->lso +
			ALIGN(lso_header_size + 4, DS_SIZE));

		priv->port_stats.tso_packets++;
		segsz = mb->m_pkthdr.tso_segsz;
		i = ((mb->m_pkthdr.len - lso_header_size) / segsz) +
			!!((mb->m_pkthdr.len - lso_header_size) % segsz);
		ring->bytes += mb->m_pkthdr.len + (i - 1) * lso_header_size;
		ring->packets += i;
		mb->m_data += lso_header_size;
		mb->m_len -= lso_header_size;
	} else {
		/* Normal (non-LSO) packet */
		op_own = cpu_to_be32(MLX4_OPCODE_SEND) |
			((ring->prod & ring->size) ?
			 cpu_to_be32(MLX4_EN_BIT_DESC_OWN) : 0);
		data = &tx_desc->data;
		ring->bytes += max(mb->m_pkthdr.len,
		    (unsigned int)ETHER_MIN_LEN - ETHER_CRC_LEN);
		ring->packets++;

	}
	AVG_PERF_COUNTER(priv->pstats.tx_pktsz_avg, mb->m_pkthdr.len);

	/* Save mb in tx_info ring */
	tx_info = &ring->tx_info[index];
	tx_info->mb = mb;
	tx_info->nr_txbb = nr_txbb;
	tx_info->nr_segs = nr_segs;
	/* valid only for non inline segments */
	tx_info->data_offset = (void *) data - (void *) tx_desc;

	if (!is_inline(mb)) {
		for (i = 0, m = mb; i < nr_segs; i++, m = m->m_next) {
			if (m->m_len == 0) {
				i--;
				continue;
			}
			dma = pci_map_single(mdev->dev->pdev, m->m_data,
					     m->m_len, PCI_DMA_TODEVICE);
			data->addr = cpu_to_be64(dma);
			data->lkey = cpu_to_be32(mdev->mr.key);
			wmb();
			data->byte_count = cpu_to_be32(m->m_len);
			data++;
		}
		if (lso_header_size) {
			mb->m_data -= lso_header_size;
			mb->m_len += lso_header_size;
		}
		tx_info->inl = 0;
	} else {
		build_inline_wqe(tx_desc, mb, real_size, &vlan_tag, tx_ind);
		tx_info->inl = 1;
	}

	ring->prod += nr_txbb;

	/* If we used a bounce buffer then copy descriptor back into place */
	if (bounce)
		tx_desc = mlx4_en_bounce_to_desc(priv, ring, index, desc_size);

	if (ring->bf_enabled && desc_size <= MAX_BF && !bounce && !vlan_tag) {
		*(u32 *) (&tx_desc->ctrl.vlan_tag) |= ring->doorbell_qpn;
		op_own |= htonl((bf_index & 0xffff) << 8);
		/* Ensure new descriptor hits memory
		 * before setting ownership of this descriptor to HW */
		wmb();
		tx_desc->ctrl.owner_opcode = op_own;

		wmb();

		mlx4_bf_copy(ring->bf.reg + ring->bf.offset, (unsigned long *) &tx_desc->ctrl,
		     desc_size);

		wmb();

		ring->bf.offset ^= ring->bf.buf_size;
	} else {
		/* Ensure new descriptor hits memory
		 * before setting ownership of this descriptor to HW */
		wmb();
		tx_desc->ctrl.owner_opcode = op_own;
		wmb();
		writel(ring->doorbell_qpn, ring->bf.uar->map + MLX4_SEND_DOORBELL);
	}

	return 0;

tx_drop:
	*mbp = NULL;
	m_freem(mb);
	ring->errors++;
	return EINVAL;
}

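/*
 * mlx4_en_transmit_locked() runs with the ring's tx_lock held.  It
 * enqueues the new mbuf (if any) on the drbr ring and then drains the
 * ring through mlx4_en_xmit(), using drbr_peek/drbr_advance/
 * drbr_putback so a packet that could not be posted (ring full) is
 * kept at the head for the next attempt.
 */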
static int
mlx4_en_transmit_locked(struct ifnet *dev, int tx_ind, struct mbuf *m)
{
	struct mlx4_en_priv *priv = netdev_priv(dev);
	struct mlx4_en_tx_ring *ring;
	struct mbuf *next;
	int enqueued, err = 0;

	ring = &priv->tx_ring[tx_ind];
	if ((dev->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING || priv->port_up == 0) {
		if (m != NULL)
			err = drbr_enqueue(dev, ring->br, m);
		return (err);
	}

	enqueued = 0;
	if (m != NULL) {
		if ((err = drbr_enqueue(dev, ring->br, m)) != 0)
			return (err);
	}
	/* Process the queue */
	while ((next = drbr_peek(dev, ring->br)) != NULL) {
		if ((err = mlx4_en_xmit(dev, tx_ind, &next)) != 0) {
			if (next == NULL) {
				drbr_advance(dev, ring->br);
			} else {
				drbr_putback(dev, ring->br, next);
			}
			break;
		}
		drbr_advance(dev, ring->br);
		enqueued++;
		dev->if_obytes += next->m_pkthdr.len;
		if (next->m_flags & M_MCAST)
			dev->if_omcasts++;
		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(dev, next);
		if ((dev->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;
	}

	if (enqueued > 0)
		ring->watchdog_time = ticks;

	return (err);
}

void
mlx4_en_tx_que(void *context, int pending)
{
	struct mlx4_en_tx_ring *ring;
	struct mlx4_en_priv *priv;
	struct net_device *dev;
	struct mlx4_en_cq *cq;
	int tx_ind;

	cq = context;
	dev = cq->dev;
	priv = dev->if_softc;
	tx_ind = cq->ring;
	ring = &priv->tx_ring[tx_ind];
	if (dev->if_drv_flags & IFF_DRV_RUNNING) {
		mlx4_en_xmit_poll(priv, tx_ind);
		spin_lock(&ring->tx_lock);
		if (!drbr_empty(dev, ring->br))
			mlx4_en_transmit_locked(dev, tx_ind, NULL);
		spin_unlock(&ring->tx_lock);
	}
}

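/*
 * mlx4_en_transmit() is the if_transmit entry point.  A flow id from
 * the stack (when present and the packet is untagged) or
 * mlx4_en_select_queue() picks the ring; if the ring lock is free the
 * packet is sent inline, otherwise it is queued on the drbr ring and
 * the per-CQ task is scheduled to push it out later.
 */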
int
mlx4_en_transmit(struct ifnet *dev, struct mbuf *m)
{
	struct mlx4_en_priv *priv = netdev_priv(dev);
	struct mlx4_en_tx_ring *ring;
	struct mlx4_en_cq *cq;
	int i = 0, err = 0;

	/* Which queue to use */
	if ((m->m_flags & (M_FLOWID | M_VLANTAG)) == M_FLOWID)
		i = m->m_pkthdr.flowid % (MLX4_EN_NUM_HASH_RINGS - 1);
	else
		i = mlx4_en_select_queue(dev, m);

	ring = &priv->tx_ring[i];

	if (spin_trylock(&ring->tx_lock)) {
		err = mlx4_en_transmit_locked(dev, i, m);
		spin_unlock(&ring->tx_lock);
		/* Poll CQ here */
		mlx4_en_xmit_poll(priv, i);
	} else {
		err = drbr_enqueue(dev, ring->br, m);
		cq = &priv->tx_cq[i];
		taskqueue_enqueue(cq->tq, &cq->cq_task);
	}

	return (err);
}

/*
 * Flush ring buffers.
 */
void
mlx4_en_qflush(struct ifnet *dev)
{
	struct mlx4_en_priv *priv = netdev_priv(dev);
	struct mlx4_en_tx_ring *ring = priv->tx_ring;
	struct mbuf *m;

	for (int i = 0; i < priv->tx_ring_num; i++, ring++) {
		spin_lock(&ring->tx_lock);
		while ((m = buf_ring_dequeue_sc(ring->br)) != NULL)
			m_freem(m);
		spin_unlock(&ring->tx_lock);
	}
	if_qflush(dev);
}
1035219820Sjeff}
1036