mlx5_en_main.c revision 291184
/*-
 * Copyright (c) 2015 Mellanox Technologies. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: stable/10/sys/dev/mlx5/mlx5_en/mlx5_en_main.c 291184 2015-11-23 09:32:32Z hselasky $
 */

#include "en.h"

#include <sys/sockio.h>
#include <machine/atomic.h>

#define	ETH_DRIVER_VERSION	"3.1.0-dev"
char mlx5e_version[] = "Mellanox Ethernet driver"
    " (" ETH_DRIVER_VERSION ")";

struct mlx5e_rq_param {
	u32	rqc [MLX5_ST_SZ_DW(rqc)];
	struct mlx5_wq_param wq;
};

struct mlx5e_sq_param {
	u32	sqc [MLX5_ST_SZ_DW(sqc)];
	struct mlx5_wq_param wq;
};

struct mlx5e_cq_param {
	u32	cqc [MLX5_ST_SZ_DW(cqc)];
	struct mlx5_wq_param wq;
	u16	eq_ix;
};

struct mlx5e_channel_param {
	struct mlx5e_rq_param rq;
	struct mlx5e_sq_param sq;
	struct mlx5e_cq_param rx_cq;
	struct mlx5e_cq_param tx_cq;
};

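/*
 * Table mapping firmware link modes (MLX5E_*) to FreeBSD ifmedia
 * subtypes and baudrates. Entries with a zero baudrate are treated
 * as unsupported and are skipped by the lookup loops below.
 */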
static const struct {
	u32	subtype;
	u64	baudrate;
}	mlx5e_mode_table[MLX5E_LINK_MODES_NUMBER] = {

	[MLX5E_1000BASE_CX_SGMII] = {
		.subtype = IFM_1000_CX_SGMII,
		.baudrate = IF_Mbps(1000ULL),
	},
	[MLX5E_1000BASE_KX] = {
		.subtype = IFM_1000_KX,
		.baudrate = IF_Mbps(1000ULL),
	},
	[MLX5E_10GBASE_CX4] = {
		.subtype = IFM_10G_CX4,
		.baudrate = IF_Gbps(10ULL),
	},
	[MLX5E_10GBASE_KX4] = {
		.subtype = IFM_10G_KX4,
		.baudrate = IF_Gbps(10ULL),
	},
	[MLX5E_10GBASE_KR] = {
		.subtype = IFM_10G_KR,
		.baudrate = IF_Gbps(10ULL),
	},
	[MLX5E_20GBASE_KR2] = {
		.subtype = IFM_20G_KR2,
		.baudrate = IF_Gbps(20ULL),
	},
	[MLX5E_40GBASE_CR4] = {
		.subtype = IFM_40G_CR4,
		.baudrate = IF_Gbps(40ULL),
	},
	[MLX5E_40GBASE_KR4] = {
		.subtype = IFM_40G_KR4,
		.baudrate = IF_Gbps(40ULL),
	},
	[MLX5E_56GBASE_R4] = {
		.subtype = IFM_56G_R4,
		.baudrate = IF_Gbps(56ULL),
	},
	[MLX5E_10GBASE_CR] = {
		.subtype = IFM_10G_CR1,
		.baudrate = IF_Gbps(10ULL),
	},
	[MLX5E_10GBASE_SR] = {
		.subtype = IFM_10G_SR,
		.baudrate = IF_Gbps(10ULL),
	},
	[MLX5E_10GBASE_ER] = {
		.subtype = IFM_10G_ER,
		.baudrate = IF_Gbps(10ULL),
	},
	[MLX5E_40GBASE_SR4] = {
		.subtype = IFM_40G_SR4,
		.baudrate = IF_Gbps(40ULL),
	},
	[MLX5E_40GBASE_LR4] = {
		.subtype = IFM_40G_LR4,
		.baudrate = IF_Gbps(40ULL),
	},
	[MLX5E_100GBASE_CR4] = {
		.subtype = IFM_100G_CR4,
		.baudrate = IF_Gbps(100ULL),
	},
	[MLX5E_100GBASE_SR4] = {
		.subtype = IFM_100G_SR4,
		.baudrate = IF_Gbps(100ULL),
	},
	[MLX5E_100GBASE_KR4] = {
		.subtype = IFM_100G_KR4,
		.baudrate = IF_Gbps(100ULL),
	},
	[MLX5E_100GBASE_LR4] = {
		.subtype = IFM_100G_LR4,
		.baudrate = IF_Gbps(100ULL),
	},
	[MLX5E_100BASE_TX] = {
		.subtype = IFM_100_TX,
		.baudrate = IF_Mbps(100ULL),
	},
	[MLX5E_100BASE_T] = {
		.subtype = IFM_100_T,
		.baudrate = IF_Mbps(100ULL),
	},
	[MLX5E_10GBASE_T] = {
		.subtype = IFM_10G_T,
		.baudrate = IF_Gbps(10ULL),
	},
	[MLX5E_25GBASE_CR] = {
		.subtype = IFM_25G_CR,
		.baudrate = IF_Gbps(25ULL),
	},
	[MLX5E_25GBASE_KR] = {
		.subtype = IFM_25G_KR,
		.baudrate = IF_Gbps(25ULL),
	},
	[MLX5E_25GBASE_SR] = {
		.subtype = IFM_25G_SR,
		.baudrate = IF_Gbps(25ULL),
	},
	[MLX5E_50GBASE_CR2] = {
		.subtype = IFM_50G_CR2,
		.baudrate = IF_Gbps(50ULL),
	},
	[MLX5E_50GBASE_KR2] = {
		.subtype = IFM_50G_KR2,
		.baudrate = IF_Gbps(50ULL),
	},
};

MALLOC_DEFINE(M_MLX5EN, "MLX5EN", "MLX5 Ethernet");

static void
mlx5e_update_carrier(struct mlx5e_priv *priv)
{
	struct mlx5_core_dev *mdev = priv->mdev;
	u32 out[MLX5_ST_SZ_DW(ptys_reg)];
	u32 eth_proto_oper;
	int error;
	u8 port_state;
	u8 i;

	port_state = mlx5_query_vport_state(mdev,
	    MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT);

	if (port_state == VPORT_STATE_UP) {
		priv->media_status_last |= IFM_ACTIVE;
	} else {
		priv->media_status_last &= ~IFM_ACTIVE;
		priv->media_active_last = IFM_ETHER;
		if_link_state_change(priv->ifp, LINK_STATE_DOWN);
		return;
	}

	error = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN);
	if (error) {
		priv->media_active_last = IFM_ETHER;
		priv->ifp->if_baudrate = 1;
		if_printf(priv->ifp, "%s: query port ptys failed: 0x%x\n",
		    __func__, error);
		return;
	}
	eth_proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper);

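	/*
	 * Scan the operational protocol mask; the last matching table
	 * entry determines the reported media subtype and baudrate.
	 */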
	for (i = 0; i != MLX5E_LINK_MODES_NUMBER; i++) {
		if (mlx5e_mode_table[i].baudrate == 0)
			continue;
		if (MLX5E_PROT_MASK(i) & eth_proto_oper) {
			priv->ifp->if_baudrate =
			    mlx5e_mode_table[i].baudrate;
			priv->media_active_last =
			    mlx5e_mode_table[i].subtype | IFM_ETHER | IFM_FDX;
		}
	}
	if_link_state_change(priv->ifp, LINK_STATE_UP);
}

static void
mlx5e_media_status(struct ifnet *dev, struct ifmediareq *ifmr)
{
	struct mlx5e_priv *priv = dev->if_softc;

	ifmr->ifm_status = priv->media_status_last;
	ifmr->ifm_active = priv->media_active_last |
	    (priv->params_ethtool.rx_pauseframe_control ? IFM_ETH_RXPAUSE : 0) |
	    (priv->params_ethtool.tx_pauseframe_control ? IFM_ETH_TXPAUSE : 0);
}

static u32
mlx5e_find_link_mode(u32 subtype)
{
	u32 i;
	u32 link_mode = 0;

	for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) {
		if (mlx5e_mode_table[i].baudrate == 0)
			continue;
		if (mlx5e_mode_table[i].subtype == subtype)
			link_mode |= MLX5E_PROT_MASK(i);
	}

	return (link_mode);
}

static int
mlx5e_media_change(struct ifnet *dev)
{
	struct mlx5e_priv *priv = dev->if_softc;
	struct mlx5_core_dev *mdev = priv->mdev;
	u32 eth_proto_cap;
	u32 link_mode;
	int locked;
	int error;

	locked = PRIV_LOCKED(priv);
	if (!locked)
		PRIV_LOCK(priv);

	if (IFM_TYPE(priv->media.ifm_media) != IFM_ETHER) {
		error = EINVAL;
		goto done;
	}
	link_mode = mlx5e_find_link_mode(IFM_SUBTYPE(priv->media.ifm_media));

	error = mlx5_query_port_proto_cap(mdev, &eth_proto_cap, MLX5_PTYS_EN);
	if (error) {
		if_printf(dev, "Query port media capability failed\n");
		goto done;
	}
	if (IFM_SUBTYPE(priv->media.ifm_media) == IFM_AUTO)
		link_mode = eth_proto_cap;
	else
		link_mode = link_mode & eth_proto_cap;

	if (!link_mode) {
		if_printf(dev, "Unsupported link mode requested\n");
		error = EINVAL;
		goto done;
	}
	mlx5_set_port_status(mdev, MLX5_PORT_DOWN);
	mlx5_set_port_proto(mdev, link_mode, MLX5_PTYS_EN);
	mlx5_set_port_status(mdev, MLX5_PORT_UP);

done:
	if (!locked)
		PRIV_UNLOCK(priv);
	return (error);
}

static void
mlx5e_update_carrier_work(struct work_struct *work)
{
	struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
	    update_carrier_work);

	PRIV_LOCK(priv);
	if (test_bit(MLX5E_STATE_OPENED, &priv->state))
		mlx5e_update_carrier(priv);
	PRIV_UNLOCK(priv);
}

static void
mlx5e_update_pport_counters(struct mlx5e_priv *priv)
{
	struct mlx5_core_dev *mdev = priv->mdev;
	struct mlx5e_pport_stats *s = &priv->stats.pport;
	struct mlx5e_port_stats_debug *s_debug = &priv->stats.port_stats_debug;
	u32 *in;
	u32 *out;
	u64 *ptr;
	unsigned sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
	unsigned x;
	unsigned y;

	in = mlx5_vzalloc(sz);
	out = mlx5_vzalloc(sz);
	if (in == NULL || out == NULL)
		goto free_out;

	ptr = (uint64_t *)MLX5_ADDR_OF(ppcnt_reg, out, counter_set);

	MLX5_SET(ppcnt_reg, in, local_port, 1);

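	/*
	 * Read one PPCNT counter group at a time: select the group in
	 * the "grp" field and copy the 64-bit counters from the reply.
	 */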
	MLX5_SET(ppcnt_reg, in, grp, MLX5_IEEE_802_3_COUNTERS_GROUP);
	mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
	for (x = y = 0; x != MLX5E_PPORT_IEEE802_3_STATS_NUM; x++, y++)
		s->arg[y] = be64toh(ptr[x]);

	MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2819_COUNTERS_GROUP);
	mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
	for (x = 0; x != MLX5E_PPORT_RFC2819_STATS_NUM; x++, y++)
		s->arg[y] = be64toh(ptr[x]);
	for (y = 0; x != MLX5E_PPORT_RFC2819_STATS_NUM +
	    MLX5E_PPORT_RFC2819_STATS_DEBUG_NUM; x++, y++)
		s_debug->arg[y] = be64toh(ptr[x]);

	MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2863_COUNTERS_GROUP);
	mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
	for (x = 0; x != MLX5E_PPORT_RFC2863_STATS_DEBUG_NUM; x++, y++)
		s_debug->arg[y] = be64toh(ptr[x]);

	MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_COUNTERS_GROUP);
	mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
	for (x = 0; x != MLX5E_PPORT_PHYSICAL_LAYER_STATS_DEBUG_NUM; x++, y++)
		s_debug->arg[y] = be64toh(ptr[x]);
free_out:
	kvfree(in);
	kvfree(out);
}

static void
mlx5e_update_stats_work(struct work_struct *work)
{
	struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
	    update_stats_work);
	struct mlx5_core_dev *mdev = priv->mdev;
	struct mlx5e_vport_stats *s = &priv->stats.vport;
	struct mlx5e_rq_stats *rq_stats;
	struct mlx5e_sq_stats *sq_stats;
	struct buf_ring *sq_br;
#if (__FreeBSD_version < 1100000)
	struct ifnet *ifp = priv->ifp;
#endif

	u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)];
	u32 *out;
	int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out);
	u64 tso_packets = 0;
	u64 tso_bytes = 0;
	u64 tx_queue_dropped = 0;
	u64 tx_defragged = 0;
	u64 tx_offload_none = 0;
	u64 lro_packets = 0;
	u64 lro_bytes = 0;
	u64 sw_lro_queued = 0;
	u64 sw_lro_flushed = 0;
	u64 rx_csum_none = 0;
	u64 rx_wqe_err = 0;
	u32 rx_out_of_buffer = 0;
	int i;
	int j;

	PRIV_LOCK(priv);
	out = mlx5_vzalloc(outlen);
	if (out == NULL)
		goto free_out;
	if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
		goto free_out;

	/* Collect first the SW counters and then the HW counters, for consistency */
	for (i = 0; i < priv->params.num_channels; i++) {
		struct mlx5e_rq *rq = &priv->channel[i]->rq;

		rq_stats = &priv->channel[i]->rq.stats;

		/* collect stats from LRO */
		rq_stats->sw_lro_queued = rq->lro.lro_queued;
		rq_stats->sw_lro_flushed = rq->lro.lro_flushed;
		sw_lro_queued += rq_stats->sw_lro_queued;
		sw_lro_flushed += rq_stats->sw_lro_flushed;
		lro_packets += rq_stats->lro_packets;
		lro_bytes += rq_stats->lro_bytes;
		rx_csum_none += rq_stats->csum_none;
		rx_wqe_err += rq_stats->wqe_err;

		for (j = 0; j < priv->num_tc; j++) {
			sq_stats = &priv->channel[i]->sq[j].stats;
			sq_br = priv->channel[i]->sq[j].br;

			tso_packets += sq_stats->tso_packets;
			tso_bytes += sq_stats->tso_bytes;
			tx_queue_dropped += sq_stats->dropped;
			tx_queue_dropped += sq_br->br_drops;
			tx_defragged += sq_stats->defragged;
			tx_offload_none += sq_stats->csum_offload_none;
		}
	}

	/* update counters */
	s->tso_packets = tso_packets;
	s->tso_bytes = tso_bytes;
	s->tx_queue_dropped = tx_queue_dropped;
	s->tx_defragged = tx_defragged;
	s->lro_packets = lro_packets;
	s->lro_bytes = lro_bytes;
	s->sw_lro_queued = sw_lro_queued;
	s->sw_lro_flushed = sw_lro_flushed;
	s->rx_csum_none = rx_csum_none;
	s->rx_wqe_err = rx_wqe_err;

	/* HW counters */
	memset(in, 0, sizeof(in));

	MLX5_SET(query_vport_counter_in, in, opcode,
	    MLX5_CMD_OP_QUERY_VPORT_COUNTER);
	MLX5_SET(query_vport_counter_in, in, op_mod, 0);
	MLX5_SET(query_vport_counter_in, in, other_vport, 0);

	memset(out, 0, outlen);

	/* get number of out-of-buffer drops first */
	if (mlx5_vport_query_out_of_rx_buffer(mdev, priv->counter_set_id,
	    &rx_out_of_buffer))
		goto free_out;

	/* accumulate difference into a 64-bit counter */
	s->rx_out_of_buffer += (u64)(u32)(rx_out_of_buffer - s->rx_out_of_buffer_prev);
	s->rx_out_of_buffer_prev = rx_out_of_buffer;

	/* get port statistics */
	if (mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen))
		goto free_out;

#define	MLX5_GET_CTR(out, x) \
	MLX5_GET64(query_vport_counter_out, out, x)

	s->rx_error_packets =
	    MLX5_GET_CTR(out, received_errors.packets);
	s->rx_error_bytes =
	    MLX5_GET_CTR(out, received_errors.octets);
	s->tx_error_packets =
	    MLX5_GET_CTR(out, transmit_errors.packets);
	s->tx_error_bytes =
	    MLX5_GET_CTR(out, transmit_errors.octets);

	s->rx_unicast_packets =
	    MLX5_GET_CTR(out, received_eth_unicast.packets);
	s->rx_unicast_bytes =
	    MLX5_GET_CTR(out, received_eth_unicast.octets);
	s->tx_unicast_packets =
	    MLX5_GET_CTR(out, transmitted_eth_unicast.packets);
	s->tx_unicast_bytes =
	    MLX5_GET_CTR(out, transmitted_eth_unicast.octets);

	s->rx_multicast_packets =
	    MLX5_GET_CTR(out, received_eth_multicast.packets);
	s->rx_multicast_bytes =
	    MLX5_GET_CTR(out, received_eth_multicast.octets);
	s->tx_multicast_packets =
	    MLX5_GET_CTR(out, transmitted_eth_multicast.packets);
	s->tx_multicast_bytes =
	    MLX5_GET_CTR(out, transmitted_eth_multicast.octets);

	s->rx_broadcast_packets =
	    MLX5_GET_CTR(out, received_eth_broadcast.packets);
	s->rx_broadcast_bytes =
	    MLX5_GET_CTR(out, received_eth_broadcast.octets);
	s->tx_broadcast_packets =
	    MLX5_GET_CTR(out, transmitted_eth_broadcast.packets);
	s->tx_broadcast_bytes =
	    MLX5_GET_CTR(out, transmitted_eth_broadcast.octets);

	s->rx_packets =
	    s->rx_unicast_packets +
	    s->rx_multicast_packets +
	    s->rx_broadcast_packets -
	    s->rx_out_of_buffer;
	s->rx_bytes =
	    s->rx_unicast_bytes +
	    s->rx_multicast_bytes +
	    s->rx_broadcast_bytes;
	s->tx_packets =
	    s->tx_unicast_packets +
	    s->tx_multicast_packets +
	    s->tx_broadcast_packets;
	s->tx_bytes =
	    s->tx_unicast_bytes +
	    s->tx_multicast_bytes +
	    s->tx_broadcast_bytes;

	/* Update calculated offload counters */
	s->tx_csum_offload = s->tx_packets - tx_offload_none;
	s->rx_csum_good = s->rx_packets - s->rx_csum_none;

	/* Update per port counters */
	mlx5e_update_pport_counters(priv);

#if (__FreeBSD_version < 1100000)
	/* no if_get_counter() interface in FreeBSD 10 */
	ifp->if_ipackets = s->rx_packets;
	ifp->if_ierrors = s->rx_error_packets;
	ifp->if_iqdrops = s->rx_out_of_buffer;
	ifp->if_opackets = s->tx_packets;
	ifp->if_oerrors = s->tx_error_packets;
	ifp->if_snd.ifq_drops = s->tx_queue_dropped;
	ifp->if_ibytes = s->rx_bytes;
	ifp->if_obytes = s->tx_bytes;
#endif

free_out:
	kvfree(out);
	PRIV_UNLOCK(priv);
}

static void
mlx5e_update_stats(void *arg)
{
	struct mlx5e_priv *priv = arg;

	schedule_work(&priv->update_stats_work);

	callout_reset(&priv->watchdog, hz, &mlx5e_update_stats, priv);
}

static void
mlx5e_async_event_sub(struct mlx5e_priv *priv,
    enum mlx5_dev_event event)
{
	switch (event) {
	case MLX5_DEV_EVENT_PORT_UP:
	case MLX5_DEV_EVENT_PORT_DOWN:
		schedule_work(&priv->update_carrier_work);
		break;

	default:
		break;
	}
}

static void
mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv,
    enum mlx5_dev_event event, unsigned long param)
{
	struct mlx5e_priv *priv = vpriv;

	mtx_lock(&priv->async_events_mtx);
	if (test_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state))
		mlx5e_async_event_sub(priv, event);
	mtx_unlock(&priv->async_events_mtx);
}

static void
mlx5e_enable_async_events(struct mlx5e_priv *priv)
{
	set_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state);
}

static void
mlx5e_disable_async_events(struct mlx5e_priv *priv)
{
	mtx_lock(&priv->async_events_mtx);
	clear_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state);
	mtx_unlock(&priv->async_events_mtx);
}

static const char *mlx5e_rq_stats_desc[] = {
	MLX5E_RQ_STATS(MLX5E_STATS_DESC)
};

static int
mlx5e_create_rq(struct mlx5e_channel *c,
    struct mlx5e_rq_param *param,
    struct mlx5e_rq *rq)
{
	struct mlx5e_priv *priv = c->priv;
	struct mlx5_core_dev *mdev = priv->mdev;
	char buffer[16];
	void *rqc = param->rqc;
	void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq);
	int wq_sz;
	int err;
	int i;

	/* Create DMA descriptor TAG */
	if ((err = -bus_dma_tag_create(
	    bus_get_dma_tag(mdev->pdev->dev.bsddev),
	    1,				/* any alignment */
	    0,				/* no boundary */
	    BUS_SPACE_MAXADDR,		/* lowaddr */
	    BUS_SPACE_MAXADDR,		/* highaddr */
	    NULL, NULL,			/* filter, filterarg */
	    MJUM16BYTES,		/* maxsize */
	    1,				/* nsegments */
	    MJUM16BYTES,		/* maxsegsize */
	    0,				/* flags */
	    NULL, NULL,			/* lockfunc, lockfuncarg */
	    &rq->dma_tag)))
		goto done;

	err = mlx5_wq_ll_create(mdev, &param->wq, rqc_wq, &rq->wq,
	    &rq->wq_ctrl);
	if (err)
		goto err_free_dma_tag;

	rq->wq.db = &rq->wq.db[MLX5_RCV_DBR];

	if (priv->params.hw_lro_en) {
		rq->wqe_sz = priv->params.lro_wqe_sz;
	} else {
		rq->wqe_sz = MLX5E_SW2MB_MTU(priv->ifp->if_mtu);
	}
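	/* round up the WQE size to the next supported mbuf cluster size */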
	if (rq->wqe_sz > MJUM16BYTES) {
		err = -ENOMEM;
		goto err_rq_wq_destroy;
	} else if (rq->wqe_sz > MJUM9BYTES) {
		rq->wqe_sz = MJUM16BYTES;
	} else if (rq->wqe_sz > MJUMPAGESIZE) {
		rq->wqe_sz = MJUM9BYTES;
	} else if (rq->wqe_sz > MCLBYTES) {
		rq->wqe_sz = MJUMPAGESIZE;
	} else {
		rq->wqe_sz = MCLBYTES;
	}

	wq_sz = mlx5_wq_ll_get_size(&rq->wq);
	rq->mbuf = malloc(wq_sz * sizeof(rq->mbuf[0]), M_MLX5EN, M_WAITOK | M_ZERO);
	if (rq->mbuf == NULL) {
		err = -ENOMEM;
		goto err_rq_wq_destroy;
	}
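	/*
	 * Create one DMA map per RX WQE and pre-initialize the WQE
	 * scatter entry. The byte count leaves MLX5E_NET_IP_ALIGN bytes
	 * of headroom, presumably to keep the IP header aligned.
	 */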
	for (i = 0; i != wq_sz; i++) {
		struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, i);
		uint32_t byte_count = rq->wqe_sz - MLX5E_NET_IP_ALIGN;

		err = -bus_dmamap_create(rq->dma_tag, 0, &rq->mbuf[i].dma_map);
		if (err != 0) {
			while (i--)
				bus_dmamap_destroy(rq->dma_tag, rq->mbuf[i].dma_map);
			goto err_rq_mbuf_free;
		}
		wqe->data.lkey = c->mkey_be;
		wqe->data.byte_count = cpu_to_be32(byte_count | MLX5_HW_START_PADDING);
	}

	rq->pdev = c->pdev;
	rq->ifp = c->ifp;
	rq->channel = c;
	rq->ix = c->ix;

	snprintf(buffer, sizeof(buffer), "rxstat%d", c->ix);
	mlx5e_create_stats(&rq->stats.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
	    buffer, mlx5e_rq_stats_desc, MLX5E_RQ_STATS_NUM,
	    rq->stats.arg);

#ifdef HAVE_TURBO_LRO
	if (tcp_tlro_init(&rq->lro, c->ifp, MLX5E_BUDGET_MAX) != 0)
		rq->lro.mbuf = NULL;
#else
	if (tcp_lro_init(&rq->lro))
		rq->lro.lro_cnt = 0;
	else
		rq->lro.ifp = c->ifp;
#endif
	return (0);

err_rq_mbuf_free:
	free(rq->mbuf, M_MLX5EN);
err_rq_wq_destroy:
	mlx5_wq_destroy(&rq->wq_ctrl);
err_free_dma_tag:
	bus_dma_tag_destroy(rq->dma_tag);
done:
	return (err);
}

static void
mlx5e_destroy_rq(struct mlx5e_rq *rq)
{
	int wq_sz;
	int i;

	/* destroy all sysctl nodes */
	sysctl_ctx_free(&rq->stats.ctx);

	/* free leftover LRO packets, if any */
#ifdef HAVE_TURBO_LRO
	tcp_tlro_free(&rq->lro);
#else
	tcp_lro_free(&rq->lro);
#endif
	wq_sz = mlx5_wq_ll_get_size(&rq->wq);
	for (i = 0; i != wq_sz; i++) {
		if (rq->mbuf[i].mbuf != NULL) {
			bus_dmamap_unload(rq->dma_tag,
			    rq->mbuf[i].dma_map);
			m_freem(rq->mbuf[i].mbuf);
		}
		bus_dmamap_destroy(rq->dma_tag, rq->mbuf[i].dma_map);
	}
	free(rq->mbuf, M_MLX5EN);
	mlx5_wq_destroy(&rq->wq_ctrl);
}

static int
mlx5e_enable_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param)
{
	struct mlx5e_channel *c = rq->channel;
	struct mlx5e_priv *priv = c->priv;
	struct mlx5_core_dev *mdev = priv->mdev;

	void *in;
	void *rqc;
	void *wq;
	int inlen;
	int err;

	inlen = MLX5_ST_SZ_BYTES(create_rq_in) +
	    sizeof(u64) * rq->wq_ctrl.buf.npages;
	in = mlx5_vzalloc(inlen);
	if (in == NULL)
		return (-ENOMEM);

	rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
	wq = MLX5_ADDR_OF(rqc, rqc, wq);

	memcpy(rqc, param->rqc, sizeof(param->rqc));

	MLX5_SET(rqc, rqc, cqn, c->rq.cq.mcq.cqn);
	MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST);
	MLX5_SET(rqc, rqc, flush_in_error_en, 1);
	if (priv->counter_set_id >= 0)
		MLX5_SET(rqc, rqc, counter_set_id, priv->counter_set_id);
	MLX5_SET(wq, wq, log_wq_pg_sz, rq->wq_ctrl.buf.page_shift -
	    PAGE_SHIFT);
	MLX5_SET64(wq, wq, dbr_addr, rq->wq_ctrl.db.dma);

	mlx5_fill_page_array(&rq->wq_ctrl.buf,
	    (__be64 *) MLX5_ADDR_OF(wq, wq, pas));

	err = mlx5_core_create_rq(mdev, in, inlen, &rq->rqn);

	kvfree(in);

	return (err);
}

static int
mlx5e_modify_rq(struct mlx5e_rq *rq, int curr_state, int next_state)
{
	struct mlx5e_channel *c = rq->channel;
	struct mlx5e_priv *priv = c->priv;
	struct mlx5_core_dev *mdev = priv->mdev;

	void *in;
	void *rqc;
	int inlen;
	int err;

	inlen = MLX5_ST_SZ_BYTES(modify_rq_in);
	in = mlx5_vzalloc(inlen);
	if (in == NULL)
		return (-ENOMEM);

	rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);

	MLX5_SET(modify_rq_in, in, rqn, rq->rqn);
	MLX5_SET(modify_rq_in, in, rq_state, curr_state);
	MLX5_SET(rqc, rqc, state, next_state);

	err = mlx5_core_modify_rq(mdev, in, inlen);

	kvfree(in);

	return (err);
}

static void
mlx5e_disable_rq(struct mlx5e_rq *rq)
{
	struct mlx5e_channel *c = rq->channel;
	struct mlx5e_priv *priv = c->priv;
	struct mlx5_core_dev *mdev = priv->mdev;

	mlx5_core_destroy_rq(mdev, rq->rqn);
}

static int
mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq)
{
	struct mlx5e_channel *c = rq->channel;
	struct mlx5e_priv *priv = c->priv;
	struct mlx5_wq_ll *wq = &rq->wq;
	int i;

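	/* poll up to roughly four seconds (1000 * 4ms) for the RQ to fill */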
	for (i = 0; i < 1000; i++) {
		if (wq->cur_sz >= priv->params.min_rx_wqes)
			return (0);

		msleep(4);
	}
	return (-ETIMEDOUT);
}

static int
mlx5e_open_rq(struct mlx5e_channel *c,
    struct mlx5e_rq_param *param,
    struct mlx5e_rq *rq)
{
	int err;
	int i;

	err = mlx5e_create_rq(c, param, rq);
	if (err)
		return (err);

	err = mlx5e_enable_rq(rq, param);
	if (err)
		goto err_destroy_rq;

	err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY);
	if (err)
		goto err_disable_rq;

	c->rq.enabled = 1;

	/*
	 * Test send queues, which will trigger
	 * "mlx5e_post_rx_wqes()":
	 */
	for (i = 0; i != c->num_tc; i++)
		mlx5e_send_nop(&c->sq[i], 1, true);
	return (0);

err_disable_rq:
	mlx5e_disable_rq(rq);
err_destroy_rq:
	mlx5e_destroy_rq(rq);

	return (err);
}

static void
mlx5e_close_rq(struct mlx5e_rq *rq)
{
	rq->enabled = 0;
	mlx5e_modify_rq(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR);
}

static void
mlx5e_close_rq_wait(struct mlx5e_rq *rq)
{
	/* wait till RQ is empty */
	while (!mlx5_wq_ll_is_empty(&rq->wq)) {
		msleep(4);
		rq->cq.mcq.comp(&rq->cq.mcq);
	}

	mlx5e_disable_rq(rq);
	mlx5e_destroy_rq(rq);
}

static void
mlx5e_free_sq_db(struct mlx5e_sq *sq)
{
	int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
	int x;

	for (x = 0; x != wq_sz; x++)
		bus_dmamap_destroy(sq->dma_tag, sq->mbuf[x].dma_map);
	free(sq->mbuf, M_MLX5EN);
}

static int
mlx5e_alloc_sq_db(struct mlx5e_sq *sq)
{
	int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
	int err;
	int x;

	sq->mbuf = malloc(wq_sz * sizeof(sq->mbuf[0]), M_MLX5EN, M_WAITOK | M_ZERO);
	if (sq->mbuf == NULL)
		return (-ENOMEM);

	/* Create DMA descriptor MAPs */
	for (x = 0; x != wq_sz; x++) {
		err = -bus_dmamap_create(sq->dma_tag, 0, &sq->mbuf[x].dma_map);
		if (err != 0) {
			while (x--)
				bus_dmamap_destroy(sq->dma_tag, sq->mbuf[x].dma_map);
			free(sq->mbuf, M_MLX5EN);
			return (err);
		}
	}
	return (0);
}

static const char *mlx5e_sq_stats_desc[] = {
	MLX5E_SQ_STATS(MLX5E_STATS_DESC)
};

static int
mlx5e_create_sq(struct mlx5e_channel *c,
    int tc,
    struct mlx5e_sq_param *param,
    struct mlx5e_sq *sq)
{
	struct mlx5e_priv *priv = c->priv;
	struct mlx5_core_dev *mdev = priv->mdev;
	char buffer[16];

	void *sqc = param->sqc;
	void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq);
	int err;

	/* Create DMA descriptor TAG */
	if ((err = -bus_dma_tag_create(
	    bus_get_dma_tag(mdev->pdev->dev.bsddev),
	    1,				/* any alignment */
	    0,				/* no boundary */
	    BUS_SPACE_MAXADDR,		/* lowaddr */
	    BUS_SPACE_MAXADDR,		/* highaddr */
	    NULL, NULL,			/* filter, filterarg */
	    MLX5E_MAX_TX_PAYLOAD_SIZE,	/* maxsize */
	    MLX5E_MAX_TX_MBUF_FRAGS,	/* nsegments */
	    MLX5E_MAX_TX_MBUF_SIZE,	/* maxsegsize */
	    0,				/* flags */
	    NULL, NULL,			/* lockfunc, lockfuncarg */
	    &sq->dma_tag)))
		goto done;

	err = mlx5_alloc_map_uar(mdev, &sq->uar);
	if (err)
		goto err_free_dma_tag;

	err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq,
	    &sq->wq_ctrl);
	if (err)
		goto err_unmap_free_uar;

	sq->wq.db = &sq->wq.db[MLX5_SND_DBR];
	sq->uar_map = sq->uar.map;
	sq->uar_bf_map = sq->uar.bf_map;
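	/*
	 * The UAR BlueFlame register area is split in two halves,
	 * which are presumably used in an alternating fashion by the
	 * transmit doorbell code.
	 */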
	sq->bf_buf_size = (1 << MLX5_CAP_GEN(mdev, log_bf_reg_size)) / 2;

	err = mlx5e_alloc_sq_db(sq);
	if (err)
		goto err_sq_wq_destroy;

	sq->pdev = c->pdev;
	sq->mkey_be = c->mkey_be;
	sq->channel = c;
	sq->tc = tc;

	sq->br = buf_ring_alloc(MLX5E_SQ_TX_QUEUE_SIZE, M_MLX5EN,
	    M_WAITOK, &sq->lock);
	if (sq->br == NULL) {
		if_printf(c->ifp, "%s: Failed allocating sq drbr buffer\n",
		    __func__);
		err = -ENOMEM;
		goto err_free_sq_db;
	}

	sq->sq_tq = taskqueue_create_fast("mlx5e_que", M_WAITOK,
	    taskqueue_thread_enqueue, &sq->sq_tq);
	if (sq->sq_tq == NULL) {
		if_printf(c->ifp, "%s: Failed allocating taskqueue\n",
		    __func__);
		err = -ENOMEM;
		goto err_free_drbr;
	}

	TASK_INIT(&sq->sq_task, 0, mlx5e_tx_que, sq);
	taskqueue_start_threads(&sq->sq_tq, 1, PI_NET, "%s tx sq",
	    c->ifp->if_xname);

	snprintf(buffer, sizeof(buffer), "txstat%dtc%d", c->ix, tc);
	mlx5e_create_stats(&sq->stats.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
	    buffer, mlx5e_sq_stats_desc, MLX5E_SQ_STATS_NUM,
	    sq->stats.arg);

	return (0);

err_free_drbr:
	buf_ring_free(sq->br, M_MLX5EN);
err_free_sq_db:
	mlx5e_free_sq_db(sq);
err_sq_wq_destroy:
	mlx5_wq_destroy(&sq->wq_ctrl);

err_unmap_free_uar:
	mlx5_unmap_free_uar(mdev, &sq->uar);

err_free_dma_tag:
	bus_dma_tag_destroy(sq->dma_tag);
done:
	return (err);
}

static void
mlx5e_destroy_sq(struct mlx5e_sq *sq)
{
	struct mlx5e_channel *c = sq->channel;
	struct mlx5e_priv *priv = c->priv;

	/* destroy all sysctl nodes */
	sysctl_ctx_free(&sq->stats.ctx);

	mlx5e_free_sq_db(sq);
	mlx5_wq_destroy(&sq->wq_ctrl);
	mlx5_unmap_free_uar(priv->mdev, &sq->uar);
	taskqueue_drain(sq->sq_tq, &sq->sq_task);
	taskqueue_free(sq->sq_tq);
	buf_ring_free(sq->br, M_MLX5EN);
}

static int
mlx5e_enable_sq(struct mlx5e_sq *sq, struct mlx5e_sq_param *param)
{
	struct mlx5e_channel *c = sq->channel;
	struct mlx5e_priv *priv = c->priv;
	struct mlx5_core_dev *mdev = priv->mdev;

	void *in;
	void *sqc;
	void *wq;
	int inlen;
	int err;

	inlen = MLX5_ST_SZ_BYTES(create_sq_in) +
	    sizeof(u64) * sq->wq_ctrl.buf.npages;
	in = mlx5_vzalloc(inlen);
	if (in == NULL)
		return (-ENOMEM);

	sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
	wq = MLX5_ADDR_OF(sqc, sqc, wq);

	memcpy(sqc, param->sqc, sizeof(param->sqc));

	MLX5_SET(sqc, sqc, tis_num_0, priv->tisn[sq->tc]);
	MLX5_SET(sqc, sqc, cqn, c->sq[sq->tc].cq.mcq.cqn);
	MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST);
	MLX5_SET(sqc, sqc, tis_lst_sz, 1);
	MLX5_SET(sqc, sqc, flush_in_error_en, 1);

	MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
	MLX5_SET(wq, wq, uar_page, sq->uar.index);
	MLX5_SET(wq, wq, log_wq_pg_sz, sq->wq_ctrl.buf.page_shift -
	    PAGE_SHIFT);
	MLX5_SET64(wq, wq, dbr_addr, sq->wq_ctrl.db.dma);

	mlx5_fill_page_array(&sq->wq_ctrl.buf,
	    (__be64 *) MLX5_ADDR_OF(wq, wq, pas));

	err = mlx5_core_create_sq(mdev, in, inlen, &sq->sqn);

	kvfree(in);

	return (err);
}

static int
mlx5e_modify_sq(struct mlx5e_sq *sq, int curr_state, int next_state)
{
	struct mlx5e_channel *c = sq->channel;
	struct mlx5e_priv *priv = c->priv;
	struct mlx5_core_dev *mdev = priv->mdev;

	void *in;
	void *sqc;
	int inlen;
	int err;

	inlen = MLX5_ST_SZ_BYTES(modify_sq_in);
	in = mlx5_vzalloc(inlen);
	if (in == NULL)
		return (-ENOMEM);

	sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);

	MLX5_SET(modify_sq_in, in, sqn, sq->sqn);
	MLX5_SET(modify_sq_in, in, sq_state, curr_state);
	MLX5_SET(sqc, sqc, state, next_state);

	err = mlx5_core_modify_sq(mdev, in, inlen);

	kvfree(in);

	return (err);
}

static void
mlx5e_disable_sq(struct mlx5e_sq *sq)
{
	struct mlx5e_channel *c = sq->channel;
	struct mlx5e_priv *priv = c->priv;
	struct mlx5_core_dev *mdev = priv->mdev;

	mlx5_core_destroy_sq(mdev, sq->sqn);
}

static int
mlx5e_open_sq(struct mlx5e_channel *c,
    int tc,
    struct mlx5e_sq_param *param,
    struct mlx5e_sq *sq)
{
	int err;

	err = mlx5e_create_sq(c, tc, param, sq);
	if (err)
		return (err);

	err = mlx5e_enable_sq(sq, param);
	if (err)
		goto err_destroy_sq;

	err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY);
	if (err)
		goto err_disable_sq;

	atomic_store_rel_int(&sq->queue_state, MLX5E_SQ_READY);

	return (0);

err_disable_sq:
	mlx5e_disable_sq(sq);
err_destroy_sq:
	mlx5e_destroy_sq(sq);

	return (err);
}

static void
mlx5e_close_sq(struct mlx5e_sq *sq)
{
	/* ensure hw is notified of all pending wqes */
	if (mlx5e_sq_has_room_for(sq, 1))
		mlx5e_send_nop(sq, 1, true);

	mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY, MLX5_SQC_STATE_ERR);
}

static void
mlx5e_close_sq_wait(struct mlx5e_sq *sq)
{
	/* wait till SQ is empty */
	while (sq->cc != sq->pc) {
		msleep(4);
		sq->cq.mcq.comp(&sq->cq.mcq);
	}

	mlx5e_disable_sq(sq);
	mlx5e_destroy_sq(sq);
}

static int
mlx5e_create_cq(struct mlx5e_channel *c,
    struct mlx5e_cq_param *param,
    struct mlx5e_cq *cq,
    mlx5e_cq_comp_t *comp)
{
	struct mlx5e_priv *priv = c->priv;
	struct mlx5_core_dev *mdev = priv->mdev;
	struct mlx5_core_cq *mcq = &cq->mcq;
	int eqn_not_used;
	int irqn;
	int err;
	u32 i;

	param->wq.buf_numa_node = 0;
	param->wq.db_numa_node = 0;
	param->eq_ix = c->ix;

	err = mlx5_cqwq_create(mdev, &param->wq, param->cqc, &cq->wq,
	    &cq->wq_ctrl);
	if (err)
		return (err);

	mlx5_vector2eqn(mdev, param->eq_ix, &eqn_not_used, &irqn);

	mcq->cqe_sz = 64;
	mcq->set_ci_db = cq->wq_ctrl.db.db;
	mcq->arm_db = cq->wq_ctrl.db.db + 1;
	*mcq->set_ci_db = 0;
	*mcq->arm_db = 0;
	mcq->vector = param->eq_ix;
	mcq->comp = comp;
	mcq->event = mlx5e_cq_error_event;
	mcq->irqn = irqn;
	mcq->uar = &priv->cq_uar;

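	/*
	 * Pre-set the ownership bits of all CQEs so that every entry
	 * initially appears hardware-owned; 0xf1 is assumed to encode
	 * an invalid opcode together with the HW ownership bit.
	 */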
	for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) {
		struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, i);

		cqe->op_own = 0xf1;
	}

	cq->channel = c;

	return (0);
}

static void
mlx5e_destroy_cq(struct mlx5e_cq *cq)
{
	mlx5_wq_destroy(&cq->wq_ctrl);
}

static int
mlx5e_enable_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param,
    u8 moderation_mode)
{
	struct mlx5e_channel *c = cq->channel;
	struct mlx5e_priv *priv = c->priv;
	struct mlx5_core_dev *mdev = priv->mdev;
	struct mlx5_core_cq *mcq = &cq->mcq;
	void *in;
	void *cqc;
	int inlen;
	int irqn_not_used;
	int eqn;
	int err;

	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
	    sizeof(u64) * cq->wq_ctrl.buf.npages;
	in = mlx5_vzalloc(inlen);
	if (in == NULL)
		return (-ENOMEM);

	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);

	memcpy(cqc, param->cqc, sizeof(param->cqc));

	mlx5_fill_page_array(&cq->wq_ctrl.buf,
	    (__be64 *) MLX5_ADDR_OF(create_cq_in, in, pas));

	mlx5_vector2eqn(mdev, param->eq_ix, &eqn, &irqn_not_used);

	MLX5_SET(cqc, cqc, cq_period_mode, moderation_mode);
	MLX5_SET(cqc, cqc, c_eqn, eqn);
	MLX5_SET(cqc, cqc, uar_page, mcq->uar->index);
	MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
	    PAGE_SHIFT);
	MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma);

	err = mlx5_core_create_cq(mdev, mcq, in, inlen);

	kvfree(in);

	if (err)
		return (err);

	mlx5e_cq_arm(cq);

	return (0);
}

static void
mlx5e_disable_cq(struct mlx5e_cq *cq)
{
	struct mlx5e_channel *c = cq->channel;
	struct mlx5e_priv *priv = c->priv;
	struct mlx5_core_dev *mdev = priv->mdev;

	mlx5_core_destroy_cq(mdev, &cq->mcq);
}

static int
mlx5e_open_cq(struct mlx5e_channel *c,
    struct mlx5e_cq_param *param,
    struct mlx5e_cq *cq,
    mlx5e_cq_comp_t *comp,
    u8 moderation_mode)
{
	int err;

	err = mlx5e_create_cq(c, param, cq, comp);
	if (err)
		return (err);

	err = mlx5e_enable_cq(cq, param, moderation_mode);
	if (err)
		goto err_destroy_cq;

	return (0);

err_destroy_cq:
	mlx5e_destroy_cq(cq);

	return (err);
}

static void
mlx5e_close_cq(struct mlx5e_cq *cq)
{
	mlx5e_disable_cq(cq);
	mlx5e_destroy_cq(cq);
}

static int
mlx5e_open_tx_cqs(struct mlx5e_channel *c,
    struct mlx5e_channel_param *cparam)
{
	int err;
	int tc;

	for (tc = 0; tc < c->num_tc; tc++) {
		/* open completion queue */
		err = mlx5e_open_cq(c, &cparam->tx_cq, &c->sq[tc].cq,
		    &mlx5e_tx_cq_comp, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
		if (err)
			goto err_close_tx_cqs;
	}
	return (0);

err_close_tx_cqs:
	for (tc--; tc >= 0; tc--)
		mlx5e_close_cq(&c->sq[tc].cq);

	return (err);
}

static void
mlx5e_close_tx_cqs(struct mlx5e_channel *c)
{
	int tc;

	for (tc = 0; tc < c->num_tc; tc++)
		mlx5e_close_cq(&c->sq[tc].cq);
}

static int
mlx5e_open_sqs(struct mlx5e_channel *c,
    struct mlx5e_channel_param *cparam)
{
	int err;
	int tc;

	for (tc = 0; tc < c->num_tc; tc++) {
		err = mlx5e_open_sq(c, tc, &cparam->sq, &c->sq[tc]);
		if (err)
			goto err_close_sqs;
	}

	return (0);

err_close_sqs:
	for (tc--; tc >= 0; tc--) {
		mlx5e_close_sq(&c->sq[tc]);
		mlx5e_close_sq_wait(&c->sq[tc]);
	}

	return (err);
}

static void
mlx5e_close_sqs(struct mlx5e_channel *c)
{
	int tc;

	for (tc = 0; tc < c->num_tc; tc++)
		mlx5e_close_sq(&c->sq[tc]);
}

static void
mlx5e_close_sqs_wait(struct mlx5e_channel *c)
{
	int tc;

	for (tc = 0; tc < c->num_tc; tc++)
		mlx5e_close_sq_wait(&c->sq[tc]);
}

static void
mlx5e_chan_mtx_init(struct mlx5e_channel *c)
{
	int tc;

	mtx_init(&c->rq.mtx, "mlx5rx", MTX_NETWORK_LOCK, MTX_DEF);

	for (tc = 0; tc < c->num_tc; tc++) {
		mtx_init(&c->sq[tc].lock, "mlx5tx", MTX_NETWORK_LOCK, MTX_DEF);
		mtx_init(&c->sq[tc].comp_lock, "mlx5comp", MTX_NETWORK_LOCK,
		    MTX_DEF);
	}
}

static void
mlx5e_chan_mtx_destroy(struct mlx5e_channel *c)
{
	int tc;

	mtx_destroy(&c->rq.mtx);

	for (tc = 0; tc < c->num_tc; tc++) {
		mtx_destroy(&c->sq[tc].lock);
		mtx_destroy(&c->sq[tc].comp_lock);
	}
}

static int
mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
    struct mlx5e_channel_param *cparam,
    struct mlx5e_channel *volatile *cp)
{
	struct mlx5e_channel *c;
	u8 rx_moderation_mode;
	int err;

	c = malloc(sizeof(*c), M_MLX5EN, M_WAITOK | M_ZERO);
	if (c == NULL)
		return (-ENOMEM);

	c->priv = priv;
	c->ix = ix;
	c->cpu = 0;
	c->pdev = &priv->mdev->pdev->dev;
	c->ifp = priv->ifp;
	c->mkey_be = cpu_to_be32(priv->mr.key);
	c->num_tc = priv->num_tc;

	/* init mutexes */
	mlx5e_chan_mtx_init(c);

	/* open transmit completion queue */
	err = mlx5e_open_tx_cqs(c, cparam);
	if (err)
		goto err_free;

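	/*
	 * Select the RX completion moderation mode; counting from the
	 * last CQE is only used when the hardware supports it.
	 */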
	switch (priv->params.rx_cq_moderation_mode) {
	case 0:
		rx_moderation_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
		break;
	default:
		if (MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe))
			rx_moderation_mode = MLX5_CQ_PERIOD_MODE_START_FROM_CQE;
		else
			rx_moderation_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
		break;
	}

	/* open receive completion queue */
	err = mlx5e_open_cq(c, &cparam->rx_cq, &c->rq.cq,
	    &mlx5e_rx_cq_comp, rx_moderation_mode);
	if (err)
		goto err_close_tx_cqs;

	err = mlx5e_open_sqs(c, cparam);
	if (err)
		goto err_close_rx_cq;

	err = mlx5e_open_rq(c, &cparam->rq, &c->rq);
	if (err)
		goto err_close_sqs;

	/* store channel pointer */
	*cp = c;

	/* poll receive queue initially */
	c->rq.cq.mcq.comp(&c->rq.cq.mcq);

	return (0);

err_close_sqs:
	mlx5e_close_sqs(c);
	mlx5e_close_sqs_wait(c);

err_close_rx_cq:
	mlx5e_close_cq(&c->rq.cq);

err_close_tx_cqs:
	mlx5e_close_tx_cqs(c);

err_free:
	/* destroy mutexes */
	mlx5e_chan_mtx_destroy(c);
	free(c, M_MLX5EN);
	return (err);
}

static void
mlx5e_close_channel(struct mlx5e_channel *volatile *pp)
{
	struct mlx5e_channel *c = *pp;

	/* check if channel is already closed */
	if (c == NULL)
		return;
	mlx5e_close_rq(&c->rq);
	mlx5e_close_sqs(c);
}

static void
mlx5e_close_channel_wait(struct mlx5e_channel *volatile *pp)
{
	struct mlx5e_channel *c = *pp;

	/* check if channel is already closed */
	if (c == NULL)
		return;
	/* ensure channel pointer is no longer used */
	*pp = NULL;

	mlx5e_close_rq_wait(&c->rq);
	mlx5e_close_sqs_wait(c);
	mlx5e_close_cq(&c->rq.cq);
	mlx5e_close_tx_cqs(c);
	/* destroy mutexes */
	mlx5e_chan_mtx_destroy(c);
	free(c, M_MLX5EN);
}

static void
mlx5e_build_rq_param(struct mlx5e_priv *priv,
    struct mlx5e_rq_param *param)
{
	void *rqc = param->rqc;
	void *wq = MLX5_ADDR_OF(rqc, rqc, wq);

	MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST);
	MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN);
	MLX5_SET(wq, wq, log_wq_stride, ilog2(sizeof(struct mlx5e_rx_wqe)));
	MLX5_SET(wq, wq, log_wq_sz, priv->params.log_rq_size);
	MLX5_SET(wq, wq, pd, priv->pdn);

	param->wq.buf_numa_node = 0;
	param->wq.db_numa_node = 0;
	param->wq.linear = 1;
}

static void
mlx5e_build_sq_param(struct mlx5e_priv *priv,
    struct mlx5e_sq_param *param)
{
	void *sqc = param->sqc;
	void *wq = MLX5_ADDR_OF(sqc, sqc, wq);

	MLX5_SET(wq, wq, log_wq_sz, priv->params.log_sq_size);
	MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB));
	MLX5_SET(wq, wq, pd, priv->pdn);

	param->wq.buf_numa_node = 0;
	param->wq.db_numa_node = 0;
	param->wq.linear = 1;
}

static void
mlx5e_build_common_cq_param(struct mlx5e_priv *priv,
    struct mlx5e_cq_param *param)
{
	void *cqc = param->cqc;

	MLX5_SET(cqc, cqc, uar_page, priv->cq_uar.index);
}

static void
mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
    struct mlx5e_cq_param *param)
{
	void *cqc = param->cqc;

	MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_rq_size);
	MLX5_SET(cqc, cqc, cq_period, priv->params.rx_cq_moderation_usec);
	MLX5_SET(cqc, cqc, cq_max_count, priv->params.rx_cq_moderation_pkts);

	mlx5e_build_common_cq_param(priv, param);
}

static void
mlx5e_build_tx_cq_param(struct mlx5e_priv *priv,
    struct mlx5e_cq_param *param)
{
	void *cqc = param->cqc;

	MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_sq_size);
	MLX5_SET(cqc, cqc, cq_period, priv->params.tx_cq_moderation_usec);
	MLX5_SET(cqc, cqc, cq_max_count, priv->params.tx_cq_moderation_pkts);

	mlx5e_build_common_cq_param(priv, param);
}

static void
mlx5e_build_channel_param(struct mlx5e_priv *priv,
    struct mlx5e_channel_param *cparam)
{
	memset(cparam, 0, sizeof(*cparam));

	mlx5e_build_rq_param(priv, &cparam->rq);
	mlx5e_build_sq_param(priv, &cparam->sq);
	mlx5e_build_rx_cq_param(priv, &cparam->rx_cq);
	mlx5e_build_tx_cq_param(priv, &cparam->tx_cq);
}

static int
mlx5e_open_channels(struct mlx5e_priv *priv)
{
	struct mlx5e_channel_param cparam;
	void *ptr;
	int err;
	int i;
	int j;

	priv->channel = malloc(priv->params.num_channels *
	    sizeof(struct mlx5e_channel *), M_MLX5EN, M_WAITOK | M_ZERO);
	if (priv->channel == NULL)
		return (-ENOMEM);

	mlx5e_build_channel_param(priv, &cparam);
	for (i = 0; i < priv->params.num_channels; i++) {
		err = mlx5e_open_channel(priv, i, &cparam, &priv->channel[i]);
		if (err)
			goto err_close_channels;
	}

	for (j = 0; j < priv->params.num_channels; j++) {
		err = mlx5e_wait_for_min_rx_wqes(&priv->channel[j]->rq);
		if (err)
			goto err_close_channels;
	}

	return (0);

err_close_channels:
	for (i--; i >= 0; i--) {
		mlx5e_close_channel(&priv->channel[i]);
		mlx5e_close_channel_wait(&priv->channel[i]);
	}

	/* remove "volatile" attribute from "channel" pointer */
	ptr = __DECONST(void *, priv->channel);
	priv->channel = NULL;

	free(ptr, M_MLX5EN);

	return (err);
}

static void
mlx5e_close_channels(struct mlx5e_priv *priv)
{
	void *ptr;
	int i;

	if (priv->channel == NULL)
		return;

	for (i = 0; i < priv->params.num_channels; i++)
		mlx5e_close_channel(&priv->channel[i]);
	for (i = 0; i < priv->params.num_channels; i++)
		mlx5e_close_channel_wait(&priv->channel[i]);

	/* remove "volatile" attribute from "channel" pointer */
	ptr = __DECONST(void *, priv->channel);
	priv->channel = NULL;

	free(ptr, M_MLX5EN);
}

static int
mlx5e_open_tis(struct mlx5e_priv *priv, int tc)
{
	struct mlx5_core_dev *mdev = priv->mdev;
	u32 in[MLX5_ST_SZ_DW(create_tis_in)];
	void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);

	memset(in, 0, sizeof(in));

	MLX5_SET(tisc, tisc, prio, tc);
	MLX5_SET(tisc, tisc, transport_domain, priv->tdn);

	return (mlx5_core_create_tis(mdev, in, sizeof(in), &priv->tisn[tc]));
}

static void
mlx5e_close_tis(struct mlx5e_priv *priv, int tc)
{
	mlx5_core_destroy_tis(priv->mdev, priv->tisn[tc]);
}

static int
mlx5e_open_tises(struct mlx5e_priv *priv)
{
	int num_tc = priv->num_tc;
	int err;
	int tc;

	for (tc = 0; tc < num_tc; tc++) {
		err = mlx5e_open_tis(priv, tc);
		if (err)
			goto err_close_tises;
	}

	return (0);

err_close_tises:
	for (tc--; tc >= 0; tc--)
		mlx5e_close_tis(priv, tc);

	return (err);
}

static void
mlx5e_close_tises(struct mlx5e_priv *priv)
{
	int num_tc = priv->num_tc;
	int tc;

	for (tc = 0; tc < num_tc; tc++)
		mlx5e_close_tis(priv, tc);
}

static int
mlx5e_open_rqt(struct mlx5e_priv *priv)
{
	struct mlx5_core_dev *mdev = priv->mdev;
	u32 *in;
	u32 out[MLX5_ST_SZ_DW(create_rqt_out)];
	void *rqtc;
	int inlen;
	int err;
	int sz;
	int i;

	sz = 1 << priv->params.rx_hash_log_tbl_sz;

	inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz;
	in = mlx5_vzalloc(inlen);
	if (in == NULL)
		return (-ENOMEM);
	rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);

	MLX5_SET(rqtc, rqtc, rqt_actual_size, sz);
	MLX5_SET(rqtc, rqtc, rqt_max_size, sz);

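	/* spread the RQ table entries round-robin across all channels */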
	for (i = 0; i < sz; i++) {
		int ix = i % priv->params.num_channels;

		MLX5_SET(rqtc, rqtc, rq_num[i], priv->channel[ix]->rq.rqn);
	}

	MLX5_SET(create_rqt_in, in, opcode, MLX5_CMD_OP_CREATE_RQT);

	memset(out, 0, sizeof(out));
	err = mlx5_cmd_exec_check_status(mdev, in, inlen, out, sizeof(out));
	if (!err)
		priv->rqtn = MLX5_GET(create_rqt_out, out, rqtn);

	kvfree(in);

	return (err);
}

static void
mlx5e_close_rqt(struct mlx5e_priv *priv)
{
	u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)];
	u32 out[MLX5_ST_SZ_DW(destroy_rqt_out)];

	memset(in, 0, sizeof(in));

	MLX5_SET(destroy_rqt_in, in, opcode, MLX5_CMD_OP_DESTROY_RQT);
	MLX5_SET(destroy_rqt_in, in, rqtn, priv->rqtn);

	mlx5_cmd_exec_check_status(priv->mdev, in, sizeof(in), out,
	    sizeof(out));
}

static void
mlx5e_build_tir_ctx(struct mlx5e_priv *priv, u32 * tirc, int tt)
{
	void *hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
	__be32 *hkey;

	MLX5_SET(tirc, tirc, transport_domain, priv->tdn);

#define	ROUGH_MAX_L2_L3_HDR_SZ 256

#define	MLX5_HASH_IP     (MLX5_HASH_FIELD_SEL_SRC_IP   |\
			  MLX5_HASH_FIELD_SEL_DST_IP)

#define	MLX5_HASH_ALL    (MLX5_HASH_FIELD_SEL_SRC_IP   |\
			  MLX5_HASH_FIELD_SEL_DST_IP   |\
			  MLX5_HASH_FIELD_SEL_L4_SPORT |\
			  MLX5_HASH_FIELD_SEL_L4_DPORT)

#define	MLX5_HASH_IP_IPSEC_SPI	(MLX5_HASH_FIELD_SEL_SRC_IP   |\
				 MLX5_HASH_FIELD_SEL_DST_IP   |\
				 MLX5_HASH_FIELD_SEL_IPSEC_SPI)

	if (priv->params.hw_lro_en) {
		MLX5_SET(tirc, tirc, lro_enable_mask,
		    MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO |
		    MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO);
		MLX5_SET(tirc, tirc, lro_max_msg_sz,
		    (priv->params.lro_wqe_sz -
		    ROUGH_MAX_L2_L3_HDR_SZ) >> 8);
		/* TODO: add the option to choose timer value dynamically */
		MLX5_SET(tirc, tirc, lro_timeout_period_usecs,
		    MLX5_CAP_ETH(priv->mdev,
		    lro_timer_supported_periods[2]));
	}
	switch (tt) {
	case MLX5E_TT_ANY:
		MLX5_SET(tirc, tirc, disp_type,
		    MLX5_TIRC_DISP_TYPE_DIRECT);
		MLX5_SET(tirc, tirc, inline_rqn,
		    priv->channel[0]->rq.rqn);
		break;
	default:
		MLX5_SET(tirc, tirc, disp_type,
		    MLX5_TIRC_DISP_TYPE_INDIRECT);
		MLX5_SET(tirc, tirc, indirect_table,
		    priv->rqtn);
		MLX5_SET(tirc, tirc, rx_hash_fn,
		    MLX5_TIRC_RX_HASH_FN_HASH_TOEPLITZ);
		MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
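		/* use a fixed Toeplitz RSS hash key for all TIRs */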
		hkey = (__be32 *) MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
		hkey[0] = cpu_to_be32(0xD181C62C);
		hkey[1] = cpu_to_be32(0xF7F4DB5B);
		hkey[2] = cpu_to_be32(0x1983A2FC);
		hkey[3] = cpu_to_be32(0x943E1ADB);
		hkey[4] = cpu_to_be32(0xD9389E6B);
		hkey[5] = cpu_to_be32(0xD1039C2C);
		hkey[6] = cpu_to_be32(0xA74499AD);
		hkey[7] = cpu_to_be32(0x593D56D9);
		hkey[8] = cpu_to_be32(0xF3253C06);
		hkey[9] = cpu_to_be32(0x2ADC1FFC);
		break;
	}

	switch (tt) {
	case MLX5E_TT_IPV4_TCP:
		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
		    MLX5_L3_PROT_TYPE_IPV4);
		MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
		    MLX5_L4_PROT_TYPE_TCP);
		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
		    MLX5_HASH_ALL);
		break;

	case MLX5E_TT_IPV6_TCP:
		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
		    MLX5_L3_PROT_TYPE_IPV6);
		MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
		    MLX5_L4_PROT_TYPE_TCP);
		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
		    MLX5_HASH_ALL);
		break;

	case MLX5E_TT_IPV4_UDP:
		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
		    MLX5_L3_PROT_TYPE_IPV4);
		MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
		    MLX5_L4_PROT_TYPE_UDP);
		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
		    MLX5_HASH_ALL);
		break;

	case MLX5E_TT_IPV6_UDP:
		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
		    MLX5_L3_PROT_TYPE_IPV6);
		MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
		    MLX5_L4_PROT_TYPE_UDP);
		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
		    MLX5_HASH_ALL);
		break;

	case MLX5E_TT_IPV4_IPSEC_AH:
		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
		    MLX5_L3_PROT_TYPE_IPV4);
		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
		    MLX5_HASH_IP_IPSEC_SPI);
		break;

	case MLX5E_TT_IPV6_IPSEC_AH:
		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
		    MLX5_L3_PROT_TYPE_IPV6);
		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
		    MLX5_HASH_IP_IPSEC_SPI);
		break;

	case MLX5E_TT_IPV4_IPSEC_ESP:
		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
		    MLX5_L3_PROT_TYPE_IPV4);
		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
		    MLX5_HASH_IP_IPSEC_SPI);
		break;

	case MLX5E_TT_IPV6_IPSEC_ESP:
		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
		    MLX5_L3_PROT_TYPE_IPV6);
		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
		    MLX5_HASH_IP_IPSEC_SPI);
		break;

	case MLX5E_TT_IPV4:
		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
		    MLX5_L3_PROT_TYPE_IPV4);
		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
		    MLX5_HASH_IP);
		break;

	case MLX5E_TT_IPV6:
		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
		    MLX5_L3_PROT_TYPE_IPV6);
		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
		    MLX5_HASH_IP);
		break;

	default:
		break;
	}
}

static int
mlx5e_open_tir(struct mlx5e_priv *priv, int tt)
{
	struct mlx5_core_dev *mdev = priv->mdev;
	u32 *in;
	void *tirc;
	int inlen;
	int err;

	inlen = MLX5_ST_SZ_BYTES(create_tir_in);
	in = mlx5_vzalloc(inlen);
	if (in == NULL)
		return (-ENOMEM);
	tirc = MLX5_ADDR_OF(create_tir_in, in, tir_context);

	mlx5e_build_tir_ctx(priv, tirc, tt);

	err = mlx5_core_create_tir(mdev, in, inlen, &priv->tirn[tt]);

	kvfree(in);

	return (err);
}

static void
mlx5e_close_tir(struct mlx5e_priv *priv, int tt)
{
	mlx5_core_destroy_tir(priv->mdev, priv->tirn[tt]);
}

static int
mlx5e_open_tirs(struct mlx5e_priv *priv)
{
	int err;
	int i;

	for (i = 0; i < MLX5E_NUM_TT; i++) {
		err = mlx5e_open_tir(priv, i);
		if (err)
			goto err_close_tirs;
	}

	return (0);

err_close_tirs:
	for (i--; i >= 0; i--)
		mlx5e_close_tir(priv, i);

	return (err);
}

static void
mlx5e_close_tirs(struct mlx5e_priv *priv)
{
	int i;

	for (i = 0; i < MLX5E_NUM_TT; i++)
		mlx5e_close_tir(priv, i);
}

/*
 * SW MTU does not include headers,
 * HW MTU includes all headers and checksums.
 */
static int
mlx5e_set_dev_port_mtu(struct ifnet *ifp, int sw_mtu)
{
	struct mlx5e_priv *priv = ifp->if_softc;
	struct mlx5_core_dev *mdev = priv->mdev;
	int hw_mtu;
	int min_mtu;
	int err;

	/*
	 * Set the MTU to zero first, in order to
	 * discover the FW's minimal supported MTU.
	 */
	err = mlx5_set_port_mtu(mdev, 0);
	if (err)
		return (err);

	err = mlx5_query_port_oper_mtu(mdev, &min_mtu);
	if (err) {
		if_printf(ifp, "Query port minimal MTU failed\n");
		return (err);
	}

	if (sw_mtu < MLX5E_HW2SW_MTU(min_mtu)) {
		ifp->if_mtu = sw_mtu;
		return (0);
	}

	err = mlx5_set_port_mtu(mdev, MLX5E_SW2HW_MTU(sw_mtu));
	if (err)
		return (err);

	err = mlx5_query_port_oper_mtu(mdev, &hw_mtu);
	if (!err) {
		ifp->if_mtu = MLX5E_HW2SW_MTU(hw_mtu);

		if (ifp->if_mtu != sw_mtu) {
			if_printf(ifp, "Actual port MTU %d is different from "
			    "requested MTU %d\n", (int)ifp->if_mtu, sw_mtu);
		}
2042	} else {
2043		if_printf(ifp, "Query port MTU, after setting new "
2044		    "MTU value, failed\n");
2045		ifp->if_mtu = sw_mtu;
2046	}
2047	return (0);
2048}
2049
2050int
2051mlx5e_open_locked(struct ifnet *ifp)
2052{
2053	struct mlx5e_priv *priv = ifp->if_softc;
2054	int err;
2055
2056	/* check if already opened */
2057	if (test_bit(MLX5E_STATE_OPENED, &priv->state) != 0)
2058		return (0);
2059
2060	err = mlx5e_open_tises(priv);
2061	if (err) {
2062		if_printf(ifp, "%s: mlx5e_open_tises failed, %d\n",
2063		    __func__, err);
2064		return (err);
2065	}
2066	err = mlx5_vport_alloc_q_counter(priv->mdev, &priv->counter_set_id);
2067	if (err) {
2068		if_printf(priv->ifp,
2069		    "%s: mlx5_vport_alloc_q_counter failed: %d\n",
2070		    __func__, err);
2071		goto err_close_tises;
2072	}
2073	err = mlx5e_open_channels(priv);
2074	if (err) {
2075		if_printf(ifp, "%s: mlx5e_open_channels failed, %d\n",
2076		    __func__, err);
2077		goto err_dalloc_q_counter;
2078	}
2079	err = mlx5e_open_rqt(priv);
2080	if (err) {
2081		if_printf(ifp, "%s: mlx5e_open_rqt failed, %d\n",
2082		    __func__, err);
2083		goto err_close_channels;
2084	}
2085	err = mlx5e_open_tirs(priv);
2086	if (err) {
2087		if_printf(ifp, "%s: mlx5e_open_tir failed, %d\n",
2088		    __func__, err);
2089		goto err_close_rqls;
2090	}
2091	err = mlx5e_open_flow_table(priv);
2092	if (err) {
2093		if_printf(ifp, "%s: mlx5e_open_flow_table failed, %d\n",
2094		    __func__, err);
2095		goto err_close_tirs;
2096	}
2097	err = mlx5e_add_all_vlan_rules(priv);
2098	if (err) {
2099		if_printf(ifp, "%s: mlx5e_add_all_vlan_rules failed, %d\n",
2100		    __func__, err);
2101		goto err_close_flow_table;
2102	}
2103	set_bit(MLX5E_STATE_OPENED, &priv->state);
2104
2105	mlx5e_update_carrier(priv);
2106	mlx5e_set_rx_mode_core(priv);
2107
2108	return (0);
2109
2110err_close_flow_table:
2111	mlx5e_close_flow_table(priv);
2112
2113err_close_tirs:
2114	mlx5e_close_tirs(priv);
2115
2116err_close_rqt:
2117	mlx5e_close_rqt(priv);
2118
2119err_close_channels:
2120	mlx5e_close_channels(priv);
2121
2122err_dealloc_q_counter:
2123	mlx5_vport_dealloc_q_counter(priv->mdev, priv->counter_set_id);
2124
2125err_close_tises:
2126	mlx5e_close_tises(priv);
2127
2128	return (err);
2129}
2130
2131static void
2132mlx5e_open(void *arg)
2133{
2134	struct mlx5e_priv *priv = arg;
2135
2136	PRIV_LOCK(priv);
2137	if (mlx5_set_port_status(priv->mdev, MLX5_PORT_UP))
2138		if_printf(priv->ifp,
2139		    "%s: Setting port status to up failed\n",
2140		    __func__);
2141
2142	mlx5e_open_locked(priv->ifp);
2143	priv->ifp->if_drv_flags |= IFF_DRV_RUNNING;
2144	PRIV_UNLOCK(priv);
2145}
2146
2147int
2148mlx5e_close_locked(struct ifnet *ifp)
2149{
2150	struct mlx5e_priv *priv = ifp->if_softc;
2151
2152	/* check if already closed */
2153	if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
2154		return (0);
2155
2156	clear_bit(MLX5E_STATE_OPENED, &priv->state);
2157
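	/* Tear down in the reverse order of mlx5e_open_locked(). */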
2158	mlx5e_set_rx_mode_core(priv);
2159	mlx5e_del_all_vlan_rules(priv);
2160	if_link_state_change(priv->ifp, LINK_STATE_DOWN);
2161	mlx5e_close_flow_table(priv);
2162	mlx5e_close_tirs(priv);
2163	mlx5e_close_rqt(priv);
2164	mlx5e_close_channels(priv);
2165	mlx5_vport_dealloc_q_counter(priv->mdev, priv->counter_set_id);
2166	mlx5e_close_tises(priv);
2167
2168	return (0);
2169}
2170
2171#if (__FreeBSD_version >= 1100000)
2172static uint64_t
2173mlx5e_get_counter(struct ifnet *ifp, ift_counter cnt)
2174{
2175	struct mlx5e_priv *priv = ifp->if_softc;
2176	u64 retval;
2177
2178	/* PRIV_LOCK(priv); XXX not allowed */
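	/*
	 * Reading these counters without the private lock is assumed to
	 * be safe: they are updated asynchronously by the statistics
	 * work handler and are only read here, so the worst case is a
	 * slightly stale value.
	 */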
2179	switch (cnt) {
2180	case IFCOUNTER_IPACKETS:
2181		retval = priv->stats.vport.rx_packets;
2182		break;
2183	case IFCOUNTER_IERRORS:
2184		retval = priv->stats.vport.rx_error_packets;
2185		break;
2186	case IFCOUNTER_IQDROPS:
2187		retval = priv->stats.vport.rx_out_of_buffer;
2188		break;
2189	case IFCOUNTER_OPACKETS:
2190		retval = priv->stats.vport.tx_packets;
2191		break;
2192	case IFCOUNTER_OERRORS:
2193		retval = priv->stats.vport.tx_error_packets;
2194		break;
2195	case IFCOUNTER_IBYTES:
2196		retval = priv->stats.vport.rx_bytes;
2197		break;
2198	case IFCOUNTER_OBYTES:
2199		retval = priv->stats.vport.tx_bytes;
2200		break;
2201	case IFCOUNTER_IMCASTS:
2202		retval = priv->stats.vport.rx_multicast_packets;
2203		break;
2204	case IFCOUNTER_OMCASTS:
2205		retval = priv->stats.vport.tx_multicast_packets;
2206		break;
2207	case IFCOUNTER_OQDROPS:
2208		retval = priv->stats.vport.tx_queue_dropped;
2209		break;
2210	default:
2211		retval = if_get_counter_default(ifp, cnt);
2212		break;
2213	}
2214	/* PRIV_UNLOCK(priv); XXX not allowed */
2215	return (retval);
2216}
2217#endif
2218
2219static void
2220mlx5e_set_rx_mode(struct ifnet *ifp)
2221{
2222	struct mlx5e_priv *priv = ifp->if_softc;
2223
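	/* Defer the actual programming to the set_rx_mode work handler. */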
2224	schedule_work(&priv->set_rx_mode_work);
2225}
2226
2227static int
2228mlx5e_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
2229{
2230	struct mlx5e_priv *priv;
2231	struct ifreq *ifr;
2232	struct ifi2creq i2c;
2233	int error = 0;
2234	int mask = 0;
2235	int size_read = 0;
2236	int module_num;
2237	int max_mtu;
2238
2239	priv = ifp->if_softc;
2240
2241	/* check if detaching */
2242	if (priv == NULL || priv->gone != 0)
2243		return (ENXIO);
2244
2245	switch (command) {
2246	case SIOCSIFMTU:
2247		ifr = (struct ifreq *)data;
2248
2249		PRIV_LOCK(priv);
2250		mlx5_query_port_max_mtu(priv->mdev, &max_mtu);
2251
2252		if (ifr->ifr_mtu >= MLX5E_MTU_MIN &&
2253		    ifr->ifr_mtu <= MIN(MLX5E_MTU_MAX, max_mtu)) {
2254			int was_opened;
2255
2256			was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
2257			if (was_opened)
2258				mlx5e_close_locked(ifp);
2259
2260			/* set new MTU */
2261			mlx5e_set_dev_port_mtu(ifp, ifr->ifr_mtu);
2262
2263			if (was_opened)
2264				mlx5e_open_locked(ifp);
2265		} else {
2266			error = EINVAL;
2267			if_printf(ifp, "Invalid MTU value. Min val: %d, Max val: %d\n",
2268			    MLX5E_MTU_MIN, MIN(MLX5E_MTU_MAX, max_mtu));
2269		}
2270		PRIV_UNLOCK(priv);
2271		break;
2272	case SIOCSIFFLAGS:
2273		if ((ifp->if_flags & IFF_UP) &&
2274		    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
2275			mlx5e_set_rx_mode(ifp);
2276			break;
2277		}
2278		PRIV_LOCK(priv);
2279		if (ifp->if_flags & IFF_UP) {
2280			if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
2281				if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
2282					mlx5e_open_locked(ifp);
2283				ifp->if_drv_flags |= IFF_DRV_RUNNING;
2284				mlx5_set_port_status(priv->mdev, MLX5_PORT_UP);
2285			}
2286		} else {
2287			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2288				mlx5_set_port_status(priv->mdev,
2289				    MLX5_PORT_DOWN);
2290				if (test_bit(MLX5E_STATE_OPENED, &priv->state) != 0)
2291					mlx5e_close_locked(ifp);
2292				mlx5e_update_carrier(priv);
2293				ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2294			}
2295		}
2296		PRIV_UNLOCK(priv);
2297		break;
2298	case SIOCADDMULTI:
2299	case SIOCDELMULTI:
2300		mlx5e_set_rx_mode(ifp);
2301		break;
2302	case SIOCSIFMEDIA:
2303	case SIOCGIFMEDIA:
2304	case SIOCGIFXMEDIA:
2305		ifr = (struct ifreq *)data;
2306		error = ifmedia_ioctl(ifp, ifr, &priv->media, command);
2307		break;
2308	case SIOCSIFCAP:
2309		ifr = (struct ifreq *)data;
2310		PRIV_LOCK(priv);
2311		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
2312
2313		if (mask & IFCAP_TXCSUM) {
2314			ifp->if_capenable ^= IFCAP_TXCSUM;
2315			ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);
2316
2317			if (IFCAP_TSO4 & ifp->if_capenable &&
2318			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
2319				ifp->if_capenable &= ~IFCAP_TSO4;
2320				ifp->if_hwassist &= ~CSUM_IP_TSO;
2321				if_printf(ifp,
2322				    "tso4 disabled due to -txcsum.\n");
2323			}
2324		}
2325		if (mask & IFCAP_TXCSUM_IPV6) {
2326			ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
2327			ifp->if_hwassist ^= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);
2328
2329			if (IFCAP_TSO6 & ifp->if_capenable &&
2330			    !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
2331				ifp->if_capenable &= ~IFCAP_TSO6;
2332				ifp->if_hwassist &= ~CSUM_IP6_TSO;
2333				if_printf(ifp,
2334				    "tso6 disabled due to -txcsum6.\n");
2335			}
2336		}
2337		if (mask & IFCAP_RXCSUM)
2338			ifp->if_capenable ^= IFCAP_RXCSUM;
2339		if (mask & IFCAP_RXCSUM_IPV6)
2340			ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
2341		if (mask & IFCAP_TSO4) {
2342			if (!(IFCAP_TSO4 & ifp->if_capenable) &&
2343			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
2344				if_printf(ifp, "enable txcsum first.\n");
2345				error = EAGAIN;
2346				goto out;
2347			}
2348			ifp->if_capenable ^= IFCAP_TSO4;
2349			ifp->if_hwassist ^= CSUM_IP_TSO;
2350		}
2351		if (mask & IFCAP_TSO6) {
2352			if (!(IFCAP_TSO6 & ifp->if_capenable) &&
2353			    !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
2354				if_printf(ifp, "enable txcsum6 first.\n");
2355				error = EAGAIN;
2356				goto out;
2357			}
2358			ifp->if_capenable ^= IFCAP_TSO6;
2359			ifp->if_hwassist ^= CSUM_IP6_TSO;
2360		}
2361		if (mask & IFCAP_VLAN_HWFILTER) {
2362			if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
2363				mlx5e_disable_vlan_filter(priv);
2364			else
2365				mlx5e_enable_vlan_filter(priv);
2366
2367			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
2368		}
2369		if (mask & IFCAP_VLAN_HWTAGGING)
2370			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
2371		if (mask & IFCAP_WOL_MAGIC)
2372			ifp->if_capenable ^= IFCAP_WOL_MAGIC;
2373
2374		VLAN_CAPABILITIES(ifp);
2375		/* Turning off LRO also turns off HW LRO, if it is enabled. */
2376		if (mask & IFCAP_LRO) {
2377			int was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
2378			bool need_restart = false;
2379
2380			ifp->if_capenable ^= IFCAP_LRO;
2381			if (!(ifp->if_capenable & IFCAP_LRO)) {
2382				if (priv->params.hw_lro_en) {
2383					priv->params.hw_lro_en = false;
2384					need_restart = true;
2385					/* XXX: is this the correct way to sync the ethtool knob? */
2386					priv->params_ethtool.hw_lro = priv->params.hw_lro_en;
2387				}
2388			}
2389			if (was_opened && need_restart) {
2390				mlx5e_close_locked(ifp);
2391				mlx5e_open_locked(ifp);
2392			}
2393		}
2394out:
2395		PRIV_UNLOCK(priv);
2396		break;
2397
2398	case SIOCGI2C:
2399		ifr = (struct ifreq *)data;
2400
2401		/*
2402		 * Copy from the user-space address ifr_data to the
2403		 * kernel-space address i2c
2404		 */
2405		error = copyin(ifr->ifr_data, &i2c, sizeof(i2c));
2406		if (error)
2407			break;
2408
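		/* Reject requests larger than the i2c data buffer. */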
2409		if (i2c.len > sizeof(i2c.data)) {
2410			error = EINVAL;
2411			break;
2412		}
2413
2414		PRIV_LOCK(priv);
2415		/* Get module_num, which is required for the EEPROM query */
2416		error = mlx5_query_module_num(priv->mdev, &module_num);
2417		if (error) {
2418			if_printf(ifp, "Query module num failed, eeprom "
2419			    "reading is not supported\n");
2420			goto err_i2c;
2421		}
2422
2423		/*
2424		 * Note that we ignore i2c.addr here. The driver hardcodes
2425		 * the address to 0x50, while the standard expects it to be 0xA0.
2426		 */
2427		error = mlx5_query_eeprom(priv->mdev,
2428		    MLX5E_I2C_ADDR_LOW, MLX5E_EEPROM_LOW_PAGE,
2429		    (uint32_t)i2c.offset, (uint32_t)i2c.len, module_num,
2430		    (uint32_t *)i2c.data, &size_read);
2431		if (error) {
2432			if_printf(ifp, "Query eeprom failed, eeprom "
2433			    "reading is not supported\n");
2434			goto err_i2c;
2435		}
2436
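		/*
		 * A single EEPROM query returns at most
		 * MLX5_EEPROM_MAX_BYTES, so a larger request is completed
		 * by a second query starting where the first one stopped
		 * (size_read).
		 */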
2437		if (i2c.len > MLX5_EEPROM_MAX_BYTES) {
2438			error = mlx5_query_eeprom(priv->mdev,
2439			    MLX5E_I2C_ADDR_LOW, MLX5E_EEPROM_LOW_PAGE,
2440			    (uint32_t)(i2c.offset + size_read),
2441			    (uint32_t)(i2c.len - size_read), module_num,
2442			    (uint32_t *)(i2c.data + size_read), &size_read);
2443		}
2444		if (error) {
2445			if_printf(ifp, "Query eeprom failed, eeprom "
2446			    "reading is not supported\n");
2447			goto err_i2c;
2448		}
2449
2450		error = copyout(&i2c, ifr->ifr_data, sizeof(i2c));
2451err_i2c:
2452		PRIV_UNLOCK(priv);
2453		break;
2454
2455	default:
2456		error = ether_ioctl(ifp, command, data);
2457		break;
2458	}
2459	return (error);
2460}
2461
2462static int
2463mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev)
2464{
2465	/*
2466	 * TODO: uncomment once FW really sets all these bits:
2467	 * if (!mdev->caps.eth.rss_ind_tbl_cap || !mdev->caps.eth.csum_cap ||
2468	 *     !mdev->caps.eth.max_lso_cap || !mdev->caps.eth.vlan_cap ||
2469	 *     !(mdev->caps.gen.flags & MLX5_DEV_CAP_FLAG_SCQE_BRK_MOD))
2470	 *	return (-ENOTSUPP);
2471	 */
2472
2473	/* TODO: add more must-have features */
2474
2475	return (0);
2476}
2477
2478static void
2479mlx5e_build_ifp_priv(struct mlx5_core_dev *mdev,
2480    struct mlx5e_priv *priv,
2481    int num_comp_vectors)
2482{
2483	/*
2484	 * TODO: Consider link speed for setting "log_sq_size",
2485	 * "log_rq_size" and "cq_moderation_xxx":
2486	 */
2487	priv->params.log_sq_size =
2488	    MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
2489	priv->params.log_rq_size =
2490	    MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE;
2491	priv->params.rx_cq_moderation_usec =
2492	    MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ?
2493	    MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE :
2494	    MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC;
2495	priv->params.rx_cq_moderation_mode =
2496	    MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ? 1 : 0;
2497	priv->params.rx_cq_moderation_pkts =
2498	    MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS;
2499	priv->params.tx_cq_moderation_usec =
2500	    MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC;
2501	priv->params.tx_cq_moderation_pkts =
2502	    MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS;
2503	priv->params.min_rx_wqes =
2504	    MLX5E_PARAMS_DEFAULT_MIN_RX_WQES;
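	/*
	 * Size the RX hash table (a log2 value) to at least the default,
	 * growing it when there are more completion vectors than that.
	 */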
2505	priv->params.rx_hash_log_tbl_sz =
2506	    (order_base_2(num_comp_vectors) >
2507	    MLX5E_PARAMS_DEFAULT_RX_HASH_LOG_TBL_SZ) ?
2508	    order_base_2(num_comp_vectors) :
2509	    MLX5E_PARAMS_DEFAULT_RX_HASH_LOG_TBL_SZ;
2510	priv->params.num_tc = 1;
2511	priv->params.default_vlan_prio = 0;
2512	priv->counter_set_id = -1;
2513
2514	/*
2515	 * HW LRO is currently off by default. Once that changes we
2516	 * will consult the HW capability: "!!MLX5_CAP_ETH(mdev, lro_cap)"
2517	 */
2518	priv->params.hw_lro_en = false;
2519	priv->params.lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ;
2520
2521	priv->mdev = mdev;
2522	priv->params.num_channels = num_comp_vectors;
2523	priv->order_base_2_num_channels = order_base_2(num_comp_vectors);
2524	priv->queue_mapping_channel_mask =
2525	    roundup_pow_of_two(num_comp_vectors) - 1;
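	/*
	 * Illustrative example: with 8 completion vectors the mask is
	 * roundup_pow_of_two(8) - 1 = 7, so the transmit path can pick a
	 * channel using "flowid & 7".
	 */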
2526	priv->num_tc = priv->params.num_tc;
2527	priv->default_vlan_prio = priv->params.default_vlan_prio;
2528
2529	INIT_WORK(&priv->update_stats_work, mlx5e_update_stats_work);
2530	INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work);
2531	INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work);
2532}
2533
2534static int
2535mlx5e_create_mkey(struct mlx5e_priv *priv, u32 pdn,
2536    struct mlx5_core_mr *mr)
2537{
2538	struct ifnet *ifp = priv->ifp;
2539	struct mlx5_core_dev *mdev = priv->mdev;
2540	struct mlx5_create_mkey_mbox_in *in;
2541	int err;
2542
2543	in = mlx5_vzalloc(sizeof(*in));
2544	if (in == NULL) {
2545		if_printf(ifp, "%s: failed to allocate inbox\n", __func__);
2546		return (-ENOMEM);
2547	}
2548	in->seg.flags = MLX5_PERM_LOCAL_WRITE |
2549	    MLX5_PERM_LOCAL_READ |
2550	    MLX5_ACCESS_MODE_PA;
2551	in->seg.flags_pd = cpu_to_be32(pdn | MLX5_MKEY_LEN64);
2552	in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
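	/*
	 * Descriptive note: the upper 24 bits of this field hold the QPN
	 * (0xffffff meaning the mkey is not bound to a specific QP) and
	 * the low 8 bits hold the mkey variant; see the PRM for the
	 * authoritative layout.
	 */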
2553
2554	err = mlx5_core_create_mkey(mdev, mr, in, sizeof(*in), NULL, NULL,
2555	    NULL);
2556	if (err)
2557		if_printf(ifp, "%s: mlx5_core_create_mkey failed, %d\n",
2558		    __func__, err);
2559
2560	kvfree(in);
2561
2562	return (err);
2563}
2564
2565static const char *mlx5e_vport_stats_desc[] = {
2566	MLX5E_VPORT_STATS(MLX5E_STATS_DESC)
2567};
2568
2569static const char *mlx5e_pport_stats_desc[] = {
2570	MLX5E_PPORT_STATS(MLX5E_STATS_DESC)
2571};
2572
2573static void
2574mlx5e_priv_mtx_init(struct mlx5e_priv *priv)
2575{
2576	mtx_init(&priv->async_events_mtx, "mlx5async", MTX_NETWORK_LOCK, MTX_DEF);
2577	sx_init(&priv->state_lock, "mlx5state");
2578	callout_init_mtx(&priv->watchdog, &priv->async_events_mtx, 0);
2579}
2580
2581static void
2582mlx5e_priv_mtx_destroy(struct mlx5e_priv *priv)
2583{
2584	mtx_destroy(&priv->async_events_mtx);
2585	sx_destroy(&priv->state_lock);
2586}
2587
2588static int
2589sysctl_firmware(SYSCTL_HANDLER_ARGS)
2590{
2591	/*
2592	 * "%d.%d.%d" is the string format.
2593	 * fw_rev_{maj,min,sub} each return a u16, and 2^16 = 65536
2594	 * needs at most 5 chars to print.
2595	 * Together with the two "." separators and the terminating NUL,
2596	 * we need at most 18 (5*3 + 3) chars.
2597	 */
2598	char fw[18];
2599	struct mlx5e_priv *priv = arg1;
2600	int error;
2601
2602	snprintf(fw, sizeof(fw), "%d.%d.%d", fw_rev_maj(priv->mdev), fw_rev_min(priv->mdev),
2603	    fw_rev_sub(priv->mdev));
2604	error = sysctl_handle_string(oidp, fw, sizeof(fw), req);
2605	return (error);
2606}
2607
2608static void
2609mlx5e_add_hw_stats(struct mlx5e_priv *priv)
2610{
2611	SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_hw),
2612	    OID_AUTO, "fw_version", CTLTYPE_STRING | CTLFLAG_RD, priv, 0,
2613	    sysctl_firmware, "A", "HCA firmware version");
2614
2615	SYSCTL_ADD_STRING(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_hw),
2616	    OID_AUTO, "board_id", CTLFLAG_RD, priv->mdev->board_id, 0,
2617	    "Board ID");
2618}
2619
2620static void *
2621mlx5e_create_ifp(struct mlx5_core_dev *mdev)
2622{
2623	static volatile int mlx5_en_unit;
2624	struct ifnet *ifp;
2625	struct mlx5e_priv *priv;
2626	u8 dev_addr[ETHER_ADDR_LEN] __aligned(4);
2627	struct sysctl_oid_list *child;
2628	int ncv = mdev->priv.eq_table.num_comp_vectors;
2629	char unit[16];
2630	int err;
2631	int i;
2632	u32 eth_proto_cap;
2633
2634	if (mlx5e_check_required_hca_cap(mdev)) {
2635		mlx5_core_dbg(mdev, "mlx5e_check_required_hca_cap() failed\n");
2636		return (NULL);
2637	}
2638	priv = malloc(sizeof(*priv), M_MLX5EN, M_WAITOK | M_ZERO);
2639	if (priv == NULL) {
2640		mlx5_core_err(mdev, "malloc() failed\n");
2641		return (NULL);
2642	}
2643	mlx5e_priv_mtx_init(priv);
2644
2645	ifp = priv->ifp = if_alloc(IFT_ETHER);
2646	if (ifp == NULL) {
2647		mlx5_core_err(mdev, "if_alloc() failed\n");
2648		goto err_free_priv;
2649	}
2650	ifp->if_softc = priv;
2651	if_initname(ifp, "mce", atomic_fetchadd_int(&mlx5_en_unit, 1));
2652	ifp->if_mtu = ETHERMTU;
2653	ifp->if_init = mlx5e_open;
2654	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2655	ifp->if_ioctl = mlx5e_ioctl;
2656	ifp->if_transmit = mlx5e_xmit;
2657	ifp->if_qflush = if_qflush;
2658#if (__FreeBSD_version >= 1100000)
2659	ifp->if_get_counter = mlx5e_get_counter;
2660#endif
2661	ifp->if_snd.ifq_maxlen = ifqmaxlen;
2662	/*
2663	 * Set driver features.
2664	 */
2665	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6;
2666	ifp->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING;
2667	ifp->if_capabilities |= IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWFILTER;
2668	ifp->if_capabilities |= IFCAP_LINKSTATE | IFCAP_JUMBO_MTU;
2669	ifp->if_capabilities |= IFCAP_LRO;
2670	ifp->if_capabilities |= IFCAP_TSO | IFCAP_VLAN_HWTSO;
2671
2672	/* set TSO limits so that we don't have to drop TX packets */
2673	ifp->if_hw_tsomax = MLX5E_MAX_TX_PAYLOAD_SIZE - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
2674	ifp->if_hw_tsomaxsegcount = MLX5E_MAX_TX_MBUF_FRAGS - 1 /* hdr */;
2675	ifp->if_hw_tsomaxsegsize = MLX5E_MAX_TX_MBUF_SIZE;
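	/*
	 * Note: if_hw_tsomax leaves room for the Ethernet and VLAN
	 * headers, and one of the MLX5E_MAX_TX_MBUF_FRAGS fragments is
	 * reserved for the header mbuf, hence the "- 1" above.
	 */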
2676
2677	ifp->if_capenable = ifp->if_capabilities;
2678	ifp->if_hwassist = 0;
2679	if (ifp->if_capenable & IFCAP_TSO)
2680		ifp->if_hwassist |= CSUM_TSO;
2681	if (ifp->if_capenable & IFCAP_TXCSUM)
2682		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP | CSUM_IP);
2683	if (ifp->if_capenable & IFCAP_TXCSUM_IPV6)
2684		ifp->if_hwassist |= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);
2685
2686	/* ifnet sysctl tree */
2687	sysctl_ctx_init(&priv->sysctl_ctx);
2688	priv->sysctl_ifnet = SYSCTL_ADD_NODE(&priv->sysctl_ctx, SYSCTL_STATIC_CHILDREN(_dev),
2689	    OID_AUTO, ifp->if_dname, CTLFLAG_RD, 0, "MLX5 ethernet - interface name");
2690	if (priv->sysctl_ifnet == NULL) {
2691		mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n");
2692		goto err_free_sysctl;
2693	}
2694	snprintf(unit, sizeof(unit), "%d", ifp->if_dunit);
2695	priv->sysctl_ifnet = SYSCTL_ADD_NODE(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
2696	    OID_AUTO, unit, CTLFLAG_RD, 0, "MLX5 ethernet - interface unit");
2697	if (priv->sysctl_ifnet == NULL) {
2698		mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n");
2699		goto err_free_sysctl;
2700	}
2701
2702	/* HW sysctl tree */
2703	child = SYSCTL_CHILDREN(device_get_sysctl_tree(mdev->pdev->dev.bsddev));
2704	priv->sysctl_hw = SYSCTL_ADD_NODE(&priv->sysctl_ctx, child,
2705	    OID_AUTO, "hw", CTLFLAG_RD, 0, "MLX5 ethernet dev hw");
2706	if (priv->sysctl_hw == NULL) {
2707		mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n");
2708		goto err_free_sysctl;
2709	}
2710	mlx5e_build_ifp_priv(mdev, priv, ncv);
2711	err = mlx5_alloc_map_uar(mdev, &priv->cq_uar);
2712	if (err) {
2713		if_printf(ifp, "%s: mlx5_alloc_map_uar failed, %d\n",
2714		    __func__, err);
2715		goto err_free_sysctl;
2716	}
2717	err = mlx5_core_alloc_pd(mdev, &priv->pdn);
2718	if (err) {
2719		if_printf(ifp, "%s: mlx5_core_alloc_pd failed, %d\n",
2720		    __func__, err);
2721		goto err_unmap_free_uar;
2722	}
2723	err = mlx5_alloc_transport_domain(mdev, &priv->tdn);
2724	if (err) {
2725		if_printf(ifp, "%s: mlx5_alloc_transport_domain failed, %d\n",
2726		    __func__, err);
2727		goto err_dealloc_pd;
2728	}
2729	err = mlx5e_create_mkey(priv, priv->pdn, &priv->mr);
2730	if (err) {
2731		if_printf(ifp, "%s: mlx5e_create_mkey failed, %d\n",
2732		    __func__, err);
2733		goto err_dealloc_transport_domain;
2734	}
2735	mlx5_query_nic_vport_mac_address(priv->mdev, 0, dev_addr);
2736
2737	/* set default MTU */
2738	mlx5e_set_dev_port_mtu(ifp, ifp->if_mtu);
2739
2740	/* Set desc */
2741	device_set_desc(mdev->pdev->dev.bsddev, mlx5e_version);
2742
2743	/* Set default media status */
2744	priv->media_status_last = IFM_AVALID;
2745	priv->media_active_last = IFM_ETHER | IFM_AUTO;
2746
2747	/* Pauseframes are enabled by default */
2748	priv->params_ethtool.tx_pauseframe_control = 1;
2749	priv->params_ethtool.rx_pauseframe_control = 1;
2750
2751	err = mlx5_query_port_proto_cap(mdev, &eth_proto_cap, MLX5_PTYS_EN);
2752	if (err) {
2753		eth_proto_cap = 0;
2754		if_printf(ifp, "%s: Query port media capability failed, %d\n",
2755		    __func__, err);
2756	}
2757
2758	/* Set up the supported media types */
2759	ifmedia_init(&priv->media, IFM_IMASK | IFM_ETH_FMASK,
2760	    mlx5e_media_change, mlx5e_media_status);
2761
2762	for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) {
2763		if (mlx5e_mode_table[i].baudrate == 0)
2764			continue;
2765		if (MLX5E_PROT_MASK(i) & eth_proto_cap)
2766			ifmedia_add(&priv->media,
2767			    IFM_ETHER | mlx5e_mode_table[i].subtype |
2768			    IFM_FDX, 0, NULL);
2769	}
2770
2771	ifmedia_add(&priv->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2772	ifmedia_set(&priv->media, IFM_ETHER | IFM_AUTO);
2773	ether_ifattach(ifp, dev_addr);
2774
2775	/* Register for VLAN events */
2776	priv->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
2777	    mlx5e_vlan_rx_add_vid, priv, EVENTHANDLER_PRI_FIRST);
2778	priv->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
2779	    mlx5e_vlan_rx_kill_vid, priv, EVENTHANDLER_PRI_FIRST);
2780
2781	/* Link is down by default */
2782	if_link_state_change(ifp, LINK_STATE_DOWN);
2783
2784	mlx5e_enable_async_events(priv);
2785
2786	mlx5e_add_hw_stats(priv);
2787
2788	mlx5e_create_stats(&priv->stats.vport.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
2789	    "vstats", mlx5e_vport_stats_desc, MLX5E_VPORT_STATS_NUM,
2790	    priv->stats.vport.arg);
2791
2792	mlx5e_create_stats(&priv->stats.pport.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
2793	    "pstats", mlx5e_pport_stats_desc, MLX5E_PPORT_STATS_NUM,
2794	    priv->stats.pport.arg);
2795
2796	mlx5e_create_ethtool(priv);
2797
2798	mtx_lock(&priv->async_events_mtx);
2799	mlx5e_update_stats(priv);
2800	mtx_unlock(&priv->async_events_mtx);
2801
2802	return (priv);
2803
2804err_dealloc_transport_domain:
2805	mlx5_dealloc_transport_domain(mdev, priv->tdn);
2806
2807err_dealloc_pd:
2808	mlx5_core_dealloc_pd(mdev, priv->pdn);
2809
2810err_unmap_free_uar:
2811	mlx5_unmap_free_uar(mdev, &priv->cq_uar);
2812
2813err_free_sysctl:
2814	sysctl_ctx_free(&priv->sysctl_ctx);
2815
2816	if_free(ifp);
2817
2818err_free_priv:
2819	mlx5e_priv_mtx_destroy(priv);
2820	free(priv, M_MLX5EN);
2821	return (NULL);
2822}
2823
2824static void
2825mlx5e_destroy_ifp(struct mlx5_core_dev *mdev, void *vpriv)
2826{
2827	struct mlx5e_priv *priv = vpriv;
2828	struct ifnet *ifp = priv->ifp;
2829
2830	/* don't allow more IOCTLs */
2831	priv->gone = 1;
2832
2833	/* XXX wait a bit to allow IOCTL handlers to complete */
2834	pause("W", hz);
2835
2836	/* stop watchdog timer */
2837	callout_drain(&priv->watchdog);
2838
2839	if (priv->vlan_attach != NULL)
2840		EVENTHANDLER_DEREGISTER(vlan_config, priv->vlan_attach);
2841	if (priv->vlan_detach != NULL)
2842		EVENTHANDLER_DEREGISTER(vlan_unconfig, priv->vlan_detach);
2843
2844	/* make sure device gets closed */
2845	PRIV_LOCK(priv);
2846	mlx5e_close_locked(ifp);
2847	PRIV_UNLOCK(priv);
2848
2849	/* unregister device */
2850	ifmedia_removeall(&priv->media);
2851	ether_ifdetach(ifp);
2852	if_free(ifp);
2853
2854	/* destroy all remaining sysctl nodes */
2855	if (priv->sysctl_debug)
2856		sysctl_ctx_free(&priv->stats.port_stats_debug.ctx);
2857	sysctl_ctx_free(&priv->stats.vport.ctx);
2858	sysctl_ctx_free(&priv->stats.pport.ctx);
2859	sysctl_ctx_free(&priv->sysctl_ctx);
2860
2861	mlx5_core_destroy_mkey(priv->mdev, &priv->mr);
2862	mlx5_dealloc_transport_domain(priv->mdev, priv->tdn);
2863	mlx5_core_dealloc_pd(priv->mdev, priv->pdn);
2864	mlx5_unmap_free_uar(priv->mdev, &priv->cq_uar);
2865	mlx5e_disable_async_events(priv);
2866	flush_scheduled_work();
2867	mlx5e_priv_mtx_destroy(priv);
2868	free(priv, M_MLX5EN);
2869}
2870
2871static void *
2872mlx5e_get_ifp(void *vpriv)
2873{
2874	struct mlx5e_priv *priv = vpriv;
2875
2876	return (priv->ifp);
2877}
2878
2879static struct mlx5_interface mlx5e_interface = {
2880	.add = mlx5e_create_ifp,
2881	.remove = mlx5e_destroy_ifp,
2882	.event = mlx5e_async_event,
2883	.protocol = MLX5_INTERFACE_PROTOCOL_ETH,
2884	.get_dev = mlx5e_get_ifp,
2885};
2886
2887void
2888mlx5e_init(void)
2889{
2890	mlx5_register_interface(&mlx5e_interface);
2891}
2892
2893void
2894mlx5e_cleanup(void)
2895{
2896	mlx5_unregister_interface(&mlx5e_interface);
2897}
2898
2899module_init_order(mlx5e_init, SI_ORDER_THIRD);
2900module_exit_order(mlx5e_cleanup, SI_ORDER_THIRD);
2901
2902#if (__FreeBSD_version >= 1100000)
2903MODULE_DEPEND(mlx5en, linuxkpi, 1, 1, 1);
2904#endif
2905MODULE_DEPEND(mlx5en, mlx5, 1, 1, 1);
2906MODULE_VERSION(mlx5en, 1);
2907