mlx5_en_main.c revision 341977
/*-
 * Copyright (c) 2015-2018 Mellanox Technologies. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: stable/11/sys/dev/mlx5/mlx5_en/mlx5_en_main.c 341977 2018-12-12 13:05:45Z hselasky $
 */

#include "en.h"

#include <sys/sockio.h>
#include <machine/atomic.h>

#ifndef ETH_DRIVER_VERSION
#define	ETH_DRIVER_VERSION	"3.4.2"
#endif

static const char mlx5e_version[] = "mlx5en: Mellanox Ethernet driver "
	ETH_DRIVER_VERSION " (" DRIVER_RELDATE ")\n";

static int mlx5e_get_wqe_sz(struct mlx5e_priv *priv, u32 *wqe_sz, u32 *nsegs);

struct mlx5e_channel_param {
	struct mlx5e_rq_param rq;
	struct mlx5e_sq_param sq;
	struct mlx5e_cq_param rx_cq;
	struct mlx5e_cq_param tx_cq;
};

static const struct {
	u32	subtype;
	u64	baudrate;
}	mlx5e_mode_table[MLX5E_LINK_MODES_NUMBER] = {
	[MLX5E_1000BASE_CX_SGMII] = {
		.subtype = IFM_1000_CX_SGMII,
		.baudrate = IF_Mbps(1000ULL),
	},
	[MLX5E_1000BASE_KX] = {
		.subtype = IFM_1000_KX,
		.baudrate = IF_Mbps(1000ULL),
	},
	[MLX5E_10GBASE_CX4] = {
		.subtype = IFM_10G_CX4,
		.baudrate = IF_Gbps(10ULL),
	},
	[MLX5E_10GBASE_KX4] = {
		.subtype = IFM_10G_KX4,
		.baudrate = IF_Gbps(10ULL),
	},
	[MLX5E_10GBASE_KR] = {
		.subtype = IFM_10G_KR,
		.baudrate = IF_Gbps(10ULL),
	},
	[MLX5E_20GBASE_KR2] = {
		.subtype = IFM_20G_KR2,
		.baudrate = IF_Gbps(20ULL),
	},
	[MLX5E_40GBASE_CR4] = {
		.subtype = IFM_40G_CR4,
		.baudrate = IF_Gbps(40ULL),
	},
	[MLX5E_40GBASE_KR4] = {
		.subtype = IFM_40G_KR4,
		.baudrate = IF_Gbps(40ULL),
	},
	[MLX5E_56GBASE_R4] = {
		.subtype = IFM_56G_R4,
		.baudrate = IF_Gbps(56ULL),
	},
	[MLX5E_10GBASE_CR] = {
		.subtype = IFM_10G_CR1,
		.baudrate = IF_Gbps(10ULL),
	},
	[MLX5E_10GBASE_SR] = {
		.subtype = IFM_10G_SR,
		.baudrate = IF_Gbps(10ULL),
	},
	[MLX5E_10GBASE_ER] = {
		.subtype = IFM_10G_ER,
		.baudrate = IF_Gbps(10ULL),
	},
	[MLX5E_40GBASE_SR4] = {
		.subtype = IFM_40G_SR4,
		.baudrate = IF_Gbps(40ULL),
	},
	[MLX5E_40GBASE_LR4] = {
		.subtype = IFM_40G_LR4,
		.baudrate = IF_Gbps(40ULL),
	},
	[MLX5E_100GBASE_CR4] = {
		.subtype = IFM_100G_CR4,
		.baudrate = IF_Gbps(100ULL),
	},
	[MLX5E_100GBASE_SR4] = {
		.subtype = IFM_100G_SR4,
		.baudrate = IF_Gbps(100ULL),
	},
	[MLX5E_100GBASE_KR4] = {
		.subtype = IFM_100G_KR4,
		.baudrate = IF_Gbps(100ULL),
	},
	[MLX5E_100GBASE_LR4] = {
		.subtype = IFM_100G_LR4,
		.baudrate = IF_Gbps(100ULL),
	},
	[MLX5E_100BASE_TX] = {
		.subtype = IFM_100_TX,
		.baudrate = IF_Mbps(100ULL),
	},
	[MLX5E_1000BASE_T] = {
		.subtype = IFM_1000_T,
		.baudrate = IF_Mbps(1000ULL),
	},
	[MLX5E_10GBASE_T] = {
		.subtype = IFM_10G_T,
		.baudrate = IF_Gbps(10ULL),
	},
	[MLX5E_25GBASE_CR] = {
		.subtype = IFM_25G_CR,
		.baudrate = IF_Gbps(25ULL),
	},
	[MLX5E_25GBASE_KR] = {
		.subtype = IFM_25G_KR,
		.baudrate = IF_Gbps(25ULL),
	},
	[MLX5E_25GBASE_SR] = {
		.subtype = IFM_25G_SR,
		.baudrate = IF_Gbps(25ULL),
	},
	[MLX5E_50GBASE_CR2] = {
		.subtype = IFM_50G_CR2,
		.baudrate = IF_Gbps(50ULL),
	},
	[MLX5E_50GBASE_KR2] = {
		.subtype = IFM_50G_KR2,
		.baudrate = IF_Gbps(50ULL),
	},
};

MALLOC_DEFINE(M_MLX5EN, "MLX5EN", "MLX5 Ethernet");

static void
mlx5e_update_carrier(struct mlx5e_priv *priv)
{
	struct mlx5_core_dev *mdev = priv->mdev;
	u32 out[MLX5_ST_SZ_DW(ptys_reg)];
	u32 eth_proto_oper;
	int error;
	u8 port_state;
	u8 is_er_type;
	u8 i;

	port_state = mlx5_query_vport_state(mdev,
	    MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT, 0);

	if (port_state == VPORT_STATE_UP) {
		priv->media_status_last |= IFM_ACTIVE;
	} else {
		priv->media_status_last &= ~IFM_ACTIVE;
		priv->media_active_last = IFM_ETHER;
		if_link_state_change(priv->ifp, LINK_STATE_DOWN);
		return;
	}

	error = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, 1);
	if (error) {
		priv->media_active_last = IFM_ETHER;
		priv->ifp->if_baudrate = 1;
		if_printf(priv->ifp, "%s: query port ptys failed: 0x%x\n",
		    __func__, error);
		return;
	}
	eth_proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper);

	for (i = 0; i != MLX5E_LINK_MODES_NUMBER; i++) {
		if (mlx5e_mode_table[i].baudrate == 0)
			continue;
		if (MLX5E_PROT_MASK(i) & eth_proto_oper) {
			u32 subtype = mlx5e_mode_table[i].subtype;

			priv->ifp->if_baudrate =
			    mlx5e_mode_table[i].baudrate;

			switch (subtype) {
			case IFM_10G_ER:
				error = mlx5_query_pddr_range_info(mdev, 1, &is_er_type);
				if (error != 0) {
					if_printf(priv->ifp, "%s: query port pddr failed: %d\n",
					    __func__, error);
				}
				if (error != 0 || is_er_type == 0)
					subtype = IFM_10G_LR;
				break;
			case IFM_40G_LR4:
				error = mlx5_query_pddr_range_info(mdev, 1, &is_er_type);
				if (error != 0) {
					if_printf(priv->ifp, "%s: query port pddr failed: %d\n",
					    __func__, error);
				}
				if (error == 0 && is_er_type != 0)
					subtype = IFM_40G_ER4;
				break;
			}
			priv->media_active_last = subtype | IFM_ETHER | IFM_FDX;
			break;
		}
	}
	if_link_state_change(priv->ifp, LINK_STATE_UP);
}

static void
mlx5e_media_status(struct ifnet *dev, struct ifmediareq *ifmr)
{
	struct mlx5e_priv *priv = dev->if_softc;

	ifmr->ifm_status = priv->media_status_last;
	ifmr->ifm_active = priv->media_active_last |
	    (priv->params.rx_pauseframe_control ? IFM_ETH_RXPAUSE : 0) |
	    (priv->params.tx_pauseframe_control ? IFM_ETH_TXPAUSE : 0);
}

static u32
mlx5e_find_link_mode(u32 subtype)
{
	u32 i;
	u32 link_mode = 0;

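	/*
	 * Revert the subtype substitutions done by
	 * mlx5e_update_carrier() above, given that only IFM_10G_ER
	 * and IFM_40G_LR4 appear in "mlx5e_mode_table":
	 */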
	switch (subtype) {
	case IFM_10G_LR:
		subtype = IFM_10G_ER;
		break;
	case IFM_40G_ER4:
		subtype = IFM_40G_LR4;
		break;
	}

	for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) {
		if (mlx5e_mode_table[i].baudrate == 0)
			continue;
		if (mlx5e_mode_table[i].subtype == subtype)
			link_mode |= MLX5E_PROT_MASK(i);
	}

	return (link_mode);
}

static int
mlx5e_set_port_pause_and_pfc(struct mlx5e_priv *priv)
{
	return (mlx5_set_port_pause_and_pfc(priv->mdev, 1,
	    priv->params.rx_pauseframe_control,
	    priv->params.tx_pauseframe_control,
	    priv->params.rx_priority_flow_control,
	    priv->params.tx_priority_flow_control));
}

static int
mlx5e_set_port_pfc(struct mlx5e_priv *priv)
{
	int error;

	if (priv->params.rx_pauseframe_control ||
	    priv->params.tx_pauseframe_control) {
		if_printf(priv->ifp,
		    "Global pauseframes must be disabled before enabling PFC.\n");
		error = -EINVAL;
	} else {
		error = mlx5e_set_port_pause_and_pfc(priv);
	}
	return (error);
}

static int
mlx5e_media_change(struct ifnet *dev)
{
	struct mlx5e_priv *priv = dev->if_softc;
	struct mlx5_core_dev *mdev = priv->mdev;
	u32 eth_proto_cap;
	u32 link_mode;
	int was_opened;
	int locked;
	int error;

	locked = PRIV_LOCKED(priv);
	if (!locked)
		PRIV_LOCK(priv);

	if (IFM_TYPE(priv->media.ifm_media) != IFM_ETHER) {
		error = EINVAL;
		goto done;
	}
	link_mode = mlx5e_find_link_mode(IFM_SUBTYPE(priv->media.ifm_media));

	/* query supported capabilities */
	error = mlx5_query_port_proto_cap(mdev, &eth_proto_cap, MLX5_PTYS_EN);
	if (error != 0) {
		if_printf(dev, "Query port media capability failed\n");
		goto done;
	}
	/* check for autoselect */
	if (IFM_SUBTYPE(priv->media.ifm_media) == IFM_AUTO) {
		link_mode = eth_proto_cap;
		if (link_mode == 0) {
			if_printf(dev, "Port media capability is zero\n");
			error = EINVAL;
			goto done;
		}
	} else {
		link_mode = link_mode & eth_proto_cap;
		if (link_mode == 0) {
			if_printf(dev, "Not supported link mode requested\n");
			error = EINVAL;
			goto done;
		}
	}
	if (priv->media.ifm_media & (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE)) {
		/* check if PFC is enabled */
		if (priv->params.rx_priority_flow_control ||
		    priv->params.tx_priority_flow_control) {
			if_printf(dev, "PFC must be disabled before enabling global pauseframes.\n");
			error = EINVAL;
			goto done;
		}
	}
	/* update pauseframe control bits */
	priv->params.rx_pauseframe_control =
	    (priv->media.ifm_media & IFM_ETH_RXPAUSE) ? 1 : 0;
	priv->params.tx_pauseframe_control =
	    (priv->media.ifm_media & IFM_ETH_TXPAUSE) ? 1 : 0;

	/* check if device is opened */
	was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);

	/* reconfigure the hardware */
	mlx5_set_port_status(mdev, MLX5_PORT_DOWN);
	mlx5_set_port_proto(mdev, link_mode, MLX5_PTYS_EN);
	error = -mlx5e_set_port_pause_and_pfc(priv);
	if (was_opened)
		mlx5_set_port_status(mdev, MLX5_PORT_UP);

done:
	if (!locked)
		PRIV_UNLOCK(priv);
	return (error);
}

static void
mlx5e_update_carrier_work(struct work_struct *work)
{
	struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
	    update_carrier_work);

	PRIV_LOCK(priv);
	if (test_bit(MLX5E_STATE_OPENED, &priv->state))
		mlx5e_update_carrier(priv);
	PRIV_UNLOCK(priv);
}

/*
 * This function reads the physical port counters from the firmware
 * using a pre-defined layout given by the various MLX5E_PPORT_XXX()
 * macros. The output is converted from big-endian 64-bit values into
 * host endian ones and stored in the "priv->stats.pport" structure.
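 *
 * The 64-bit counters are stored consecutively in the "arg" arrays:
 * the per-priority group occupies the start of "priv->stats.pport",
 * even though it is read last, followed by the IEEE802.3 group and
 * the first part of the RFC2819 group. The remaining RFC2819
 * counters and the RFC2863 and physical layer groups fill
 * "priv->stats.port_stats_debug".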
 */
static void
mlx5e_update_pport_counters(struct mlx5e_priv *priv)
{
	struct mlx5_core_dev *mdev = priv->mdev;
	struct mlx5e_pport_stats *s = &priv->stats.pport;
	struct mlx5e_port_stats_debug *s_debug = &priv->stats.port_stats_debug;
	u32 *in;
	u32 *out;
	const u64 *ptr;
	unsigned sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
	unsigned x;
	unsigned y;
	unsigned z;

	/* allocate firmware request structures */
	in = mlx5_vzalloc(sz);
	out = mlx5_vzalloc(sz);
	if (in == NULL || out == NULL)
		goto free_out;

	/*
	 * Get pointer to the 64-bit counter set which is located at a
	 * fixed offset in the output firmware request structure:
	 */
	ptr = (const uint64_t *)MLX5_ADDR_OF(ppcnt_reg, out, counter_set);

	MLX5_SET(ppcnt_reg, in, local_port, 1);

	/* read IEEE802_3 counter group using predefined counter layout */
	MLX5_SET(ppcnt_reg, in, grp, MLX5_IEEE_802_3_COUNTERS_GROUP);
	mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
	for (x = 0, y = MLX5E_PPORT_PER_PRIO_STATS_NUM;
	     x != MLX5E_PPORT_IEEE802_3_STATS_NUM; x++, y++)
		s->arg[y] = be64toh(ptr[x]);

	/* read RFC2819 counter group using predefined counter layout */
	MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2819_COUNTERS_GROUP);
	mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
	for (x = 0; x != MLX5E_PPORT_RFC2819_STATS_NUM; x++, y++)
		s->arg[y] = be64toh(ptr[x]);
	for (y = 0; x != MLX5E_PPORT_RFC2819_STATS_NUM +
	    MLX5E_PPORT_RFC2819_STATS_DEBUG_NUM; x++, y++)
		s_debug->arg[y] = be64toh(ptr[x]);

	/* read RFC2863 counter group using predefined counter layout */
	MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2863_COUNTERS_GROUP);
	mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
	for (x = 0; x != MLX5E_PPORT_RFC2863_STATS_DEBUG_NUM; x++, y++)
		s_debug->arg[y] = be64toh(ptr[x]);

	/* read physical layer stats counter group using predefined counter layout */
	MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_COUNTERS_GROUP);
	mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
	for (x = 0; x != MLX5E_PPORT_PHYSICAL_LAYER_STATS_DEBUG_NUM; x++, y++)
		s_debug->arg[y] = be64toh(ptr[x]);

	/* read per-priority counters */
	MLX5_SET(ppcnt_reg, in, grp, MLX5_PER_PRIORITY_COUNTERS_GROUP);

	/* iterate all the priorities */
	for (y = z = 0; z != MLX5E_PPORT_PER_PRIO_STATS_NUM_PRIO; z++) {
		MLX5_SET(ppcnt_reg, in, prio_tc, z);
		mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);

		/* read per priority stats counter group using predefined counter layout */
		for (x = 0; x != (MLX5E_PPORT_PER_PRIO_STATS_NUM /
		    MLX5E_PPORT_PER_PRIO_STATS_NUM_PRIO); x++, y++)
			s->arg[y] = be64toh(ptr[x]);
	}
free_out:
	/* free firmware request structures */
	kvfree(in);
	kvfree(out);
}

/*
 * This function is called regularly to collect all statistics
 * counters from the firmware. The values can be viewed through the
 * sysctl interface. Execution is serialized using the priv's global
 * configuration lock.
 */
static void
mlx5e_update_stats_work(struct work_struct *work)
{
	struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
	    update_stats_work);
	struct mlx5_core_dev *mdev = priv->mdev;
	struct mlx5e_vport_stats *s = &priv->stats.vport;
	struct mlx5e_rq_stats *rq_stats;
	struct mlx5e_sq_stats *sq_stats;
	struct buf_ring *sq_br;
#if (__FreeBSD_version < 1100000)
	struct ifnet *ifp = priv->ifp;
#endif

	u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)];
	u32 *out;
	int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out);
	u64 tso_packets = 0;
	u64 tso_bytes = 0;
	u64 tx_queue_dropped = 0;
	u64 tx_defragged = 0;
	u64 tx_offload_none = 0;
	u64 lro_packets = 0;
	u64 lro_bytes = 0;
	u64 sw_lro_queued = 0;
	u64 sw_lro_flushed = 0;
	u64 rx_csum_none = 0;
	u64 rx_wqe_err = 0;
	u32 rx_out_of_buffer = 0;
	int i;
	int j;

	PRIV_LOCK(priv);
	out = mlx5_vzalloc(outlen);
	if (out == NULL)
		goto free_out;
	if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
		goto free_out;

	/* Collect the SW counters first and then the HW counters, for consistency */
	for (i = 0; i < priv->params.num_channels; i++) {
		struct mlx5e_rq *rq = &priv->channel[i]->rq;

		rq_stats = &priv->channel[i]->rq.stats;

		/* collect stats from LRO */
		rq_stats->sw_lro_queued = rq->lro.lro_queued;
		rq_stats->sw_lro_flushed = rq->lro.lro_flushed;
		sw_lro_queued += rq_stats->sw_lro_queued;
		sw_lro_flushed += rq_stats->sw_lro_flushed;
		lro_packets += rq_stats->lro_packets;
		lro_bytes += rq_stats->lro_bytes;
		rx_csum_none += rq_stats->csum_none;
		rx_wqe_err += rq_stats->wqe_err;

		for (j = 0; j < priv->num_tc; j++) {
			sq_stats = &priv->channel[i]->sq[j].stats;
			sq_br = priv->channel[i]->sq[j].br;

			tso_packets += sq_stats->tso_packets;
			tso_bytes += sq_stats->tso_bytes;
			tx_queue_dropped += sq_stats->dropped;
			if (sq_br != NULL)
				tx_queue_dropped += sq_br->br_drops;
			tx_defragged += sq_stats->defragged;
			tx_offload_none += sq_stats->csum_offload_none;
		}
	}

	s->tx_jumbo_packets =
	    priv->stats.port_stats_debug.p1519to2047octets +
	    priv->stats.port_stats_debug.p2048to4095octets +
	    priv->stats.port_stats_debug.p4096to8191octets +
	    priv->stats.port_stats_debug.p8192to10239octets;

	/* update counters */
	s->tso_packets = tso_packets;
	s->tso_bytes = tso_bytes;
	s->tx_queue_dropped = tx_queue_dropped;
	s->tx_defragged = tx_defragged;
	s->lro_packets = lro_packets;
	s->lro_bytes = lro_bytes;
	s->sw_lro_queued = sw_lro_queued;
	s->sw_lro_flushed = sw_lro_flushed;
	s->rx_csum_none = rx_csum_none;
	s->rx_wqe_err = rx_wqe_err;

	/* HW counters */
	memset(in, 0, sizeof(in));

	MLX5_SET(query_vport_counter_in, in, opcode,
	    MLX5_CMD_OP_QUERY_VPORT_COUNTER);
	MLX5_SET(query_vport_counter_in, in, op_mod, 0);
	MLX5_SET(query_vport_counter_in, in, other_vport, 0);

	memset(out, 0, outlen);

	/* get number of out-of-buffer drops first */
	if (mlx5_vport_query_out_of_rx_buffer(mdev, priv->counter_set_id,
	    &rx_out_of_buffer))
		goto free_out;

	/*
	 * Accumulate the difference into a 64-bit counter. The
	 * firmware counter is only 32 bits wide, so taking the
	 * difference from the previous reading modulo 2**32 keeps
	 * the accumulated value correct across counter wraparound.
	 */
	s->rx_out_of_buffer += (u64)(u32)(rx_out_of_buffer - s->rx_out_of_buffer_prev);
	s->rx_out_of_buffer_prev = rx_out_of_buffer;

	/* get port statistics */
	if (mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen))
		goto free_out;

#define	MLX5_GET_CTR(out, x) \
	MLX5_GET64(query_vport_counter_out, out, x)

	s->rx_error_packets =
	    MLX5_GET_CTR(out, received_errors.packets);
	s->rx_error_bytes =
	    MLX5_GET_CTR(out, received_errors.octets);
	s->tx_error_packets =
	    MLX5_GET_CTR(out, transmit_errors.packets);
	s->tx_error_bytes =
	    MLX5_GET_CTR(out, transmit_errors.octets);

	s->rx_unicast_packets =
	    MLX5_GET_CTR(out, received_eth_unicast.packets);
	s->rx_unicast_bytes =
	    MLX5_GET_CTR(out, received_eth_unicast.octets);
	s->tx_unicast_packets =
	    MLX5_GET_CTR(out, transmitted_eth_unicast.packets);
	s->tx_unicast_bytes =
	    MLX5_GET_CTR(out, transmitted_eth_unicast.octets);

	s->rx_multicast_packets =
	    MLX5_GET_CTR(out, received_eth_multicast.packets);
	s->rx_multicast_bytes =
	    MLX5_GET_CTR(out, received_eth_multicast.octets);
	s->tx_multicast_packets =
	    MLX5_GET_CTR(out, transmitted_eth_multicast.packets);
	s->tx_multicast_bytes =
	    MLX5_GET_CTR(out, transmitted_eth_multicast.octets);

	s->rx_broadcast_packets =
	    MLX5_GET_CTR(out, received_eth_broadcast.packets);
	s->rx_broadcast_bytes =
	    MLX5_GET_CTR(out, received_eth_broadcast.octets);
	s->tx_broadcast_packets =
	    MLX5_GET_CTR(out, transmitted_eth_broadcast.packets);
	s->tx_broadcast_bytes =
	    MLX5_GET_CTR(out, transmitted_eth_broadcast.octets);

	s->rx_packets =
	    s->rx_unicast_packets +
	    s->rx_multicast_packets +
	    s->rx_broadcast_packets -
	    s->rx_out_of_buffer;
	s->rx_bytes =
	    s->rx_unicast_bytes +
	    s->rx_multicast_bytes +
	    s->rx_broadcast_bytes;
	s->tx_packets =
	    s->tx_unicast_packets +
	    s->tx_multicast_packets +
	    s->tx_broadcast_packets;
	s->tx_bytes =
	    s->tx_unicast_bytes +
	    s->tx_multicast_bytes +
	    s->tx_broadcast_bytes;

	/* Update calculated offload counters */
	s->tx_csum_offload = s->tx_packets - tx_offload_none;
	s->rx_csum_good = s->rx_packets - s->rx_csum_none;

	/* Get physical port counters */
	mlx5e_update_pport_counters(priv);

#if (__FreeBSD_version < 1100000)
	/* no get_counters interface in fbsd 10 */
	ifp->if_ipackets = s->rx_packets;
	ifp->if_ierrors = s->rx_error_packets +
	    priv->stats.pport.alignment_err +
	    priv->stats.pport.check_seq_err +
	    priv->stats.pport.crc_align_errors +
	    priv->stats.pport.in_range_len_errors +
	    priv->stats.pport.jabbers +
	    priv->stats.pport.out_of_range_len +
	    priv->stats.pport.oversize_pkts +
	    priv->stats.pport.symbol_err +
	    priv->stats.pport.too_long_errors +
	    priv->stats.pport.undersize_pkts +
	    priv->stats.pport.unsupported_op_rx;
	ifp->if_iqdrops = s->rx_out_of_buffer +
	    priv->stats.pport.drop_events;
	ifp->if_opackets = s->tx_packets;
	ifp->if_oerrors = s->tx_error_packets;
	ifp->if_snd.ifq_drops = s->tx_queue_dropped;
	ifp->if_ibytes = s->rx_bytes;
	ifp->if_obytes = s->tx_bytes;
	ifp->if_collisions =
	    priv->stats.pport.collisions;
#endif

free_out:
	kvfree(out);

	/* Update diagnostics, if any */
	if (priv->params_ethtool.diag_pci_enable ||
	    priv->params_ethtool.diag_general_enable) {
		int error = mlx5_core_get_diagnostics_full(mdev,
		    priv->params_ethtool.diag_pci_enable ? &priv->params_pci : NULL,
		    priv->params_ethtool.diag_general_enable ? &priv->params_general : NULL);
		if (error != 0)
			if_printf(priv->ifp, "Failed reading diagnostics: %d\n", error);
	}
	PRIV_UNLOCK(priv);
}

static void
mlx5e_update_stats(void *arg)
{
	struct mlx5e_priv *priv = arg;

	queue_work(priv->wq, &priv->update_stats_work);

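	/* rearm the watchdog timer; "hz" ticks equals one second */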
	callout_reset(&priv->watchdog, hz, &mlx5e_update_stats, priv);
}

static void
mlx5e_async_event_sub(struct mlx5e_priv *priv,
    enum mlx5_dev_event event)
{
	switch (event) {
	case MLX5_DEV_EVENT_PORT_UP:
	case MLX5_DEV_EVENT_PORT_DOWN:
		queue_work(priv->wq, &priv->update_carrier_work);
		break;

	default:
		break;
	}
}

static void
mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv,
    enum mlx5_dev_event event, unsigned long param)
{
	struct mlx5e_priv *priv = vpriv;

	mtx_lock(&priv->async_events_mtx);
	if (test_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state))
		mlx5e_async_event_sub(priv, event);
	mtx_unlock(&priv->async_events_mtx);
}

static void
mlx5e_enable_async_events(struct mlx5e_priv *priv)
{
	set_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state);
}

static void
mlx5e_disable_async_events(struct mlx5e_priv *priv)
{
	mtx_lock(&priv->async_events_mtx);
	clear_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state);
	mtx_unlock(&priv->async_events_mtx);
}

static const char *mlx5e_rq_stats_desc[] = {
	MLX5E_RQ_STATS(MLX5E_STATS_DESC)
};

static int
mlx5e_create_rq(struct mlx5e_channel *c,
    struct mlx5e_rq_param *param,
    struct mlx5e_rq *rq)
{
	struct mlx5e_priv *priv = c->priv;
	struct mlx5_core_dev *mdev = priv->mdev;
	char buffer[16];
	void *rqc = param->rqc;
	void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq);
	int wq_sz;
	int err;
	int i;
	u32 nsegs, wqe_sz;

	err = mlx5e_get_wqe_sz(priv, &wqe_sz, &nsegs);
	if (err != 0)
		goto done;

	/* Create DMA descriptor TAG */
	if ((err = -bus_dma_tag_create(
	    bus_get_dma_tag(mdev->pdev->dev.bsddev),
	    1,				/* any alignment */
	    0,				/* no boundary */
	    BUS_SPACE_MAXADDR,		/* lowaddr */
	    BUS_SPACE_MAXADDR,		/* highaddr */
	    NULL, NULL,			/* filter, filterarg */
	    nsegs * MLX5E_MAX_RX_BYTES,	/* maxsize */
	    nsegs,			/* nsegments */
	    nsegs * MLX5E_MAX_RX_BYTES,	/* maxsegsize */
	    0,				/* flags */
	    NULL, NULL,			/* lockfunc, lockfuncarg */
	    &rq->dma_tag)))
		goto done;

	err = mlx5_wq_ll_create(mdev, &param->wq, rqc_wq, &rq->wq,
	    &rq->wq_ctrl);
	if (err)
		goto err_free_dma_tag;

	rq->wq.db = &rq->wq.db[MLX5_RCV_DBR];

	err = mlx5e_get_wqe_sz(priv, &rq->wqe_sz, &rq->nsegs);
	if (err != 0)
		goto err_rq_wq_destroy;

	wq_sz = mlx5_wq_ll_get_size(&rq->wq);

	err = -tcp_lro_init_args(&rq->lro, c->ifp, TCP_LRO_ENTRIES, wq_sz);
	if (err)
		goto err_rq_wq_destroy;

	rq->mbuf = malloc(wq_sz * sizeof(rq->mbuf[0]), M_MLX5EN, M_WAITOK | M_ZERO);
	for (i = 0; i != wq_sz; i++) {
		struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, i);
#if (MLX5E_MAX_RX_SEGS == 1)
		uint32_t byte_count = rq->wqe_sz - MLX5E_NET_IP_ALIGN;
#else
		int j;
#endif

		err = -bus_dmamap_create(rq->dma_tag, 0, &rq->mbuf[i].dma_map);
		if (err != 0) {
			while (i--)
				bus_dmamap_destroy(rq->dma_tag, rq->mbuf[i].dma_map);
			goto err_rq_mbuf_free;
		}

		/* set value for constant fields */
#if (MLX5E_MAX_RX_SEGS == 1)
		wqe->data[0].lkey = c->mkey_be;
		wqe->data[0].byte_count = cpu_to_be32(byte_count | MLX5_HW_START_PADDING);
#else
		for (j = 0; j < rq->nsegs; j++)
			wqe->data[j].lkey = c->mkey_be;
#endif
	}

	rq->ifp = c->ifp;
	rq->channel = c;
	rq->ix = c->ix;

	snprintf(buffer, sizeof(buffer), "rxstat%d", c->ix);
	mlx5e_create_stats(&rq->stats.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
	    buffer, mlx5e_rq_stats_desc, MLX5E_RQ_STATS_NUM,
	    rq->stats.arg);
	return (0);

err_rq_mbuf_free:
	free(rq->mbuf, M_MLX5EN);
	tcp_lro_free(&rq->lro);
err_rq_wq_destroy:
	mlx5_wq_destroy(&rq->wq_ctrl);
err_free_dma_tag:
	bus_dma_tag_destroy(rq->dma_tag);
done:
	return (err);
}

static void
mlx5e_destroy_rq(struct mlx5e_rq *rq)
{
	int wq_sz;
	int i;

	/* destroy all sysctl nodes */
	sysctl_ctx_free(&rq->stats.ctx);

	/* free leftover LRO packets, if any */
	tcp_lro_free(&rq->lro);

	wq_sz = mlx5_wq_ll_get_size(&rq->wq);
	for (i = 0; i != wq_sz; i++) {
		if (rq->mbuf[i].mbuf != NULL) {
			bus_dmamap_unload(rq->dma_tag, rq->mbuf[i].dma_map);
			m_freem(rq->mbuf[i].mbuf);
		}
		bus_dmamap_destroy(rq->dma_tag, rq->mbuf[i].dma_map);
	}
	free(rq->mbuf, M_MLX5EN);
	mlx5_wq_destroy(&rq->wq_ctrl);
}

static int
mlx5e_enable_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param)
{
	struct mlx5e_channel *c = rq->channel;
	struct mlx5e_priv *priv = c->priv;
	struct mlx5_core_dev *mdev = priv->mdev;

	void *in;
	void *rqc;
	void *wq;
	int inlen;
	int err;

	inlen = MLX5_ST_SZ_BYTES(create_rq_in) +
	    sizeof(u64) * rq->wq_ctrl.buf.npages;
	in = mlx5_vzalloc(inlen);
	if (in == NULL)
		return (-ENOMEM);

	rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
	wq = MLX5_ADDR_OF(rqc, rqc, wq);

	memcpy(rqc, param->rqc, sizeof(param->rqc));

	MLX5_SET(rqc, rqc, cqn, c->rq.cq.mcq.cqn);
	MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST);
	MLX5_SET(rqc, rqc, flush_in_error_en, 1);
	if (priv->counter_set_id >= 0)
		MLX5_SET(rqc, rqc, counter_set_id, priv->counter_set_id);
	MLX5_SET(wq, wq, log_wq_pg_sz, rq->wq_ctrl.buf.page_shift -
	    PAGE_SHIFT);
	MLX5_SET64(wq, wq, dbr_addr, rq->wq_ctrl.db.dma);

	mlx5_fill_page_array(&rq->wq_ctrl.buf,
	    (__be64 *) MLX5_ADDR_OF(wq, wq, pas));

	err = mlx5_core_create_rq(mdev, in, inlen, &rq->rqn);

	kvfree(in);

	return (err);
}

static int
mlx5e_modify_rq(struct mlx5e_rq *rq, int curr_state, int next_state)
{
	struct mlx5e_channel *c = rq->channel;
	struct mlx5e_priv *priv = c->priv;
	struct mlx5_core_dev *mdev = priv->mdev;

	void *in;
	void *rqc;
	int inlen;
	int err;

	inlen = MLX5_ST_SZ_BYTES(modify_rq_in);
	in = mlx5_vzalloc(inlen);
	if (in == NULL)
		return (-ENOMEM);

	rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);

	MLX5_SET(modify_rq_in, in, rqn, rq->rqn);
	MLX5_SET(modify_rq_in, in, rq_state, curr_state);
	MLX5_SET(rqc, rqc, state, next_state);

	err = mlx5_core_modify_rq(mdev, in, inlen);

	kvfree(in);

	return (err);
}

static void
mlx5e_disable_rq(struct mlx5e_rq *rq)
{
	struct mlx5e_channel *c = rq->channel;
	struct mlx5e_priv *priv = c->priv;
	struct mlx5_core_dev *mdev = priv->mdev;

	mlx5_core_destroy_rq(mdev, rq->rqn);
}

static int
mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq)
{
	struct mlx5e_channel *c = rq->channel;
	struct mlx5e_priv *priv = c->priv;
	struct mlx5_wq_ll *wq = &rq->wq;
	int i;

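	/* poll up to 1000 times, sleeping 4 ms per round: roughly a four second timeout */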
	for (i = 0; i < 1000; i++) {
		if (wq->cur_sz >= priv->params.min_rx_wqes)
			return (0);

		msleep(4);
	}
	return (-ETIMEDOUT);
}

static int
mlx5e_open_rq(struct mlx5e_channel *c,
    struct mlx5e_rq_param *param,
    struct mlx5e_rq *rq)
{
	int err;

	err = mlx5e_create_rq(c, param, rq);
	if (err)
		return (err);

	err = mlx5e_enable_rq(rq, param);
	if (err)
		goto err_destroy_rq;

	err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY);
	if (err)
		goto err_disable_rq;

	c->rq.enabled = 1;

	return (0);

err_disable_rq:
	mlx5e_disable_rq(rq);
err_destroy_rq:
	mlx5e_destroy_rq(rq);

	return (err);
}

static void
mlx5e_close_rq(struct mlx5e_rq *rq)
{
	mtx_lock(&rq->mtx);
	rq->enabled = 0;
	callout_stop(&rq->watchdog);
	mtx_unlock(&rq->mtx);

	callout_drain(&rq->watchdog);

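	/* move the RQ into the error state to error out remaining WQEs */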
	mlx5e_modify_rq(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR);
}

static void
mlx5e_close_rq_wait(struct mlx5e_rq *rq)
{
	struct mlx5_core_dev *mdev = rq->channel->priv->mdev;

	/* wait till RQ is empty */
	while (!mlx5_wq_ll_is_empty(&rq->wq) &&
	       (mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR)) {
		msleep(4);
		rq->cq.mcq.comp(&rq->cq.mcq);
	}

	mlx5e_disable_rq(rq);
	mlx5e_destroy_rq(rq);
}

void
mlx5e_free_sq_db(struct mlx5e_sq *sq)
{
	int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
	int x;

	for (x = 0; x != wq_sz; x++)
		bus_dmamap_destroy(sq->dma_tag, sq->mbuf[x].dma_map);
	free(sq->mbuf, M_MLX5EN);
}

int
mlx5e_alloc_sq_db(struct mlx5e_sq *sq)
{
	int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
	int err;
	int x;

	sq->mbuf = malloc(wq_sz * sizeof(sq->mbuf[0]), M_MLX5EN, M_WAITOK | M_ZERO);

	/* Create DMA descriptor MAPs */
	for (x = 0; x != wq_sz; x++) {
		err = -bus_dmamap_create(sq->dma_tag, 0, &sq->mbuf[x].dma_map);
		if (err != 0) {
			while (x--)
				bus_dmamap_destroy(sq->dma_tag, sq->mbuf[x].dma_map);
			free(sq->mbuf, M_MLX5EN);
			return (err);
		}
	}
	return (0);
}

static const char *mlx5e_sq_stats_desc[] = {
	MLX5E_SQ_STATS(MLX5E_STATS_DESC)
};

void
mlx5e_update_sq_inline(struct mlx5e_sq *sq)
{
	sq->max_inline = sq->priv->params.tx_max_inline;
	sq->min_inline_mode = sq->priv->params.tx_min_inline_mode;

	/*
	 * Check if trust state is DSCP or if inline mode is NONE which
	 * indicates CX-5 or newer hardware.
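	 *
	 * In either case the VLAN tag presumably does not have to be
	 * inlined into the send WQE, so hardware VLAN tag insertion
	 * can be used instead when the wqe_vlan_insert capability is
	 * present.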
	 */
	if (sq->priv->params_ethtool.trust_state != MLX5_QPTS_TRUST_PCP ||
	    sq->min_inline_mode == MLX5_INLINE_MODE_NONE) {
		if (MLX5_CAP_ETH(sq->priv->mdev, wqe_vlan_insert))
			sq->min_insert_caps = MLX5E_INSERT_VLAN | MLX5E_INSERT_NON_VLAN;
		else
			sq->min_insert_caps = MLX5E_INSERT_NON_VLAN;
	} else {
		sq->min_insert_caps = 0;
	}
}

static void
mlx5e_refresh_sq_inline_sub(struct mlx5e_priv *priv, struct mlx5e_channel *c)
{
	int i;

	for (i = 0; i != c->num_tc; i++) {
		mtx_lock(&c->sq[i].lock);
		mlx5e_update_sq_inline(&c->sq[i]);
		mtx_unlock(&c->sq[i].lock);
	}
}

void
mlx5e_refresh_sq_inline(struct mlx5e_priv *priv)
{
	int i;

	/* check if channels are closed */
	if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
		return;

	for (i = 0; i < priv->params.num_channels; i++)
		mlx5e_refresh_sq_inline_sub(priv, priv->channel[i]);
}

static int
mlx5e_create_sq(struct mlx5e_channel *c,
    int tc,
    struct mlx5e_sq_param *param,
    struct mlx5e_sq *sq)
{
	struct mlx5e_priv *priv = c->priv;
	struct mlx5_core_dev *mdev = priv->mdev;
	char buffer[16];
	void *sqc = param->sqc;
	void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq);
	int err;

	/* Create DMA descriptor TAG */
	if ((err = -bus_dma_tag_create(
	    bus_get_dma_tag(mdev->pdev->dev.bsddev),
	    1,				/* any alignment */
	    0,				/* no boundary */
	    BUS_SPACE_MAXADDR,		/* lowaddr */
	    BUS_SPACE_MAXADDR,		/* highaddr */
	    NULL, NULL,			/* filter, filterarg */
	    MLX5E_MAX_TX_PAYLOAD_SIZE,	/* maxsize */
	    MLX5E_MAX_TX_MBUF_FRAGS,	/* nsegments */
	    MLX5E_MAX_TX_MBUF_SIZE,	/* maxsegsize */
	    0,				/* flags */
	    NULL, NULL,			/* lockfunc, lockfuncarg */
	    &sq->dma_tag)))
		goto done;

	err = mlx5_alloc_map_uar(mdev, &sq->uar);
	if (err)
		goto err_free_dma_tag;

	err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq,
	    &sq->wq_ctrl);
	if (err)
		goto err_unmap_free_uar;

	sq->wq.db = &sq->wq.db[MLX5_SND_DBR];
	sq->bf_buf_size = (1 << MLX5_CAP_GEN(mdev, log_bf_reg_size)) / 2;

	err = mlx5e_alloc_sq_db(sq);
	if (err)
		goto err_sq_wq_destroy;

	sq->mkey_be = c->mkey_be;
	sq->ifp = priv->ifp;
	sq->priv = priv;
	sq->tc = tc;

	mlx5e_update_sq_inline(sq);

	snprintf(buffer, sizeof(buffer), "txstat%dtc%d", c->ix, tc);
	mlx5e_create_stats(&sq->stats.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
	    buffer, mlx5e_sq_stats_desc, MLX5E_SQ_STATS_NUM,
	    sq->stats.arg);

	return (0);

err_sq_wq_destroy:
	mlx5_wq_destroy(&sq->wq_ctrl);

err_unmap_free_uar:
	mlx5_unmap_free_uar(mdev, &sq->uar);

err_free_dma_tag:
	bus_dma_tag_destroy(sq->dma_tag);
done:
	return (err);
}

static void
mlx5e_destroy_sq(struct mlx5e_sq *sq)
{
	/* destroy all sysctl nodes */
	sysctl_ctx_free(&sq->stats.ctx);

	mlx5e_free_sq_db(sq);
	mlx5_wq_destroy(&sq->wq_ctrl);
	mlx5_unmap_free_uar(sq->priv->mdev, &sq->uar);
}

int
mlx5e_enable_sq(struct mlx5e_sq *sq, struct mlx5e_sq_param *param,
    int tis_num)
{
	void *in;
	void *sqc;
	void *wq;
	int inlen;
	int err;

	inlen = MLX5_ST_SZ_BYTES(create_sq_in) +
	    sizeof(u64) * sq->wq_ctrl.buf.npages;
	in = mlx5_vzalloc(inlen);
	if (in == NULL)
		return (-ENOMEM);

	sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
	wq = MLX5_ADDR_OF(sqc, sqc, wq);

	memcpy(sqc, param->sqc, sizeof(param->sqc));

	MLX5_SET(sqc, sqc, tis_num_0, tis_num);
	MLX5_SET(sqc, sqc, cqn, sq->cq.mcq.cqn);
	MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST);
	MLX5_SET(sqc, sqc, tis_lst_sz, 1);
	MLX5_SET(sqc, sqc, flush_in_error_en, 1);

	MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
	MLX5_SET(wq, wq, uar_page, sq->uar.index);
	MLX5_SET(wq, wq, log_wq_pg_sz, sq->wq_ctrl.buf.page_shift -
	    PAGE_SHIFT);
	MLX5_SET64(wq, wq, dbr_addr, sq->wq_ctrl.db.dma);

	mlx5_fill_page_array(&sq->wq_ctrl.buf,
	    (__be64 *) MLX5_ADDR_OF(wq, wq, pas));

	err = mlx5_core_create_sq(sq->priv->mdev, in, inlen, &sq->sqn);

	kvfree(in);

	return (err);
}

int
mlx5e_modify_sq(struct mlx5e_sq *sq, int curr_state, int next_state)
{
	void *in;
	void *sqc;
	int inlen;
	int err;

	inlen = MLX5_ST_SZ_BYTES(modify_sq_in);
	in = mlx5_vzalloc(inlen);
	if (in == NULL)
		return (-ENOMEM);

	sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);

	MLX5_SET(modify_sq_in, in, sqn, sq->sqn);
	MLX5_SET(modify_sq_in, in, sq_state, curr_state);
	MLX5_SET(sqc, sqc, state, next_state);

	err = mlx5_core_modify_sq(sq->priv->mdev, in, inlen);

	kvfree(in);

	return (err);
}

void
mlx5e_disable_sq(struct mlx5e_sq *sq)
{

	mlx5_core_destroy_sq(sq->priv->mdev, sq->sqn);
}

static int
mlx5e_open_sq(struct mlx5e_channel *c,
    int tc,
    struct mlx5e_sq_param *param,
    struct mlx5e_sq *sq)
{
	int err;

	err = mlx5e_create_sq(c, tc, param, sq);
	if (err)
		return (err);

	err = mlx5e_enable_sq(sq, param, c->priv->tisn[tc]);
	if (err)
		goto err_destroy_sq;

	err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY);
	if (err)
		goto err_disable_sq;

	return (0);

err_disable_sq:
	mlx5e_disable_sq(sq);
err_destroy_sq:
	mlx5e_destroy_sq(sq);

	return (err);
}

static void
mlx5e_sq_send_nops_locked(struct mlx5e_sq *sq, int can_sleep)
{
	/* fill up remainder with NOPs */
	while (sq->cev_counter != 0) {
		while (!mlx5e_sq_has_room_for(sq, 1)) {
			if (can_sleep != 0) {
				mtx_unlock(&sq->lock);
				msleep(4);
				mtx_lock(&sq->lock);
			} else {
				goto done;
			}
		}
		/* send a single NOP */
		mlx5e_send_nop(sq, 1);
		atomic_thread_fence_rel();
	}
done:
	/* Check if we need to write the doorbell */
	if (likely(sq->doorbell.d64 != 0)) {
		mlx5e_tx_notify_hw(sq, sq->doorbell.d32, 0);
		sq->doorbell.d64 = 0;
	}
}

void
mlx5e_sq_cev_timeout(void *arg)
{
	struct mlx5e_sq *sq = arg;

	mtx_assert(&sq->lock, MA_OWNED);

	/* check next state */
	switch (sq->cev_next_state) {
	case MLX5E_CEV_STATE_SEND_NOPS:
		/* fill TX ring with NOPs, if any */
		mlx5e_sq_send_nops_locked(sq, 0);

		/* check if completed */
		if (sq->cev_counter == 0) {
			sq->cev_next_state = MLX5E_CEV_STATE_INITIAL;
			return;
		}
		break;
	default:
		/* send NOPs on next timeout */
		sq->cev_next_state = MLX5E_CEV_STATE_SEND_NOPS;
		break;
	}

	/* restart timer */
	callout_reset_curcpu(&sq->cev_callout, hz, mlx5e_sq_cev_timeout, sq);
}

void
mlx5e_drain_sq(struct mlx5e_sq *sq)
{
	int error;
	struct mlx5_core_dev *mdev = sq->priv->mdev;

	/*
	 * Check if already stopped.
	 *
	 * NOTE: The "stopped" variable is only written when both the
	 * priv's configuration lock and the SQ's lock are locked. It
	 * can therefore safely be read when only one of the two locks
	 * is locked. This function is always called when the priv's
	 * configuration lock is locked.
	 */
	if (sq->stopped != 0)
		return;

	mtx_lock(&sq->lock);

	/* don't put more packets into the SQ */
	sq->stopped = 1;

	/* teardown event factor timer, if any */
	sq->cev_next_state = MLX5E_CEV_STATE_HOLD_NOPS;
	callout_stop(&sq->cev_callout);

	/* send dummy NOPs in order to flush the transmit ring */
	mlx5e_sq_send_nops_locked(sq, 1);
	mtx_unlock(&sq->lock);

	/* make sure it is safe to free the callout */
	callout_drain(&sq->cev_callout);

	/* wait till SQ is empty or link is down */
	mtx_lock(&sq->lock);
	while (sq->cc != sq->pc &&
	    (sq->priv->media_status_last & IFM_ACTIVE) != 0 &&
	    mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) {
		mtx_unlock(&sq->lock);
		msleep(1);
		sq->cq.mcq.comp(&sq->cq.mcq);
		mtx_lock(&sq->lock);
	}
	mtx_unlock(&sq->lock);

	/* error out remaining requests */
	error = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY, MLX5_SQC_STATE_ERR);
	if (error != 0) {
		if_printf(sq->ifp,
		    "mlx5e_modify_sq() from RDY to ERR failed: %d\n", error);
	}

	/* wait till SQ is empty */
	mtx_lock(&sq->lock);
	while (sq->cc != sq->pc &&
	       mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) {
		mtx_unlock(&sq->lock);
		msleep(1);
		sq->cq.mcq.comp(&sq->cq.mcq);
		mtx_lock(&sq->lock);
	}
	mtx_unlock(&sq->lock);
}

static void
mlx5e_close_sq_wait(struct mlx5e_sq *sq)
{

	mlx5e_drain_sq(sq);
	mlx5e_disable_sq(sq);
	mlx5e_destroy_sq(sq);
}

static int
mlx5e_create_cq(struct mlx5e_priv *priv,
    struct mlx5e_cq_param *param,
    struct mlx5e_cq *cq,
    mlx5e_cq_comp_t *comp,
    int eq_ix)
{
	struct mlx5_core_dev *mdev = priv->mdev;
	struct mlx5_core_cq *mcq = &cq->mcq;
	int eqn_not_used;
	int irqn;
	int err;
	u32 i;

	param->wq.buf_numa_node = 0;
	param->wq.db_numa_node = 0;

	err = mlx5_cqwq_create(mdev, &param->wq, param->cqc, &cq->wq,
	    &cq->wq_ctrl);
	if (err)
		return (err);

	mlx5_vector2eqn(mdev, eq_ix, &eqn_not_used, &irqn);

	mcq->cqe_sz = 64;
	mcq->set_ci_db = cq->wq_ctrl.db.db;
	mcq->arm_db = cq->wq_ctrl.db.db + 1;
	*mcq->set_ci_db = 0;
	*mcq->arm_db = 0;
	mcq->vector = eq_ix;
	mcq->comp = comp;
	mcq->event = mlx5e_cq_error_event;
	mcq->irqn = irqn;
	mcq->uar = &priv->cq_uar;

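	/*
	 * Initialize all CQEs to an invalid state, so that the
	 * completion handler ignores them until the hardware has
	 * actually written a completion entry:
	 */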
	for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) {
		struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, i);

		cqe->op_own = 0xf1;
	}

	cq->priv = priv;

	return (0);
}

static void
mlx5e_destroy_cq(struct mlx5e_cq *cq)
{
	mlx5_wq_destroy(&cq->wq_ctrl);
}

static int
mlx5e_enable_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param, int eq_ix)
{
	struct mlx5_core_cq *mcq = &cq->mcq;
	void *in;
	void *cqc;
	int inlen;
	int irqn_not_used;
	int eqn;
	int err;

	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
	    sizeof(u64) * cq->wq_ctrl.buf.npages;
	in = mlx5_vzalloc(inlen);
	if (in == NULL)
		return (-ENOMEM);

	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);

	memcpy(cqc, param->cqc, sizeof(param->cqc));

	mlx5_fill_page_array(&cq->wq_ctrl.buf,
	    (__be64 *) MLX5_ADDR_OF(create_cq_in, in, pas));

	mlx5_vector2eqn(cq->priv->mdev, eq_ix, &eqn, &irqn_not_used);

	MLX5_SET(cqc, cqc, c_eqn, eqn);
	MLX5_SET(cqc, cqc, uar_page, mcq->uar->index);
	MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
	    PAGE_SHIFT);
	MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma);

	err = mlx5_core_create_cq(cq->priv->mdev, mcq, in, inlen);

	kvfree(in);

	if (err)
		return (err);

	mlx5e_cq_arm(cq, MLX5_GET_DOORBELL_LOCK(&cq->priv->doorbell_lock));

	return (0);
}

static void
mlx5e_disable_cq(struct mlx5e_cq *cq)
{

	mlx5_core_destroy_cq(cq->priv->mdev, &cq->mcq);
}

int
mlx5e_open_cq(struct mlx5e_priv *priv,
    struct mlx5e_cq_param *param,
    struct mlx5e_cq *cq,
    mlx5e_cq_comp_t *comp,
    int eq_ix)
{
	int err;

	err = mlx5e_create_cq(priv, param, cq, comp, eq_ix);
	if (err)
		return (err);

	err = mlx5e_enable_cq(cq, param, eq_ix);
	if (err)
		goto err_destroy_cq;

	return (0);

err_destroy_cq:
	mlx5e_destroy_cq(cq);

	return (err);
}

void
mlx5e_close_cq(struct mlx5e_cq *cq)
{
	mlx5e_disable_cq(cq);
	mlx5e_destroy_cq(cq);
}

static int
mlx5e_open_tx_cqs(struct mlx5e_channel *c,
    struct mlx5e_channel_param *cparam)
{
	int err;
	int tc;

	for (tc = 0; tc < c->num_tc; tc++) {
		/* open completion queue */
		err = mlx5e_open_cq(c->priv, &cparam->tx_cq, &c->sq[tc].cq,
		    &mlx5e_tx_cq_comp, c->ix);
		if (err)
			goto err_close_tx_cqs;
	}
	return (0);

err_close_tx_cqs:
	for (tc--; tc >= 0; tc--)
		mlx5e_close_cq(&c->sq[tc].cq);

	return (err);
}

static void
mlx5e_close_tx_cqs(struct mlx5e_channel *c)
{
	int tc;

	for (tc = 0; tc < c->num_tc; tc++)
		mlx5e_close_cq(&c->sq[tc].cq);
}

static int
mlx5e_open_sqs(struct mlx5e_channel *c,
    struct mlx5e_channel_param *cparam)
{
	int err;
	int tc;

	for (tc = 0; tc < c->num_tc; tc++) {
		err = mlx5e_open_sq(c, tc, &cparam->sq, &c->sq[tc]);
		if (err)
			goto err_close_sqs;
	}

	return (0);

err_close_sqs:
	for (tc--; tc >= 0; tc--)
		mlx5e_close_sq_wait(&c->sq[tc]);

	return (err);
}

static void
mlx5e_close_sqs_wait(struct mlx5e_channel *c)
{
	int tc;

	for (tc = 0; tc < c->num_tc; tc++)
		mlx5e_close_sq_wait(&c->sq[tc]);
}

static void
mlx5e_chan_mtx_init(struct mlx5e_channel *c)
{
	int tc;

	mtx_init(&c->rq.mtx, "mlx5rx", MTX_NETWORK_LOCK, MTX_DEF);

	callout_init_mtx(&c->rq.watchdog, &c->rq.mtx, 0);

	for (tc = 0; tc < c->num_tc; tc++) {
		struct mlx5e_sq *sq = c->sq + tc;

		mtx_init(&sq->lock, "mlx5tx",
		    MTX_NETWORK_LOCK " TX", MTX_DEF);
		mtx_init(&sq->comp_lock, "mlx5comp",
		    MTX_NETWORK_LOCK " TX", MTX_DEF);

		callout_init_mtx(&sq->cev_callout, &sq->lock, 0);

		sq->cev_factor = c->priv->params_ethtool.tx_completion_fact;

		/* ensure the TX completion event factor is not zero */
		if (sq->cev_factor == 0)
			sq->cev_factor = 1;
	}
}

static void
mlx5e_chan_mtx_destroy(struct mlx5e_channel *c)
{
	int tc;

	mtx_destroy(&c->rq.mtx);

	for (tc = 0; tc < c->num_tc; tc++) {
		mtx_destroy(&c->sq[tc].lock);
		mtx_destroy(&c->sq[tc].comp_lock);
	}
}

static int
mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
    struct mlx5e_channel_param *cparam,
    struct mlx5e_channel *volatile *cp)
{
	struct mlx5e_channel *c;
	int err;

	c = malloc(sizeof(*c), M_MLX5EN, M_WAITOK | M_ZERO);
	c->priv = priv;
	c->ix = ix;
	c->cpu = 0;
	c->ifp = priv->ifp;
	c->mkey_be = cpu_to_be32(priv->mr.key);
	c->num_tc = priv->num_tc;

	/* init mutexes */
	mlx5e_chan_mtx_init(c);

	/* open transmit completion queue */
	err = mlx5e_open_tx_cqs(c, cparam);
	if (err)
		goto err_free;

	/* open receive completion queue */
	err = mlx5e_open_cq(c->priv, &cparam->rx_cq, &c->rq.cq,
	    &mlx5e_rx_cq_comp, c->ix);
	if (err)
		goto err_close_tx_cqs;

	err = mlx5e_open_sqs(c, cparam);
	if (err)
		goto err_close_rx_cq;

	err = mlx5e_open_rq(c, &cparam->rq, &c->rq);
	if (err)
		goto err_close_sqs;

	/* store channel pointer */
	*cp = c;

	/* poll receive queue initially */
	c->rq.cq.mcq.comp(&c->rq.cq.mcq);

	return (0);

err_close_sqs:
	mlx5e_close_sqs_wait(c);

err_close_rx_cq:
	mlx5e_close_cq(&c->rq.cq);

err_close_tx_cqs:
	mlx5e_close_tx_cqs(c);

err_free:
	/* destroy mutexes */
	mlx5e_chan_mtx_destroy(c);
	free(c, M_MLX5EN);
	return (err);
}

static void
mlx5e_close_channel(struct mlx5e_channel *volatile *pp)
{
	struct mlx5e_channel *c = *pp;

	/* check if channel is already closed */
	if (c == NULL)
		return;
	mlx5e_close_rq(&c->rq);
}

static void
mlx5e_close_channel_wait(struct mlx5e_channel *volatile *pp)
{
	struct mlx5e_channel *c = *pp;

	/* check if channel is already closed */
	if (c == NULL)
		return;
	/* ensure channel pointer is no longer used */
	*pp = NULL;

	mlx5e_close_rq_wait(&c->rq);
	mlx5e_close_sqs_wait(c);
	mlx5e_close_cq(&c->rq.cq);
	mlx5e_close_tx_cqs(c);
	/* destroy mutexes */
	mlx5e_chan_mtx_destroy(c);
	free(c, M_MLX5EN);
}

static int
mlx5e_get_wqe_sz(struct mlx5e_priv *priv, u32 *wqe_sz, u32 *nsegs)
{
	u32 r, n;

	r = priv->params.hw_lro_en ? priv->params.lro_wqe_sz :
	    MLX5E_SW2MB_MTU(priv->ifp->if_mtu);
	if (r > MJUM16BYTES)
		return (-ENOMEM);

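	/* round up to the next supported mbuf cluster size */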
	if (r > MJUM9BYTES)
		r = MJUM16BYTES;
	else if (r > MJUMPAGESIZE)
		r = MJUM9BYTES;
	else if (r > MCLBYTES)
		r = MJUMPAGESIZE;
	else
		r = MCLBYTES;

	/*
	 * n + 1 must be a power of two, because stride size must be.
	 * Stride size is 16 * (n + 1), as the first segment is
	 * control.
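	 *
	 * For example, assuming MLX5E_MAX_RX_BYTES is MCLBYTES (2048)
	 * and r is MJUM9BYTES (9216), howmany() yields n = 5 and the
	 * loop below advances n to 7, so that n + 1 = 8 is a power of
	 * two and the stride becomes 16 * 8 = 128 bytes.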
	 */
	for (n = howmany(r, MLX5E_MAX_RX_BYTES); !powerof2(n + 1); n++)
		;

	*wqe_sz = r;
	*nsegs = n;
	return (0);
}

static void
mlx5e_build_rq_param(struct mlx5e_priv *priv,
    struct mlx5e_rq_param *param)
{
	void *rqc = param->rqc;
	void *wq = MLX5_ADDR_OF(rqc, rqc, wq);
	u32 wqe_sz, nsegs;

	mlx5e_get_wqe_sz(priv, &wqe_sz, &nsegs);
	MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST);
	MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN);
	MLX5_SET(wq, wq, log_wq_stride, ilog2(sizeof(struct mlx5e_rx_wqe) +
	    nsegs * sizeof(struct mlx5_wqe_data_seg)));
	MLX5_SET(wq, wq, log_wq_sz, priv->params.log_rq_size);
	MLX5_SET(wq, wq, pd, priv->pdn);

	param->wq.buf_numa_node = 0;
	param->wq.db_numa_node = 0;
	param->wq.linear = 1;
}

static void
mlx5e_build_sq_param(struct mlx5e_priv *priv,
    struct mlx5e_sq_param *param)
{
	void *sqc = param->sqc;
	void *wq = MLX5_ADDR_OF(sqc, sqc, wq);

	MLX5_SET(wq, wq, log_wq_sz, priv->params.log_sq_size);
	MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB));
	MLX5_SET(wq, wq, pd, priv->pdn);

	param->wq.buf_numa_node = 0;
	param->wq.db_numa_node = 0;
	param->wq.linear = 1;
}

static void
mlx5e_build_common_cq_param(struct mlx5e_priv *priv,
    struct mlx5e_cq_param *param)
{
	void *cqc = param->cqc;

	MLX5_SET(cqc, cqc, uar_page, priv->cq_uar.index);
}

static void
mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
    struct mlx5e_cq_param *param)
{
	void *cqc = param->cqc;

	/*
	 * TODO The sysctl to control this on/off is a boolean for now,
	 * which means we only support CSUM. Once HASH is implemented
	 * we'll need to address that.
	 */
1840	if (priv->params.cqe_zipping_en) {
1841		MLX5_SET(cqc, cqc, mini_cqe_res_format, MLX5_CQE_FORMAT_CSUM);
1842		MLX5_SET(cqc, cqc, cqe_compression_en, 1);
1843	}
1844
1845	MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_rq_size);
1846	MLX5_SET(cqc, cqc, cq_period, priv->params.rx_cq_moderation_usec);
1847	MLX5_SET(cqc, cqc, cq_max_count, priv->params.rx_cq_moderation_pkts);
1848
1849	switch (priv->params.rx_cq_moderation_mode) {
1850	case 0:
1851		MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
1852		break;
1853	default:
1854		if (MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe))
1855			MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
1856		else
1857			MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
1858		break;
1859	}
1860
1861	mlx5e_build_common_cq_param(priv, param);
1862}
1863
1864static void
1865mlx5e_build_tx_cq_param(struct mlx5e_priv *priv,
1866    struct mlx5e_cq_param *param)
1867{
1868	void *cqc = param->cqc;
1869
1870	MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_sq_size);
1871	MLX5_SET(cqc, cqc, cq_period, priv->params.tx_cq_moderation_usec);
1872	MLX5_SET(cqc, cqc, cq_max_count, priv->params.tx_cq_moderation_pkts);
1873
1874	switch (priv->params.tx_cq_moderation_mode) {
1875	case 0:
1876		MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
1877		break;
1878	default:
1879		if (MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe))
1880			MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
1881		else
1882			MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
1883		break;
1884	}
1885
1886	mlx5e_build_common_cq_param(priv, param);
1887}
1888
1889static void
1890mlx5e_build_channel_param(struct mlx5e_priv *priv,
1891    struct mlx5e_channel_param *cparam)
1892{
1893	memset(cparam, 0, sizeof(*cparam));
1894
1895	mlx5e_build_rq_param(priv, &cparam->rq);
1896	mlx5e_build_sq_param(priv, &cparam->sq);
1897	mlx5e_build_rx_cq_param(priv, &cparam->rx_cq);
1898	mlx5e_build_tx_cq_param(priv, &cparam->tx_cq);
1899}
1900
1901static int
1902mlx5e_open_channels(struct mlx5e_priv *priv)
1903{
1904	struct mlx5e_channel_param cparam;
1905	void *ptr;
1906	int err;
1907	int i;
1908	int j;
1909
1910	priv->channel = malloc(priv->params.num_channels *
1911	    sizeof(struct mlx5e_channel *), M_MLX5EN, M_WAITOK | M_ZERO);
1912
1913	mlx5e_build_channel_param(priv, &cparam);
1914	for (i = 0; i < priv->params.num_channels; i++) {
1915		err = mlx5e_open_channel(priv, i, &cparam, &priv->channel[i]);
1916		if (err)
1917			goto err_close_channels;
1918	}
1919
1920	for (j = 0; j < priv->params.num_channels; j++) {
1921		err = mlx5e_wait_for_min_rx_wqes(&priv->channel[j]->rq);
1922		if (err)
1923			goto err_close_channels;
1924	}
1925
1926	return (0);
1927
1928err_close_channels:
1929	for (i--; i >= 0; i--) {
1930		mlx5e_close_channel(&priv->channel[i]);
1931		mlx5e_close_channel_wait(&priv->channel[i]);
1932	}
1933
1934	/* remove "volatile" attribute from "channel" pointer */
1935	ptr = __DECONST(void *, priv->channel);
1936	priv->channel = NULL;
1937
1938	free(ptr, M_MLX5EN);
1939
1940	return (err);
1941}
1942
1943static void
1944mlx5e_close_channels(struct mlx5e_priv *priv)
1945{
1946	void *ptr;
1947	int i;
1948
1949	if (priv->channel == NULL)
1950		return;
1951
1952	for (i = 0; i < priv->params.num_channels; i++)
1953		mlx5e_close_channel(&priv->channel[i]);
1954	for (i = 0; i < priv->params.num_channels; i++)
1955		mlx5e_close_channel_wait(&priv->channel[i]);
1956
1957	/* remove "volatile" attribute from "channel" pointer */
1958	ptr = __DECONST(void *, priv->channel);
1959	priv->channel = NULL;
1960
1961	free(ptr, M_MLX5EN);
1962}
1963
1964static int
1965mlx5e_refresh_sq_params(struct mlx5e_priv *priv, struct mlx5e_sq *sq)
1966{
1967
1968	if (MLX5_CAP_GEN(priv->mdev, cq_period_mode_modify)) {
1969		uint8_t cq_mode;
1970
1971		switch (priv->params.tx_cq_moderation_mode) {
1972		case 0:
1973			cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
1974			break;
1975		default:
1976			cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_CQE;
1977			break;
1978		}
1979
1980		return (mlx5_core_modify_cq_moderation_mode(priv->mdev, &sq->cq.mcq,
1981		    priv->params.tx_cq_moderation_usec,
1982		    priv->params.tx_cq_moderation_pkts,
1983		    cq_mode));
1984	}
1985
1986	return (mlx5_core_modify_cq_moderation(priv->mdev, &sq->cq.mcq,
1987	    priv->params.tx_cq_moderation_usec,
1988	    priv->params.tx_cq_moderation_pkts));
1989}
1990
1991static int
1992mlx5e_refresh_rq_params(struct mlx5e_priv *priv, struct mlx5e_rq *rq)
1993{
1994
1995	if (MLX5_CAP_GEN(priv->mdev, cq_period_mode_modify)) {
1996		uint8_t cq_mode;
1997		int retval;
1998
1999		switch (priv->params.rx_cq_moderation_mode) {
2000		case 0:
2001			cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
2002			break;
2003		default:
2004			cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_CQE;
2005			break;
2006		}
2007
2008		retval = mlx5_core_modify_cq_moderation_mode(priv->mdev, &rq->cq.mcq,
2009		    priv->params.rx_cq_moderation_usec,
2010		    priv->params.rx_cq_moderation_pkts,
2011		    cq_mode);
2012
2013		return (retval);
2014	}
2015
2016	return (mlx5_core_modify_cq_moderation(priv->mdev, &rq->cq.mcq,
2017	    priv->params.rx_cq_moderation_usec,
2018	    priv->params.rx_cq_moderation_pkts));
2019}
2020
2021static int
2022mlx5e_refresh_channel_params_sub(struct mlx5e_priv *priv, struct mlx5e_channel *c)
2023{
2024	int err;
2025	int i;
2026
2027	if (c == NULL)
2028		return (EINVAL);
2029
2030	err = mlx5e_refresh_rq_params(priv, &c->rq);
2031	if (err)
2032		goto done;
2033
2034	for (i = 0; i != c->num_tc; i++) {
2035		err = mlx5e_refresh_sq_params(priv, &c->sq[i]);
2036		if (err)
2037			goto done;
2038	}
2039done:
2040	return (err);
2041}
2042
2043int
2044mlx5e_refresh_channel_params(struct mlx5e_priv *priv)
2045{
2046	int i;
2047
2048	if (priv->channel == NULL)
2049		return (EINVAL);
2050
2051	for (i = 0; i < priv->params.num_channels; i++) {
2052		int err;
2053
2054		err = mlx5e_refresh_channel_params_sub(priv, priv->channel[i]);
2055		if (err)
2056			return (err);
2057	}
2058	return (0);
2059}
2060
2061static int
2062mlx5e_open_tis(struct mlx5e_priv *priv, int tc)
2063{
2064	struct mlx5_core_dev *mdev = priv->mdev;
2065	u32 in[MLX5_ST_SZ_DW(create_tis_in)];
2066	void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
2067
2068	memset(in, 0, sizeof(in));
2069
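	/* the traffic class index doubles as the TIS priority */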
2070	MLX5_SET(tisc, tisc, prio, tc);
2071	MLX5_SET(tisc, tisc, transport_domain, priv->tdn);
2072
2073	return (mlx5_core_create_tis(mdev, in, sizeof(in), &priv->tisn[tc]));
2074}
2075
2076static void
2077mlx5e_close_tis(struct mlx5e_priv *priv, int tc)
2078{
2079	mlx5_core_destroy_tis(priv->mdev, priv->tisn[tc]);
2080}
2081
2082static int
2083mlx5e_open_tises(struct mlx5e_priv *priv)
2084{
2085	int num_tc = priv->num_tc;
2086	int err;
2087	int tc;
2088
2089	for (tc = 0; tc < num_tc; tc++) {
2090		err = mlx5e_open_tis(priv, tc);
2091		if (err)
2092			goto err_close_tises;
2093	}
2094
2095	return (0);
2096
2097err_close_tises:
2098	for (tc--; tc >= 0; tc--)
2099		mlx5e_close_tis(priv, tc);
2100
2101	return (err);
2102}
2103
2104static void
2105mlx5e_close_tises(struct mlx5e_priv *priv)
2106{
2107	int num_tc = priv->num_tc;
2108	int tc;
2109
2110	for (tc = 0; tc < num_tc; tc++)
2111		mlx5e_close_tis(priv, tc);
2112}
2113
2114static int
2115mlx5e_open_rqt(struct mlx5e_priv *priv)
2116{
2117	struct mlx5_core_dev *mdev = priv->mdev;
2118	u32 *in;
2119	u32 out[MLX5_ST_SZ_DW(create_rqt_out)] = {0};
2120	void *rqtc;
2121	int inlen;
2122	int err;
2123	int sz;
2124	int i;
2125
2126	sz = 1 << priv->params.rx_hash_log_tbl_sz;
2127
2128	inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz;
2129	in = mlx5_vzalloc(inlen);
2130	if (in == NULL)
2131		return (-ENOMEM);
2132	rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
2133
2134	MLX5_SET(rqtc, rqtc, rqt_actual_size, sz);
2135	MLX5_SET(rqtc, rqtc, rqt_max_size, sz);
2136
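	/*
	 * Fill the indirection table: entry i selects the RQ of the
	 * channel serving RSS bucket i, wrapped to the number of
	 * channels and aligned to the RSS stride, if any.
	 */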
2137	for (i = 0; i < sz; i++) {
2138		int ix = i;
2139#ifdef RSS
2140		ix = rss_get_indirection_to_bucket(ix);
2141#endif
2142		/* ensure we don't overflow */
2143		ix %= priv->params.num_channels;
2144
2145		/* apply receive side scaling stride, if any */
2146		ix -= ix % (int)priv->params.channels_rsss;
2147
2148		MLX5_SET(rqtc, rqtc, rq_num[i], priv->channel[ix]->rq.rqn);
2149	}
2150
2151	MLX5_SET(create_rqt_in, in, opcode, MLX5_CMD_OP_CREATE_RQT);
2152
2153	err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
2154	if (!err)
2155		priv->rqtn = MLX5_GET(create_rqt_out, out, rqtn);
2156
2157	kvfree(in);
2158
2159	return (err);
2160}
2161
2162static void
2163mlx5e_close_rqt(struct mlx5e_priv *priv)
2164{
2165	u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)] = {0};
2166	u32 out[MLX5_ST_SZ_DW(destroy_rqt_out)] = {0};
2167
2168	MLX5_SET(destroy_rqt_in, in, opcode, MLX5_CMD_OP_DESTROY_RQT);
2169	MLX5_SET(destroy_rqt_in, in, rqtn, priv->rqtn);
2170
2171	mlx5_cmd_exec(priv->mdev, in, sizeof(in), out, sizeof(out));
2172}
2173
2174static void
2175mlx5e_build_tir_ctx(struct mlx5e_priv *priv, u32 * tirc, int tt)
2176{
2177	void *hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
2178	__be32 *hkey;
2179
2180	MLX5_SET(tirc, tirc, transport_domain, priv->tdn);
2181
2182#define	ROUGH_MAX_L2_L3_HDR_SZ 256
2183
2184#define	MLX5_HASH_IP     (MLX5_HASH_FIELD_SEL_SRC_IP   |\
2185			  MLX5_HASH_FIELD_SEL_DST_IP)
2186
2187#define	MLX5_HASH_ALL    (MLX5_HASH_FIELD_SEL_SRC_IP   |\
2188			  MLX5_HASH_FIELD_SEL_DST_IP   |\
2189			  MLX5_HASH_FIELD_SEL_L4_SPORT |\
2190			  MLX5_HASH_FIELD_SEL_L4_DPORT)
2191
2192#define	MLX5_HASH_IP_IPSEC_SPI	(MLX5_HASH_FIELD_SEL_SRC_IP   |\
2193				 MLX5_HASH_FIELD_SEL_DST_IP   |\
2194				 MLX5_HASH_FIELD_SEL_IPSEC_SPI)
2195
2196	if (priv->params.hw_lro_en) {
2197		MLX5_SET(tirc, tirc, lro_enable_mask,
2198		    MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO |
2199		    MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO);
2200		MLX5_SET(tirc, tirc, lro_max_msg_sz,
2201		    (priv->params.lro_wqe_sz -
2202		    ROUGH_MAX_L2_L3_HDR_SZ) >> 8);
2203		/* TODO: add the option to choose timer value dynamically */
2204		MLX5_SET(tirc, tirc, lro_timeout_period_usecs,
2205		    MLX5_CAP_ETH(priv->mdev,
2206		    lro_timer_supported_periods[2]));
2207	}
2208
2209	/* setup parameters for hashing TIR type, if any */
2210	switch (tt) {
2211	case MLX5E_TT_ANY:
2212		MLX5_SET(tirc, tirc, disp_type,
2213		    MLX5_TIRC_DISP_TYPE_DIRECT);
2214		MLX5_SET(tirc, tirc, inline_rqn,
2215		    priv->channel[0]->rq.rqn);
2216		break;
2217	default:
2218		MLX5_SET(tirc, tirc, disp_type,
2219		    MLX5_TIRC_DISP_TYPE_INDIRECT);
2220		MLX5_SET(tirc, tirc, indirect_table,
2221		    priv->rqtn);
2222		MLX5_SET(tirc, tirc, rx_hash_fn,
2223		    MLX5_TIRC_RX_HASH_FN_HASH_TOEPLITZ);
2224		hkey = (__be32 *) MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
2225#ifdef RSS
2226		/*
2227		 * The FreeBSD RSS implementation does not currently
2228		 * support symmetric Toeplitz hashes:
2229		 */
2230		MLX5_SET(tirc, tirc, rx_hash_symmetric, 0);
2231		rss_getkey((uint8_t *)hkey);
2232#else
2233		MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
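		/* built-in Toeplitz key, used when kernel RSS support is not compiled in */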
2234		hkey[0] = cpu_to_be32(0xD181C62C);
2235		hkey[1] = cpu_to_be32(0xF7F4DB5B);
2236		hkey[2] = cpu_to_be32(0x1983A2FC);
2237		hkey[3] = cpu_to_be32(0x943E1ADB);
2238		hkey[4] = cpu_to_be32(0xD9389E6B);
2239		hkey[5] = cpu_to_be32(0xD1039C2C);
2240		hkey[6] = cpu_to_be32(0xA74499AD);
2241		hkey[7] = cpu_to_be32(0x593D56D9);
2242		hkey[8] = cpu_to_be32(0xF3253C06);
2243		hkey[9] = cpu_to_be32(0x2ADC1FFC);
2244#endif
2245		break;
2246	}
2247
2248	switch (tt) {
2249	case MLX5E_TT_IPV4_TCP:
2250		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2251		    MLX5_L3_PROT_TYPE_IPV4);
2252		MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
2253		    MLX5_L4_PROT_TYPE_TCP);
2254#ifdef RSS
2255		if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_TCP_IPV4)) {
2256			MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2257			    MLX5_HASH_IP);
2258		} else
2259#endif
2260		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2261		    MLX5_HASH_ALL);
2262		break;
2263
2264	case MLX5E_TT_IPV6_TCP:
2265		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2266		    MLX5_L3_PROT_TYPE_IPV6);
2267		MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
2268		    MLX5_L4_PROT_TYPE_TCP);
2269#ifdef RSS
2270		if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_TCP_IPV6)) {
2271			MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2272			    MLX5_HASH_IP);
2273		} else
2274#endif
2275		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2276		    MLX5_HASH_ALL);
2277		break;
2278
2279	case MLX5E_TT_IPV4_UDP:
2280		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2281		    MLX5_L3_PROT_TYPE_IPV4);
2282		MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
2283		    MLX5_L4_PROT_TYPE_UDP);
2284#ifdef RSS
2285		if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_UDP_IPV4)) {
2286			MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2287			    MLX5_HASH_IP);
2288		} else
2289#endif
2290		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2291		    MLX5_HASH_ALL);
2292		break;
2293
2294	case MLX5E_TT_IPV6_UDP:
2295		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2296		    MLX5_L3_PROT_TYPE_IPV6);
2297		MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
2298		    MLX5_L4_PROT_TYPE_UDP);
2299#ifdef RSS
2300		if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_UDP_IPV6)) {
2301			MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2302			    MLX5_HASH_IP);
2303		} else
2304#endif
2305		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2306		    MLX5_HASH_ALL);
2307		break;
2308
2309	case MLX5E_TT_IPV4_IPSEC_AH:
2310		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2311		    MLX5_L3_PROT_TYPE_IPV4);
2312		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2313		    MLX5_HASH_IP_IPSEC_SPI);
2314		break;
2315
2316	case MLX5E_TT_IPV6_IPSEC_AH:
2317		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2318		    MLX5_L3_PROT_TYPE_IPV6);
2319		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2320		    MLX5_HASH_IP_IPSEC_SPI);
2321		break;
2322
2323	case MLX5E_TT_IPV4_IPSEC_ESP:
2324		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2325		    MLX5_L3_PROT_TYPE_IPV4);
2326		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2327		    MLX5_HASH_IP_IPSEC_SPI);
2328		break;
2329
2330	case MLX5E_TT_IPV6_IPSEC_ESP:
2331		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2332		    MLX5_L3_PROT_TYPE_IPV6);
2333		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2334		    MLX5_HASH_IP_IPSEC_SPI);
2335		break;
2336
2337	case MLX5E_TT_IPV4:
2338		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2339		    MLX5_L3_PROT_TYPE_IPV4);
2340		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2341		    MLX5_HASH_IP);
2342		break;
2343
2344	case MLX5E_TT_IPV6:
2345		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2346		    MLX5_L3_PROT_TYPE_IPV6);
2347		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2348		    MLX5_HASH_IP);
2349		break;
2350
2351	default:
2352		break;
2353	}
2354}
2355
2356static int
2357mlx5e_open_tir(struct mlx5e_priv *priv, int tt)
2358{
2359	struct mlx5_core_dev *mdev = priv->mdev;
2360	u32 *in;
2361	void *tirc;
2362	int inlen;
2363	int err;
2364
2365	inlen = MLX5_ST_SZ_BYTES(create_tir_in);
2366	in = mlx5_vzalloc(inlen);
2367	if (in == NULL)
2368		return (-ENOMEM);
2369	tirc = MLX5_ADDR_OF(create_tir_in, in, tir_context);
2370
2371	mlx5e_build_tir_ctx(priv, tirc, tt);
2372
2373	err = mlx5_core_create_tir(mdev, in, inlen, &priv->tirn[tt]);
2374
2375	kvfree(in);
2376
2377	return (err);
2378}
2379
2380static void
2381mlx5e_close_tir(struct mlx5e_priv *priv, int tt)
2382{
2383	mlx5_core_destroy_tir(priv->mdev, priv->tirn[tt]);
2384}
2385
2386static int
2387mlx5e_open_tirs(struct mlx5e_priv *priv)
2388{
2389	int err;
2390	int i;
2391
2392	for (i = 0; i < MLX5E_NUM_TT; i++) {
2393		err = mlx5e_open_tir(priv, i);
2394		if (err)
2395			goto err_close_tirs;
2396	}
2397
2398	return (0);
2399
2400err_close_tirs:
2401	for (i--; i >= 0; i--)
2402		mlx5e_close_tir(priv, i);
2403
2404	return (err);
2405}
2406
2407static void
2408mlx5e_close_tirs(struct mlx5e_priv *priv)
2409{
2410	int i;
2411
2412	for (i = 0; i < MLX5E_NUM_TT; i++)
2413		mlx5e_close_tir(priv, i);
2414}
2415
2416/*
2417 * SW MTU does not include headers,
2418 * HW MTU includes all headers and checksums.
2419 */
2420static int
2421mlx5e_set_dev_port_mtu(struct ifnet *ifp, int sw_mtu)
2422{
2423	struct mlx5e_priv *priv = ifp->if_softc;
2424	struct mlx5_core_dev *mdev = priv->mdev;
2425	int hw_mtu;
2426	int err;
2427
2428	hw_mtu = MLX5E_SW2HW_MTU(sw_mtu);
2429
2430	err = mlx5_set_port_mtu(mdev, hw_mtu);
2431	if (err) {
2432		if_printf(ifp, "%s: mlx5_set_port_mtu failed setting %d, err=%d\n",
2433		    __func__, sw_mtu, err);
2434		return (err);
2435	}
2436
2437	/* Update vport context MTU */
2438	err = mlx5_set_vport_mtu(mdev, hw_mtu);
2439	if (err) {
2440		if_printf(ifp, "%s: Failed updating vport context with MTU size, err=%d\n",
2441		    __func__, err);
2442	}
2443
2444	ifp->if_mtu = sw_mtu;
2445
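	/*
	 * Read back the MTU the hardware actually accepted, to verify
	 * that the requested value took effect.
	 */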
2446	err = mlx5_query_vport_mtu(mdev, &hw_mtu);
2447	if (err || !hw_mtu) {
2448		/* fallback to port oper mtu */
2449		err = mlx5_query_port_oper_mtu(mdev, &hw_mtu);
2450	}
2451	if (err) {
2452		if_printf(ifp, "Querying port MTU after setting new "
2453		    "MTU value failed\n");
2454		return (err);
2455	} else if (MLX5E_HW2SW_MTU(hw_mtu) < sw_mtu) {
2456		err = -E2BIG;
2457		if_printf(ifp, "Port MTU %d is smaller than "
2458		    "ifp mtu %d\n", hw_mtu, sw_mtu);
2459	} else if (MLX5E_HW2SW_MTU(hw_mtu) > sw_mtu) {
2460		err = -EINVAL;
2461		if_printf(ifp, "Port MTU %d is bigger than "
2462		    "ifp mtu %d\n", hw_mtu, sw_mtu);
2463	}
2464	priv->params_ethtool.hw_mtu = hw_mtu;
2465
2466	return (err);
2467}
2468
2469int
2470mlx5e_open_locked(struct ifnet *ifp)
2471{
2472	struct mlx5e_priv *priv = ifp->if_softc;
2473	int err;
2474	u16 set_id;
2475
2476	/* check if already opened */
2477	if (test_bit(MLX5E_STATE_OPENED, &priv->state) != 0)
2478		return (0);
2479
2480#ifdef RSS
2481	if (rss_getnumbuckets() > priv->params.num_channels) {
2482		if_printf(ifp, "NOTE: There are more RSS buckets(%u) than "
2483		    "channels(%u) available\n", rss_getnumbuckets(),
2484		    priv->params.num_channels);
2485	}
2486#endif
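	/*
	 * Bring-up order: TISes, queue counter, channels, RQT, TIRs,
	 * flow table and VLAN rules; the error unwind below runs in
	 * reverse order.
	 */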
2487	err = mlx5e_open_tises(priv);
2488	if (err) {
2489		if_printf(ifp, "%s: mlx5e_open_tises failed, %d\n",
2490		    __func__, err);
2491		return (err);
2492	}
2493	err = mlx5_vport_alloc_q_counter(priv->mdev,
2494	    MLX5_INTERFACE_PROTOCOL_ETH, &set_id);
2495	if (err) {
2496		if_printf(priv->ifp,
2497		    "%s: mlx5_vport_alloc_q_counter failed: %d\n",
2498		    __func__, err);
2499		goto err_close_tises;
2500	}
2501	/* store counter set ID */
2502	priv->counter_set_id = set_id;
2503
2504	err = mlx5e_open_channels(priv);
2505	if (err) {
2506		if_printf(ifp, "%s: mlx5e_open_channels failed, %d\n",
2507		    __func__, err);
2508		goto err_dealloc_q_counter;
2509	}
2510	err = mlx5e_open_rqt(priv);
2511	if (err) {
2512		if_printf(ifp, "%s: mlx5e_open_rqt failed, %d\n",
2513		    __func__, err);
2514		goto err_close_channels;
2515	}
2516	err = mlx5e_open_tirs(priv);
2517	if (err) {
2518		if_printf(ifp, "%s: mlx5e_open_tirs failed, %d\n",
2519		    __func__, err);
2520		goto err_close_rqt;
2521	}
2522	err = mlx5e_open_flow_table(priv);
2523	if (err) {
2524		if_printf(ifp, "%s: mlx5e_open_flow_table failed, %d\n",
2525		    __func__, err);
2526		goto err_close_tirs;
2527	}
2528	err = mlx5e_add_all_vlan_rules(priv);
2529	if (err) {
2530		if_printf(ifp, "%s: mlx5e_add_all_vlan_rules failed, %d\n",
2531		    __func__, err);
2532		goto err_close_flow_table;
2533	}
2534	set_bit(MLX5E_STATE_OPENED, &priv->state);
2535
2536	mlx5e_update_carrier(priv);
2537	mlx5e_set_rx_mode_core(priv);
2538
2539	return (0);
2540
2541err_close_flow_table:
2542	mlx5e_close_flow_table(priv);
2543
2544err_close_tirs:
2545	mlx5e_close_tirs(priv);
2546
2547	err_close_rqt:
2548	mlx5e_close_rqt(priv);
2549
2550err_close_channels:
2551	mlx5e_close_channels(priv);
2552
2553	err_dealloc_q_counter:
2554	mlx5_vport_dealloc_q_counter(priv->mdev,
2555	    MLX5_INTERFACE_PROTOCOL_ETH, priv->counter_set_id);
2556
2557err_close_tises:
2558	mlx5e_close_tises(priv);
2559
2560	return (err);
2561}
2562
2563static void
2564mlx5e_open(void *arg)
2565{
2566	struct mlx5e_priv *priv = arg;
2567
2568	PRIV_LOCK(priv);
2569	if (mlx5_set_port_status(priv->mdev, MLX5_PORT_UP))
2570		if_printf(priv->ifp,
2571		    "%s: Setting port status to up failed\n",
2572		    __func__);
2573
2574	mlx5e_open_locked(priv->ifp);
2575	priv->ifp->if_drv_flags |= IFF_DRV_RUNNING;
2576	PRIV_UNLOCK(priv);
2577}
2578
2579int
2580mlx5e_close_locked(struct ifnet *ifp)
2581{
2582	struct mlx5e_priv *priv = ifp->if_softc;
2583
2584	/* check if already closed */
2585	if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
2586		return (0);
2587
2588	clear_bit(MLX5E_STATE_OPENED, &priv->state);
2589
2590	mlx5e_set_rx_mode_core(priv);
2591	mlx5e_del_all_vlan_rules(priv);
2592	if_link_state_change(priv->ifp, LINK_STATE_DOWN);
2593	mlx5e_close_flow_table(priv);
2594	mlx5e_close_tirs(priv);
2595	mlx5e_close_rqt(priv);
2596	mlx5e_close_channels(priv);
2597	mlx5_vport_dealloc_q_counter(priv->mdev,
2598	    MLX5_INTERFACE_PROTOCOL_ETH, priv->counter_set_id);
2599	mlx5e_close_tises(priv);
2600
2601	return (0);
2602}
2603
2604#if (__FreeBSD_version >= 1100000)
2605static uint64_t
2606mlx5e_get_counter(struct ifnet *ifp, ift_counter cnt)
2607{
2608	struct mlx5e_priv *priv = ifp->if_softc;
2609	u64 retval;
2610
2611	/* PRIV_LOCK(priv); XXX not allowed */
2612	switch (cnt) {
2613	case IFCOUNTER_IPACKETS:
2614		retval = priv->stats.vport.rx_packets;
2615		break;
2616	case IFCOUNTER_IERRORS:
2617		retval = priv->stats.vport.rx_error_packets +
2618		    priv->stats.pport.alignment_err +
2619		    priv->stats.pport.check_seq_err +
2620		    priv->stats.pport.crc_align_errors +
2621		    priv->stats.pport.in_range_len_errors +
2622		    priv->stats.pport.jabbers +
2623		    priv->stats.pport.out_of_range_len +
2624		    priv->stats.pport.oversize_pkts +
2625		    priv->stats.pport.symbol_err +
2626		    priv->stats.pport.too_long_errors +
2627		    priv->stats.pport.undersize_pkts +
2628		    priv->stats.pport.unsupported_op_rx;
2629		break;
2630	case IFCOUNTER_IQDROPS:
2631		retval = priv->stats.vport.rx_out_of_buffer +
2632		    priv->stats.pport.drop_events;
2633		break;
2634	case IFCOUNTER_OPACKETS:
2635		retval = priv->stats.vport.tx_packets;
2636		break;
2637	case IFCOUNTER_OERRORS:
2638		retval = priv->stats.vport.tx_error_packets;
2639		break;
2640	case IFCOUNTER_IBYTES:
2641		retval = priv->stats.vport.rx_bytes;
2642		break;
2643	case IFCOUNTER_OBYTES:
2644		retval = priv->stats.vport.tx_bytes;
2645		break;
2646	case IFCOUNTER_IMCASTS:
2647		retval = priv->stats.vport.rx_multicast_packets;
2648		break;
2649	case IFCOUNTER_OMCASTS:
2650		retval = priv->stats.vport.tx_multicast_packets;
2651		break;
2652	case IFCOUNTER_OQDROPS:
2653		retval = priv->stats.vport.tx_queue_dropped;
2654		break;
2655	case IFCOUNTER_COLLISIONS:
2656		retval = priv->stats.pport.collisions;
2657		break;
2658	default:
2659		retval = if_get_counter_default(ifp, cnt);
2660		break;
2661	}
2662	/* PRIV_UNLOCK(priv); XXX not allowed */
2663	return (retval);
2664}
2665#endif
2666
2667static void
2668mlx5e_set_rx_mode(struct ifnet *ifp)
2669{
2670	struct mlx5e_priv *priv = ifp->if_softc;
2671
2672	queue_work(priv->wq, &priv->set_rx_mode_work);
2673}
2674
2675static int
2676mlx5e_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
2677{
2678	struct mlx5e_priv *priv;
2679	struct ifreq *ifr;
2680	struct ifi2creq i2c;
2681	int error = 0;
2682	int mask = 0;
2683	int size_read = 0;
2684	int module_status;
2685	int module_num;
2686	int max_mtu;
2687	uint8_t read_addr;
2688
2689	priv = ifp->if_softc;
2690
2691	/* check if detaching */
2692	if (priv == NULL || priv->gone != 0)
2693		return (ENXIO);
2694
2695	switch (command) {
2696	case SIOCSIFMTU:
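		/* e.g. entered via "ifconfig mce0 mtu 9000" (hypothetical unit 0) */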
2697		ifr = (struct ifreq *)data;
2698
2699		PRIV_LOCK(priv);
2700		mlx5_query_port_max_mtu(priv->mdev, &max_mtu);
2701
2702		if (ifr->ifr_mtu >= MLX5E_MTU_MIN &&
2703		    ifr->ifr_mtu <= MIN(MLX5E_MTU_MAX, max_mtu)) {
2704			int was_opened;
2705
2706			was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
2707			if (was_opened)
2708				mlx5e_close_locked(ifp);
2709
2710			/* set new MTU */
2711			mlx5e_set_dev_port_mtu(ifp, ifr->ifr_mtu);
2712
2713			if (was_opened)
2714				mlx5e_open_locked(ifp);
2715		} else {
2716			error = EINVAL;
2717			if_printf(ifp, "Invalid MTU value. Min val: %d, Max val: %d\n",
2718			    MLX5E_MTU_MIN, MIN(MLX5E_MTU_MAX, max_mtu));
2719		}
2720		PRIV_UNLOCK(priv);
2721		break;
2722	case SIOCSIFFLAGS:
2723		if ((ifp->if_flags & IFF_UP) &&
2724		    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
2725			mlx5e_set_rx_mode(ifp);
2726			break;
2727		}
2728		PRIV_LOCK(priv);
2729		if (ifp->if_flags & IFF_UP) {
2730			if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
2731				if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
2732					mlx5e_open_locked(ifp);
2733				ifp->if_drv_flags |= IFF_DRV_RUNNING;
2734				mlx5_set_port_status(priv->mdev, MLX5_PORT_UP);
2735			}
2736		} else {
2737			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2738				mlx5_set_port_status(priv->mdev,
2739				    MLX5_PORT_DOWN);
2740				if (test_bit(MLX5E_STATE_OPENED, &priv->state) != 0)
2741					mlx5e_close_locked(ifp);
2742				mlx5e_update_carrier(priv);
2743				ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2744			}
2745		}
2746		PRIV_UNLOCK(priv);
2747		break;
2748	case SIOCADDMULTI:
2749	case SIOCDELMULTI:
2750		mlx5e_set_rx_mode(ifp);
2751		break;
2752	case SIOCSIFMEDIA:
2753	case SIOCGIFMEDIA:
2754	case SIOCGIFXMEDIA:
2755		ifr = (struct ifreq *)data;
2756		error = ifmedia_ioctl(ifp, ifr, &priv->media, command);
2757		break;
2758	case SIOCSIFCAP:
2759		ifr = (struct ifreq *)data;
2760		PRIV_LOCK(priv);
2761		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
2762
2763		if (mask & IFCAP_TXCSUM) {
2764			ifp->if_capenable ^= IFCAP_TXCSUM;
2765			ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);
2766
2767			if (IFCAP_TSO4 & ifp->if_capenable &&
2768			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
2769				ifp->if_capenable &= ~IFCAP_TSO4;
2770				ifp->if_hwassist &= ~CSUM_IP_TSO;
2771				if_printf(ifp,
2772				    "tso4 disabled due to -txcsum.\n");
2773			}
2774		}
2775		if (mask & IFCAP_TXCSUM_IPV6) {
2776			ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
2777			ifp->if_hwassist ^= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);
2778
2779			if (IFCAP_TSO6 & ifp->if_capenable &&
2780			    !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
2781				ifp->if_capenable &= ~IFCAP_TSO6;
2782				ifp->if_hwassist &= ~CSUM_IP6_TSO;
2783				if_printf(ifp,
2784				    "tso6 disabled due to -txcsum6.\n");
2785			}
2786		}
2787		if (mask & IFCAP_RXCSUM)
2788			ifp->if_capenable ^= IFCAP_RXCSUM;
2789		if (mask & IFCAP_RXCSUM_IPV6)
2790			ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
2791		if (mask & IFCAP_TSO4) {
2792			if (!(IFCAP_TSO4 & ifp->if_capenable) &&
2793			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
2794				if_printf(ifp, "enable txcsum first.\n");
2795				error = EAGAIN;
2796				goto out;
2797			}
2798			ifp->if_capenable ^= IFCAP_TSO4;
2799			ifp->if_hwassist ^= CSUM_IP_TSO;
2800		}
2801		if (mask & IFCAP_TSO6) {
2802			if (!(IFCAP_TSO6 & ifp->if_capenable) &&
2803			    !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
2804				if_printf(ifp, "enable txcsum6 first.\n");
2805				error = EAGAIN;
2806				goto out;
2807			}
2808			ifp->if_capenable ^= IFCAP_TSO6;
2809			ifp->if_hwassist ^= CSUM_IP6_TSO;
2810		}
2811		if (mask & IFCAP_VLAN_HWFILTER) {
2812			if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
2813				mlx5e_disable_vlan_filter(priv);
2814			else
2815				mlx5e_enable_vlan_filter(priv);
2816
2817			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
2818		}
2819		if (mask & IFCAP_VLAN_HWTAGGING)
2820			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
2821		if (mask & IFCAP_WOL_MAGIC)
2822			ifp->if_capenable ^= IFCAP_WOL_MAGIC;
2823
2824		VLAN_CAPABILITIES(ifp);
2825		/* turning off LRO also means turning off HW LRO, if enabled */
2826		if (mask & IFCAP_LRO) {
2827			int was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
2828			bool need_restart = false;
2829
2830			ifp->if_capenable ^= IFCAP_LRO;
2831			if (!(ifp->if_capenable & IFCAP_LRO)) {
2832				if (priv->params.hw_lro_en) {
2833					priv->params.hw_lro_en = false;
2834					need_restart = true;
2835					/* Not sure this is the correct way */
2836					priv->params_ethtool.hw_lro = priv->params.hw_lro_en;
2837				}
2838			}
2839			if (was_opened && need_restart) {
2840				mlx5e_close_locked(ifp);
2841				mlx5e_open_locked(ifp);
2842			}
2843		}
2844out:
2845		PRIV_UNLOCK(priv);
2846		break;
2847
2848	case SIOCGI2C:
2849		ifr = (struct ifreq *)data;
2850
2851		/*
2852		 * Copy from the user-space address ifr_data to the
2853		 * kernel-space address i2c
2854		 */
2855		error = copyin(ifr_data_get_ptr(ifr), &i2c, sizeof(i2c));
2856		if (error)
2857			break;
2858
2859		if (i2c.len > sizeof(i2c.data)) {
2860			error = EINVAL;
2861			break;
2862		}
2863
2864		PRIV_LOCK(priv);
2865		/* Get module_num which is required for the query_eeprom */
2866		error = mlx5_query_module_num(priv->mdev, &module_num);
2867		if (error) {
2868			if_printf(ifp, "Query module num failed, eeprom "
2869			    "reading is not supported\n");
2870			error = EINVAL;
2871			goto err_i2c;
2872		}
2873		/* Check if module is present before doing an access */
2874		module_status = mlx5_query_module_status(priv->mdev, module_num);
2875		if (module_status != MLX5_MODULE_STATUS_PLUGGED_ENABLED &&
2876		    module_status != MLX5_MODULE_STATUS_PLUGGED_DISABLED) {
2877			error = EINVAL;
2878			goto err_i2c;
2879		}
2880		/*
2881		 * Currently 0xA0 and 0xA2 are the only addresses permitted.
2882		 * The internal conversion is as follows:
2883		 */
2884		if (i2c.dev_addr == 0xA0)
2885			read_addr = MLX5E_I2C_ADDR_LOW;
2886		else if (i2c.dev_addr == 0xA2)
2887			read_addr = MLX5E_I2C_ADDR_HIGH;
2888		else {
2889			if_printf(ifp, "Query eeprom failed, "
2890			    "Invalid Address: %X\n", i2c.dev_addr);
2891			error = EINVAL;
2892			goto err_i2c;
2893		}
2894		error = mlx5_query_eeprom(priv->mdev,
2895		    read_addr, MLX5E_EEPROM_LOW_PAGE,
2896		    (uint32_t)i2c.offset, (uint32_t)i2c.len, module_num,
2897		    (uint32_t *)i2c.data, &size_read);
2898		if (error) {
2899			if_printf(ifp, "Query eeprom failed, eeprom "
2900			    "reading is not supported\n");
2901			error = EINVAL;
2902			goto err_i2c;
2903		}
2904
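		/*
		 * A single query can return at most MLX5_EEPROM_MAX_BYTES;
		 * issue a second query for the remainder, if any.
		 */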
2905		if (i2c.len > MLX5_EEPROM_MAX_BYTES) {
2906			error = mlx5_query_eeprom(priv->mdev,
2907			    read_addr, MLX5E_EEPROM_LOW_PAGE,
2908			    (uint32_t)(i2c.offset + size_read),
2909			    (uint32_t)(i2c.len - size_read), module_num,
2910			    (uint32_t *)(i2c.data + size_read), &size_read);
2911		}
2912		if (error) {
2913			if_printf(ifp, "Query eeprom failed, eeprom "
2914			    "reading is not supported\n");
2915			error = EINVAL;
2916			goto err_i2c;
2917		}
2918
2919		error = copyout(&i2c, ifr_data_get_ptr(ifr), sizeof(i2c));
2920err_i2c:
2921		PRIV_UNLOCK(priv);
2922		break;
2923
2924	default:
2925		error = ether_ioctl(ifp, command, data);
2926		break;
2927	}
2928	return (error);
2929}
2930
2931static int
2932mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev)
2933{
2934	/*
2935	 * TODO: uncomment once FW really sets all these bits:
2936	 * if (!mdev->caps.eth.rss_ind_tbl_cap || !mdev->caps.eth.csum_cap ||
2937	 *     !mdev->caps.eth.max_lso_cap || !mdev->caps.eth.vlan_cap ||
2938	 *     !(mdev->caps.gen.flags & MLX5_DEV_CAP_FLAG_SCQE_BRK_MOD))
2939	 *	return (-ENOTSUPP);
2940	 */
2941
2942	/* TODO: add more must-have features */
2943
2944	if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
2945		return (-ENODEV);
2946
2947	return (0);
2948}
2949
2950static u16
2951mlx5e_get_max_inline_cap(struct mlx5_core_dev *mdev)
2952{
2953	uint32_t bf_buf_size = (1U << MLX5_CAP_GEN(mdev, log_bf_reg_size)) / 2U;
2954
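	/*
	 * Half the blue-flame register is usable for one send WQE;
	 * subtract the TX WQE size, less two bytes which are assumed to
	 * already hold the start of the inline header inside the WQE.
	 */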
2955	bf_buf_size -= sizeof(struct mlx5e_tx_wqe) - 2;
2956
2957	/* verify against driver hardware limit */
2958	if (bf_buf_size > MLX5E_MAX_TX_INLINE)
2959		bf_buf_size = MLX5E_MAX_TX_INLINE;
2960
2961	return (bf_buf_size);
2962}
2963
2964static int
2965mlx5e_build_ifp_priv(struct mlx5_core_dev *mdev,
2966    struct mlx5e_priv *priv,
2967    int num_comp_vectors)
2968{
2969	int err;
2970
2971	/*
2972	 * TODO: Consider link speed for setting "log_sq_size",
2973	 * "log_rq_size" and "cq_moderation_xxx":
2974	 */
2975	priv->params.log_sq_size =
2976	    MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
2977	priv->params.log_rq_size =
2978	    MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE;
2979	priv->params.rx_cq_moderation_usec =
2980	    MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ?
2981	    MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE :
2982	    MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC;
2983	priv->params.rx_cq_moderation_mode =
2984	    MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ? 1 : 0;
2985	priv->params.rx_cq_moderation_pkts =
2986	    MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS;
2987	priv->params.tx_cq_moderation_usec =
2988	    MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC;
2989	priv->params.tx_cq_moderation_pkts =
2990	    MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS;
2991	priv->params.min_rx_wqes =
2992	    MLX5E_PARAMS_DEFAULT_MIN_RX_WQES;
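	/* the RSS indirection table must cover at least one entry per channel */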
2993	priv->params.rx_hash_log_tbl_sz =
2994	    (order_base_2(num_comp_vectors) >
2995	    MLX5E_PARAMS_DEFAULT_RX_HASH_LOG_TBL_SZ) ?
2996	    order_base_2(num_comp_vectors) :
2997	    MLX5E_PARAMS_DEFAULT_RX_HASH_LOG_TBL_SZ;
2998	priv->params.num_tc = 1;
2999	priv->params.default_vlan_prio = 0;
3000	priv->counter_set_id = -1;
3001	priv->params.tx_max_inline = mlx5e_get_max_inline_cap(mdev);
3002
3003	err = mlx5_query_min_inline(mdev, &priv->params.tx_min_inline_mode);
3004	if (err)
3005		return (err);
3006
3007	/*
3008	 * HW LRO currently defaults to off. When that changes, the HW
3009	 * capability "!!MLX5_CAP_ETH(mdev, lro_cap)" should be taken into account.
3010	 */
3011	priv->params.hw_lro_en = false;
3012	priv->params.lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ;
3013
3014	priv->params.cqe_zipping_en = !!MLX5_CAP_GEN(mdev, cqe_compression);
3015
3016	priv->mdev = mdev;
3017	priv->params.num_channels = num_comp_vectors;
3018	priv->params.channels_rsss = 1;
3019	priv->order_base_2_num_channels = order_base_2(num_comp_vectors);
3020	priv->queue_mapping_channel_mask =
3021	    roundup_pow_of_two(num_comp_vectors) - 1;
3022	priv->num_tc = priv->params.num_tc;
3023	priv->default_vlan_prio = priv->params.default_vlan_prio;
3024
3025	INIT_WORK(&priv->update_stats_work, mlx5e_update_stats_work);
3026	INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work);
3027	INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work);
3028
3029	return (0);
3030}
3031
3032static int
3033mlx5e_create_mkey(struct mlx5e_priv *priv, u32 pdn,
3034		  struct mlx5_core_mr *mkey)
3035{
3036	struct ifnet *ifp = priv->ifp;
3037	struct mlx5_core_dev *mdev = priv->mdev;
3038	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
3039	void *mkc;
3040	u32 *in;
3041	int err;
3042
3043	in = mlx5_vzalloc(inlen);
3044	if (in == NULL) {
3045		if_printf(ifp, "%s: failed to allocate inbox\n", __func__);
3046		return (-ENOMEM);
3047	}
3048
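	/*
	 * The memory key grants local read/write access to the entire
	 * physical address space (PA access mode with length64 set), so
	 * the data path needs no per-buffer memory registration.
	 */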
3049	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
3050	MLX5_SET(mkc, mkc, access_mode, MLX5_ACCESS_MODE_PA);
3051	MLX5_SET(mkc, mkc, lw, 1);
3052	MLX5_SET(mkc, mkc, lr, 1);
3053
3054	MLX5_SET(mkc, mkc, pd, pdn);
3055	MLX5_SET(mkc, mkc, length64, 1);
3056	MLX5_SET(mkc, mkc, qpn, 0xffffff);
3057
3058	err = mlx5_core_create_mkey(mdev, mkey, in, inlen);
3059	if (err)
3060		if_printf(ifp, "%s: mlx5_core_create_mkey failed, %d\n",
3061		    __func__, err);
3062
3063	kvfree(in);
3064	return (err);
3065}
3066
3067static const char *mlx5e_vport_stats_desc[] = {
3068	MLX5E_VPORT_STATS(MLX5E_STATS_DESC)
3069};
3070
3071static const char *mlx5e_pport_stats_desc[] = {
3072	MLX5E_PPORT_STATS(MLX5E_STATS_DESC)
3073};
3074
3075static void
3076mlx5e_priv_mtx_init(struct mlx5e_priv *priv)
3077{
3078	mtx_init(&priv->async_events_mtx, "mlx5async", MTX_NETWORK_LOCK, MTX_DEF);
3079	sx_init(&priv->state_lock, "mlx5state");
3080	callout_init_mtx(&priv->watchdog, &priv->async_events_mtx, 0);
3081	MLX5_INIT_DOORBELL_LOCK(&priv->doorbell_lock);
3082}
3083
3084static void
3085mlx5e_priv_mtx_destroy(struct mlx5e_priv *priv)
3086{
3087	mtx_destroy(&priv->async_events_mtx);
3088	sx_destroy(&priv->state_lock);
3089}
3090
3091static int
3092sysctl_firmware(SYSCTL_HANDLER_ARGS)
3093{
3094	/*
3095	 * The string format is "%d.%d.%d".
3096	 * fw_rev_{maj,min,sub} each return a u16; 2^16 = 65536, so each
3097	 * number needs at most 5 chars.
3098	 * Adding the two "." separators and the terminating NUL gives at
3099	 * most 18 (5*3 + 3) chars.
3100	 */
3101	char fw[18];
3102	struct mlx5e_priv *priv = arg1;
3103	int error;
3104
3105	snprintf(fw, sizeof(fw), "%d.%d.%d", fw_rev_maj(priv->mdev), fw_rev_min(priv->mdev),
3106	    fw_rev_sub(priv->mdev));
3107	error = sysctl_handle_string(oidp, fw, sizeof(fw), req);
3108	return (error);
3109}
3110
3111static void
3112mlx5e_disable_tx_dma(struct mlx5e_channel *ch)
3113{
3114	int i;
3115
3116	for (i = 0; i < ch->num_tc; i++)
3117		mlx5e_drain_sq(&ch->sq[i]);
3118}
3119
3120static void
3121mlx5e_reset_sq_doorbell_record(struct mlx5e_sq *sq)
3122{
3123
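	/*
	 * Notify the hardware with a NOP and clear the doorbell record,
	 * so the SQ resumes from a clean doorbell state.
	 */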
3124	sq->doorbell.d32[0] = cpu_to_be32(MLX5_OPCODE_NOP);
3125	sq->doorbell.d32[1] = cpu_to_be32(sq->sqn << 8);
3126	mlx5e_tx_notify_hw(sq, sq->doorbell.d32, 0);
3127	sq->doorbell.d64 = 0;
3128}
3129
3130void
3131mlx5e_resume_sq(struct mlx5e_sq *sq)
3132{
3133	int err;
3134
3135	/* check if already enabled */
3136	if (sq->stopped == 0)
3137		return;
3138
3139	err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_ERR,
3140	    MLX5_SQC_STATE_RST);
3141	if (err != 0) {
3142		if_printf(sq->ifp,
3143		    "mlx5e_modify_sq() from ERR to RST failed: %d\n", err);
3144	}
3145
3146	sq->cc = 0;
3147	sq->pc = 0;
3148
3149	/* reset doorbell prior to moving from RST to RDY */
3150	mlx5e_reset_sq_doorbell_record(sq);
3151
3152	err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RST,
3153	    MLX5_SQC_STATE_RDY);
3154	if (err != 0) {
3155		if_printf(sq->ifp,
3156		    "mlx5e_modify_sq() from RST to RDY failed: %d\n", err);
3157	}
3158
3159	mtx_lock(&sq->lock);
3160	sq->cev_next_state = MLX5E_CEV_STATE_INITIAL;
3161	sq->stopped = 0;
3162	mtx_unlock(&sq->lock);
3164}
3165
3166static void
3167mlx5e_enable_tx_dma(struct mlx5e_channel *ch)
3168{
3169	int i;
3170
3171	for (i = 0; i < ch->num_tc; i++)
3172		mlx5e_resume_sq(&ch->sq[i]);
3173}
3174
3175static void
3176mlx5e_disable_rx_dma(struct mlx5e_channel *ch)
3177{
3178	struct mlx5e_rq *rq = &ch->rq;
3179	int err;
3180
3181	mtx_lock(&rq->mtx);
3182	rq->enabled = 0;
3183	callout_stop(&rq->watchdog);
3184	mtx_unlock(&rq->mtx);
3185
3186	callout_drain(&rq->watchdog);
3187
3188	err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR);
3189	if (err != 0) {
3190		if_printf(rq->ifp,
3191		    "mlx5e_modify_rq() from RDY to ERR failed: %d\n", err);
3192	}
3193
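	/* poll the CQ by hand until the hardware has returned all WQEs */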
3194	while (!mlx5_wq_ll_is_empty(&rq->wq)) {
3195		msleep(1);
3196		rq->cq.mcq.comp(&rq->cq.mcq);
3197	}
3198
3199	/*
3200	 * Transitioning into the RST state allows the FW to track fewer
3201	 * ERR-state queues, thus reducing the receive queue flushing time.
3202	 */
3203	err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_ERR, MLX5_RQC_STATE_RST);
3204	if (err != 0) {
3205		if_printf(rq->ifp,
3206		    "mlx5e_modify_rq() from ERR to RST failed: %d\n", err);
3207	}
3208}
3209
3210static void
3211mlx5e_enable_rx_dma(struct mlx5e_channel *ch)
3212{
3213	struct mlx5e_rq *rq = &ch->rq;
3214	int err;
3215
3216	rq->wq.wqe_ctr = 0;
3217	mlx5_wq_ll_update_db_record(&rq->wq);
3218	err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY);
3219	if (err != 0) {
3220		if_printf(rq->ifp,
3221		    "mlx5e_modify_rq() from RST to RDY failed: %d\n", err);
3222	}
3223
3224	rq->enabled = 1;
3225
3226	rq->cq.mcq.comp(&rq->cq.mcq);
3227}
3228
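/*
 * Stop (value != 0) or resume (value == 0) TX DMA on all channels.
 */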
3229void
3230mlx5e_modify_tx_dma(struct mlx5e_priv *priv, uint8_t value)
3231{
3232	int i;
3233
3234	if (priv->channel == NULL)
3235		return;
3236
3237	for (i = 0; i < priv->params.num_channels; i++) {
3238
3239		if (!priv->channel[i])
3240			continue;
3241
3242		if (value)
3243			mlx5e_disable_tx_dma(priv->channel[i]);
3244		else
3245			mlx5e_enable_tx_dma(priv->channel[i]);
3246	}
3247}
3248
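/*
 * Stop (value != 0) or resume (value == 0) RX DMA on all channels.
 */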
3249void
3250mlx5e_modify_rx_dma(struct mlx5e_priv *priv, uint8_t value)
3251{
3252	int i;
3253
3254	if (priv->channel == NULL)
3255		return;
3256
3257	for (i = 0; i < priv->params.num_channels; i++) {
3258
3259		if (!priv->channel[i])
3260			continue;
3261
3262		if (value)
3263			mlx5e_disable_rx_dma(priv->channel[i]);
3264		else
3265			mlx5e_enable_rx_dma(priv->channel[i]);
3266	}
3267}
3268
3269static void
3270mlx5e_add_hw_stats(struct mlx5e_priv *priv)
3271{
3272	SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_hw),
3273	    OID_AUTO, "fw_version", CTLTYPE_STRING | CTLFLAG_RD, priv, 0,
3274	    sysctl_firmware, "A", "HCA firmware version");
3275
3276	SYSCTL_ADD_STRING(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_hw),
3277	    OID_AUTO, "board_id", CTLFLAG_RD, priv->mdev->board_id, 0,
3278	    "Board ID");
3279}
3280
3281static int
3282mlx5e_sysctl_tx_priority_flow_control(SYSCTL_HANDLER_ARGS)
3283{
3284	struct mlx5e_priv *priv = arg1;
3285	uint32_t tx_pfc;
3286	uint32_t value;
3287	int error;
3288
3289	PRIV_LOCK(priv);
3290
3291	tx_pfc = priv->params.tx_priority_flow_control;
3292
3293	/* get current value */
3294	value = (tx_pfc >> arg2) & 1;
3295
3296	error = sysctl_handle_32(oidp, &value, 0, req);
3297
3298	/* set or clear the PFC bit for this priority */
3299	if (value != 0)
3300		priv->params.tx_priority_flow_control |= (1 << arg2);
3301	else
3302		priv->params.tx_priority_flow_control &= ~(1 << arg2);
3303
3304	/* check if update is required */
3305	if (error == 0 && priv->gone == 0 &&
3306	    tx_pfc != priv->params.tx_priority_flow_control) {
3307		error = -mlx5e_set_port_pfc(priv);
3308		/* restore previous value */
3309		if (error != 0)
3310			priv->params.tx_priority_flow_control = tx_pfc;
3311	}
3312	PRIV_UNLOCK(priv);
3313
3314	return (error);
3315}
3316
3317static int
3318mlx5e_sysctl_rx_priority_flow_control(SYSCTL_HANDLER_ARGS)
3319{
3320	struct mlx5e_priv *priv = arg1;
3321	uint32_t rx_pfc;
3322	uint32_t value;
3323	int error;
3324
3325	PRIV_LOCK(priv);
3326
3327	rx_pfc = priv->params.rx_priority_flow_control;
3328
3329	/* get current value */
3330	value = (rx_pfc >> arg2) & 1;
3331
3332	error = sysctl_handle_32(oidp, &value, 0, req);
3333
3334	/* set or clear the PFC bit for this priority */
3335	if (value != 0)
3336		priv->params.rx_priority_flow_control |= (1 << arg2);
3337	else
3338		priv->params.rx_priority_flow_control &= ~(1 << arg2);
3339
3340	/* check if update is required */
3341	if (error == 0 && priv->gone == 0 &&
3342	    rx_pfc != priv->params.rx_priority_flow_control) {
3343		error = -mlx5e_set_port_pfc(priv);
3344		/* restore previous value */
3345		if (error != 0)
3346			priv->params.rx_priority_flow_control = rx_pfc;
3347	}
3348	PRIV_UNLOCK(priv);
3349
3350	return (error);
3351}
3352
3353static void
3354mlx5e_setup_pauseframes(struct mlx5e_priv *priv)
3355{
3356	unsigned int x;
3357	char path[96];
3358	int error;
3359
3360	/* enable pauseframes by default */
3361	priv->params.tx_pauseframe_control = 1;
3362	priv->params.rx_pauseframe_control = 1;
3363
3364	/* disable ports flow control, PFC, by default */
3365	priv->params.tx_priority_flow_control = 0;
3366	priv->params.rx_priority_flow_control = 0;
3367
3368#if (__FreeBSD_version < 1100000)
3369	/* compute path for sysctl */
3370	snprintf(path, sizeof(path), "dev.mce.%d.tx_pauseframe_control",
3371	    device_get_unit(priv->mdev->pdev->dev.bsddev));
3372
3373	/* try to fetch tunable, if any */
3374	TUNABLE_INT_FETCH(path, &priv->params.tx_pauseframe_control);
3375
3376	/* compute path for sysctl */
3377	snprintf(path, sizeof(path), "dev.mce.%d.rx_pauseframe_control",
3378	    device_get_unit(priv->mdev->pdev->dev.bsddev));
3379
3380	/* try to fetch tunable, if any */
3381	TUNABLE_INT_FETCH(path, &priv->params.rx_pauseframe_control);
3382
3383	for (x = 0; x != 8; x++) {
3384		int value;
3385		/* compute path for sysctl */
3386		snprintf(path, sizeof(path), "dev.mce.%d.tx_priority_flow_control_%u",
3387		    device_get_unit(priv->mdev->pdev->dev.bsddev), x);
3388
3389		/* only set the bit when the tunable is found and non-zero */
3390		if (TUNABLE_INT_FETCH(path, &value) != 0 && value != 0)
3391			priv->params.tx_priority_flow_control |= 1 << x;
3392
3393		/* compute path for sysctl */
3394		snprintf(path, sizeof(path), "dev.mce.%d.rx_priority_flow_control_%u",
3395		    device_get_unit(priv->mdev->pdev->dev.bsddev), x);
3396
3397		/* only set the bit when the tunable is found and non-zero */
3398		if (TUNABLE_INT_FETCH(path, &value) != 0 && value != 0)
3399			priv->params.rx_priority_flow_control |= 1 << x;
3400	}
3401#endif
3402
3403	/* register pauseframe SYSCTLs */
3404	SYSCTL_ADD_INT(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3405	    OID_AUTO, "tx_pauseframe_control", CTLFLAG_RDTUN,
3406	    &priv->params.tx_pauseframe_control, 0,
3407	    "Set to enable TX pause frames. Clear to disable.");
3408
3409	SYSCTL_ADD_INT(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3410	    OID_AUTO, "rx_pauseframe_control", CTLFLAG_RDTUN,
3411	    &priv->params.rx_pauseframe_control, 0,
3412	    "Set to enable RX pause frames. Clear to disable.");
3413
3414	/* register priority_flow control, PFC, SYSCTLs */
3415	for (x = 0; x != 8; x++) {
3416		snprintf(path, sizeof(path), "tx_priority_flow_control_%u", x);
3417
3418		SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3419		    OID_AUTO, path, CTLTYPE_UINT | CTLFLAG_RWTUN |
3420		    CTLFLAG_MPSAFE, priv, x, &mlx5e_sysctl_tx_priority_flow_control, "IU",
3421		    "Set to enable TX ports flow control frames for given priority. Clear to disable.");
3422
3423		snprintf(path, sizeof(path), "rx_priority_flow_control_%u", x);
3424
3425		SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3426		    OID_AUTO, path, CTLTYPE_UINT | CTLFLAG_RWTUN |
3427		    CTLFLAG_MPSAFE, priv, x, &mlx5e_sysctl_rx_priority_flow_control, "IU",
3428		    "Set to enable RX ports flow control frames for given priority. Clear to disable.");
3429	}
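	/*
	 * Example (hypothetical unit 0): these knobs appear as
	 * "dev.mce.0.tx_priority_flow_control_3" and friends, and may be
	 * set from loader.conf(5) or changed at runtime via sysctl(8).
	 */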
3430
3431	PRIV_LOCK(priv);
3432
3433	/* range check */
3434	priv->params.tx_pauseframe_control =
3435	    priv->params.tx_pauseframe_control ? 1 : 0;
3436	priv->params.rx_pauseframe_control =
3437	    priv->params.rx_pauseframe_control ? 1 : 0;
3438
3439	/* update firmware */
3440	error = mlx5e_set_port_pause_and_pfc(priv);
3441	if (error == -EINVAL) {
3442		if_printf(priv->ifp,
3443		    "Global pauseframes must be disabled before enabling PFC.\n");
3444		priv->params.rx_priority_flow_control = 0;
3445		priv->params.tx_priority_flow_control = 0;
3446
3447		/* update firmware */
3448		(void) mlx5e_set_port_pause_and_pfc(priv);
3449	}
3450	PRIV_UNLOCK(priv);
3451}
3452
3453static void *
3454mlx5e_create_ifp(struct mlx5_core_dev *mdev)
3455{
3456	struct ifnet *ifp;
3457	struct mlx5e_priv *priv;
3458	u8 dev_addr[ETHER_ADDR_LEN] __aligned(4);
3459	struct sysctl_oid_list *child;
3460	int ncv = mdev->priv.eq_table.num_comp_vectors;
3461	char unit[16];
3462	int err;
3463	int i;
3464	u32 eth_proto_cap;
3465
3466	if (mlx5e_check_required_hca_cap(mdev)) {
3467		mlx5_core_dbg(mdev, "mlx5e_check_required_hca_cap() failed\n");
3468		return (NULL);
3469	}
3470	priv = malloc(sizeof(*priv), M_MLX5EN, M_WAITOK | M_ZERO);
3471	mlx5e_priv_mtx_init(priv);
3472
3473	ifp = priv->ifp = if_alloc(IFT_ETHER);
3474	if (ifp == NULL) {
3475		mlx5_core_err(mdev, "if_alloc() failed\n");
3476		goto err_free_priv;
3477	}
3478	ifp->if_softc = priv;
3479	if_initname(ifp, "mce", device_get_unit(mdev->pdev->dev.bsddev));
3480	ifp->if_mtu = ETHERMTU;
3481	ifp->if_init = mlx5e_open;
3482	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
3483	ifp->if_ioctl = mlx5e_ioctl;
3484	ifp->if_transmit = mlx5e_xmit;
3485	ifp->if_qflush = if_qflush;
3486#if (__FreeBSD_version >= 1100000)
3487	ifp->if_get_counter = mlx5e_get_counter;
3488#endif
3489	ifp->if_snd.ifq_maxlen = ifqmaxlen;
3490	/*
3491	 * Set driver features
3492	 */
3493	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6;
3494	ifp->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING;
3495	ifp->if_capabilities |= IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWFILTER;
3496	ifp->if_capabilities |= IFCAP_LINKSTATE | IFCAP_JUMBO_MTU;
3497	ifp->if_capabilities |= IFCAP_LRO;
3498	ifp->if_capabilities |= IFCAP_TSO | IFCAP_VLAN_HWTSO;
3499	ifp->if_capabilities |= IFCAP_HWSTATS;
3500
3501	/* set TSO limits so that we don't have to drop TX packets */
3502	ifp->if_hw_tsomax = MLX5E_MAX_TX_PAYLOAD_SIZE - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
3503	ifp->if_hw_tsomaxsegcount = MLX5E_MAX_TX_MBUF_FRAGS - 1 /* hdr */;
3504	ifp->if_hw_tsomaxsegsize = MLX5E_MAX_TX_MBUF_SIZE;
3505
3506	ifp->if_capenable = ifp->if_capabilities;
3507	ifp->if_hwassist = 0;
3508	if (ifp->if_capenable & IFCAP_TSO)
3509		ifp->if_hwassist |= CSUM_TSO;
3510	if (ifp->if_capenable & IFCAP_TXCSUM)
3511		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP | CSUM_IP);
3512	if (ifp->if_capenable & IFCAP_TXCSUM_IPV6)
3513		ifp->if_hwassist |= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);
3514
3515	/* ifnet sysctl tree */
3516	sysctl_ctx_init(&priv->sysctl_ctx);
3517	priv->sysctl_ifnet = SYSCTL_ADD_NODE(&priv->sysctl_ctx, SYSCTL_STATIC_CHILDREN(_dev),
3518	    OID_AUTO, ifp->if_dname, CTLFLAG_RD, 0, "MLX5 ethernet - interface name");
3519	if (priv->sysctl_ifnet == NULL) {
3520		mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n");
3521		goto err_free_sysctl;
3522	}
3523	snprintf(unit, sizeof(unit), "%d", ifp->if_dunit);
3524	priv->sysctl_ifnet = SYSCTL_ADD_NODE(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3525	    OID_AUTO, unit, CTLFLAG_RD, 0, "MLX5 ethernet - interface unit");
3526	if (priv->sysctl_ifnet == NULL) {
3527		mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n");
3528		goto err_free_sysctl;
3529	}
3530
3531	/* HW sysctl tree */
3532	child = SYSCTL_CHILDREN(device_get_sysctl_tree(mdev->pdev->dev.bsddev));
3533	priv->sysctl_hw = SYSCTL_ADD_NODE(&priv->sysctl_ctx, child,
3534	    OID_AUTO, "hw", CTLFLAG_RD, 0, "MLX5 ethernet dev hw");
3535	if (priv->sysctl_hw == NULL) {
3536		mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n");
3537		goto err_free_sysctl;
3538	}
3539
3540	err = mlx5e_build_ifp_priv(mdev, priv, ncv);
3541	if (err) {
3542		mlx5_core_err(mdev, "mlx5e_build_ifp_priv() failed (%d)\n", err);
3543		goto err_free_sysctl;
3544	}
3545
3546	snprintf(unit, sizeof(unit), "mce%u_wq",
3547	    device_get_unit(mdev->pdev->dev.bsddev));
3548	priv->wq = alloc_workqueue(unit, 0, 1);
3549	if (priv->wq == NULL) {
3550		if_printf(ifp, "%s: alloc_workqueue failed\n", __func__);
3551		goto err_free_sysctl;
3552	}
3553
3554	err = mlx5_alloc_map_uar(mdev, &priv->cq_uar);
3555	if (err) {
3556		if_printf(ifp, "%s: mlx5_alloc_map_uar failed, %d\n",
3557		    __func__, err);
3558		goto err_free_wq;
3559	}
3560	err = mlx5_core_alloc_pd(mdev, &priv->pdn);
3561	if (err) {
3562		if_printf(ifp, "%s: mlx5_core_alloc_pd failed, %d\n",
3563		    __func__, err);
3564		goto err_unmap_free_uar;
3565	}
3566	err = mlx5_alloc_transport_domain(mdev, &priv->tdn);
3567	if (err) {
3568		if_printf(ifp, "%s: mlx5_alloc_transport_domain failed, %d\n",
3569		    __func__, err);
3570		goto err_dealloc_pd;
3571	}
3572	err = mlx5e_create_mkey(priv, priv->pdn, &priv->mr);
3573	if (err) {
3574		if_printf(ifp, "%s: mlx5e_create_mkey failed, %d\n",
3575		    __func__, err);
3576		goto err_dealloc_transport_domain;
3577	}
3578	mlx5_query_nic_vport_mac_address(priv->mdev, 0, dev_addr);
3579
3580	/* check if we should generate a random MAC address */
3581	if (MLX5_CAP_GEN(priv->mdev, vport_group_manager) == 0 &&
3582	    is_zero_ether_addr(dev_addr)) {
3583		random_ether_addr(dev_addr);
3584		if_printf(ifp, "Assigned random MAC address\n");
3585	}
3586
3587	/* set default MTU */
3588	mlx5e_set_dev_port_mtu(ifp, ifp->if_mtu);
3589
3590	/* Set default media status */
3591	priv->media_status_last = IFM_AVALID;
3592	priv->media_active_last = IFM_ETHER | IFM_AUTO |
3593	    IFM_ETH_RXPAUSE | IFM_FDX;
3594
3595	/* setup default pauseframes configuration */
3596	mlx5e_setup_pauseframes(priv);
3597
3598	err = mlx5_query_port_proto_cap(mdev, &eth_proto_cap, MLX5_PTYS_EN);
3599	if (err) {
3600		eth_proto_cap = 0;
3601		if_printf(ifp, "%s: Query port media capability failed, %d\n",
3602		    __func__, err);
3603	}
3604
3605	/* Setup supported medias */
3606	ifmedia_init(&priv->media, IFM_IMASK | IFM_ETH_FMASK,
3607	    mlx5e_media_change, mlx5e_media_status);
3608
3609	for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) {
3610		if (mlx5e_mode_table[i].baudrate == 0)
3611			continue;
3612		if (MLX5E_PROT_MASK(i) & eth_proto_cap) {
3613			ifmedia_add(&priv->media,
3614			    mlx5e_mode_table[i].subtype |
3615			    IFM_ETHER, 0, NULL);
3616			ifmedia_add(&priv->media,
3617			    mlx5e_mode_table[i].subtype |
3618			    IFM_ETHER | IFM_FDX |
3619			    IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE, 0, NULL);
3620		}
3621	}
3622
3623	/* Additional supported medias */
3624	ifmedia_add(&priv->media, IFM_10G_LR | IFM_ETHER, 0, NULL);
3625	ifmedia_add(&priv->media, IFM_10G_LR |
3626	    IFM_ETHER | IFM_FDX |
3627	    IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE, 0, NULL);
3628
3629	ifmedia_add(&priv->media, IFM_40G_ER4 | IFM_ETHER, 0, NULL);
3630	ifmedia_add(&priv->media, IFM_40G_ER4 |
3631	    IFM_ETHER | IFM_FDX |
3632	    IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE, 0, NULL);
3633
3634	ifmedia_add(&priv->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3635	ifmedia_add(&priv->media, IFM_ETHER | IFM_AUTO | IFM_FDX |
3636	    IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE, 0, NULL);
3637
3638	/* Set autoselect by default */
3639	ifmedia_set(&priv->media, IFM_ETHER | IFM_AUTO | IFM_FDX |
3640	    IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE);
3641	ether_ifattach(ifp, dev_addr);
3642
3643	/* Register for VLAN events */
3644	priv->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
3645	    mlx5e_vlan_rx_add_vid, priv, EVENTHANDLER_PRI_FIRST);
3646	priv->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
3647	    mlx5e_vlan_rx_kill_vid, priv, EVENTHANDLER_PRI_FIRST);
3648
3649	/* Link is down by default */
3650	if_link_state_change(ifp, LINK_STATE_DOWN);
3651
3652	mlx5e_enable_async_events(priv);
3653
3654	mlx5e_add_hw_stats(priv);
3655
3656	mlx5e_create_stats(&priv->stats.vport.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3657	    "vstats", mlx5e_vport_stats_desc, MLX5E_VPORT_STATS_NUM,
3658	    priv->stats.vport.arg);
3659
3660	mlx5e_create_stats(&priv->stats.pport.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3661	    "pstats", mlx5e_pport_stats_desc, MLX5E_PPORT_STATS_NUM,
3662	    priv->stats.pport.arg);
3663
3664	mlx5e_create_ethtool(priv);
3665
3666	mtx_lock(&priv->async_events_mtx);
3667	mlx5e_update_stats(priv);
3668	mtx_unlock(&priv->async_events_mtx);
3669
3670	return (priv);
3671
3672err_dealloc_transport_domain:
3673	mlx5_dealloc_transport_domain(mdev, priv->tdn);
3674
3675err_dealloc_pd:
3676	mlx5_core_dealloc_pd(mdev, priv->pdn);
3677
3678err_unmap_free_uar:
3679	mlx5_unmap_free_uar(mdev, &priv->cq_uar);
3680
3681err_free_wq:
3682	destroy_workqueue(priv->wq);
3683
3684err_free_sysctl:
3685	sysctl_ctx_free(&priv->sysctl_ctx);
3686	if (priv->sysctl_debug)
3687		sysctl_ctx_free(&priv->stats.port_stats_debug.ctx);
3688	if_free(ifp);
3689
3690err_free_priv:
3691	mlx5e_priv_mtx_destroy(priv);
3692	free(priv, M_MLX5EN);
3693	return (NULL);
3694}
3695
3696static void
3697mlx5e_destroy_ifp(struct mlx5_core_dev *mdev, void *vpriv)
3698{
3699	struct mlx5e_priv *priv = vpriv;
3700	struct ifnet *ifp = priv->ifp;
3701
3702	/* don't allow more IOCTLs */
3703	priv->gone = 1;
3704
3705	/* XXX wait a bit to allow IOCTL handlers to complete */
3706	pause("W", hz);
3707
3708	/* stop watchdog timer */
3709	callout_drain(&priv->watchdog);
3710
3711	if (priv->vlan_attach != NULL)
3712		EVENTHANDLER_DEREGISTER(vlan_config, priv->vlan_attach);
3713	if (priv->vlan_detach != NULL)
3714		EVENTHANDLER_DEREGISTER(vlan_unconfig, priv->vlan_detach);
3715
3716	/* make sure device gets closed */
3717	PRIV_LOCK(priv);
3718	mlx5e_close_locked(ifp);
3719	PRIV_UNLOCK(priv);
3720
3721	/* unregister device */
3722	ifmedia_removeall(&priv->media);
3723	ether_ifdetach(ifp);
3724	if_free(ifp);
3725
3726	/* destroy all remaining sysctl nodes */
3727	sysctl_ctx_free(&priv->stats.vport.ctx);
3728	sysctl_ctx_free(&priv->stats.pport.ctx);
3729	sysctl_ctx_free(&priv->sysctl_ctx);
3730	if (priv->sysctl_debug)
3731		sysctl_ctx_free(&priv->stats.port_stats_debug.ctx);
3732
3733	mlx5_core_destroy_mkey(priv->mdev, &priv->mr);
3734	mlx5_dealloc_transport_domain(priv->mdev, priv->tdn);
3735	mlx5_core_dealloc_pd(priv->mdev, priv->pdn);
3736	mlx5_unmap_free_uar(priv->mdev, &priv->cq_uar);
3737	mlx5e_disable_async_events(priv);
3738	destroy_workqueue(priv->wq);
3739	mlx5e_priv_mtx_destroy(priv);
3740	free(priv, M_MLX5EN);
3741}
3742
3743static void *
3744mlx5e_get_ifp(void *vpriv)
3745{
3746	struct mlx5e_priv *priv = vpriv;
3747
3748	return (priv->ifp);
3749}
3750
3751static struct mlx5_interface mlx5e_interface = {
3752	.add = mlx5e_create_ifp,
3753	.remove = mlx5e_destroy_ifp,
3754	.event = mlx5e_async_event,
3755	.protocol = MLX5_INTERFACE_PROTOCOL_ETH,
3756	.get_dev = mlx5e_get_ifp,
3757};
3758
3759void
3760mlx5e_init(void)
3761{
3762	mlx5_register_interface(&mlx5e_interface);
3763}
3764
3765void
3766mlx5e_cleanup(void)
3767{
3768	mlx5_unregister_interface(&mlx5e_interface);
3769}
3770
3771static void
3772mlx5e_show_version(void __unused *arg)
3773{
3774
3775	printf("%s", mlx5e_version);
3776}
3777SYSINIT(mlx5e_show_version, SI_SUB_DRIVERS, SI_ORDER_ANY, mlx5e_show_version, NULL);
3778
3779module_init_order(mlx5e_init, SI_ORDER_THIRD);
3780module_exit_order(mlx5e_cleanup, SI_ORDER_THIRD);
3781
3782#if (__FreeBSD_version >= 1100000)
3783MODULE_DEPEND(mlx5en, linuxkpi, 1, 1, 1);
3784#endif
3785MODULE_DEPEND(mlx5en, mlx5, 1, 1, 1);
3786MODULE_VERSION(mlx5en, 1);
3787