mlx5_en_main.c revision 347796
1/*-
2 * Copyright (c) 2015-2018 Mellanox Technologies. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 *    notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 *    notice, this list of conditions and the following disclaimer in the
11 *    documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23 * SUCH DAMAGE.
24 *
25 * $FreeBSD: stable/11/sys/dev/mlx5/mlx5_en/mlx5_en_main.c 347796 2019-05-16 17:09:06Z hselasky $
26 */
27
28#include "en.h"
29
30#include <sys/sockio.h>
31#include <machine/atomic.h>
32
33#ifndef ETH_DRIVER_VERSION
34#define	ETH_DRIVER_VERSION	"3.5.0"
35#endif
36#define	DRIVER_RELDATE	"November 2018"
37
38static const char mlx5e_version[] = "mlx5en: Mellanox Ethernet driver "
39	ETH_DRIVER_VERSION " (" DRIVER_RELDATE ")\n";
40
41static int mlx5e_get_wqe_sz(struct mlx5e_priv *priv, u32 *wqe_sz, u32 *nsegs);
42
43struct mlx5e_channel_param {
44	struct mlx5e_rq_param rq;
45	struct mlx5e_sq_param sq;
46	struct mlx5e_cq_param rx_cq;
47	struct mlx5e_cq_param tx_cq;
48};
49
50static const struct {
51	u32	subtype;
52	u64	baudrate;
53}	mlx5e_mode_table[MLX5E_LINK_MODES_NUMBER] = {
54
55	[MLX5E_1000BASE_CX_SGMII] = {
56		.subtype = IFM_1000_CX_SGMII,
57		.baudrate = IF_Mbps(1000ULL),
58	},
59	[MLX5E_1000BASE_KX] = {
60		.subtype = IFM_1000_KX,
61		.baudrate = IF_Mbps(1000ULL),
62	},
63	[MLX5E_10GBASE_CX4] = {
64		.subtype = IFM_10G_CX4,
65		.baudrate = IF_Gbps(10ULL),
66	},
67	[MLX5E_10GBASE_KX4] = {
68		.subtype = IFM_10G_KX4,
69		.baudrate = IF_Gbps(10ULL),
70	},
71	[MLX5E_10GBASE_KR] = {
72		.subtype = IFM_10G_KR,
73		.baudrate = IF_Gbps(10ULL),
74	},
75	[MLX5E_20GBASE_KR2] = {
76		.subtype = IFM_20G_KR2,
77		.baudrate = IF_Gbps(20ULL),
78	},
79	[MLX5E_40GBASE_CR4] = {
80		.subtype = IFM_40G_CR4,
81		.baudrate = IF_Gbps(40ULL),
82	},
83	[MLX5E_40GBASE_KR4] = {
84		.subtype = IFM_40G_KR4,
85		.baudrate = IF_Gbps(40ULL),
86	},
87	[MLX5E_56GBASE_R4] = {
88		.subtype = IFM_56G_R4,
89		.baudrate = IF_Gbps(56ULL),
90	},
91	[MLX5E_10GBASE_CR] = {
92		.subtype = IFM_10G_CR1,
93		.baudrate = IF_Gbps(10ULL),
94	},
95	[MLX5E_10GBASE_SR] = {
96		.subtype = IFM_10G_SR,
97		.baudrate = IF_Gbps(10ULL),
98	},
99	[MLX5E_10GBASE_ER] = {
100		.subtype = IFM_10G_ER,
101		.baudrate = IF_Gbps(10ULL),
102	},
103	[MLX5E_40GBASE_SR4] = {
104		.subtype = IFM_40G_SR4,
105		.baudrate = IF_Gbps(40ULL),
106	},
107	[MLX5E_40GBASE_LR4] = {
108		.subtype = IFM_40G_LR4,
109		.baudrate = IF_Gbps(40ULL),
110	},
111	[MLX5E_100GBASE_CR4] = {
112		.subtype = IFM_100G_CR4,
113		.baudrate = IF_Gbps(100ULL),
114	},
115	[MLX5E_100GBASE_SR4] = {
116		.subtype = IFM_100G_SR4,
117		.baudrate = IF_Gbps(100ULL),
118	},
119	[MLX5E_100GBASE_KR4] = {
120		.subtype = IFM_100G_KR4,
121		.baudrate = IF_Gbps(100ULL),
122	},
123	[MLX5E_100GBASE_LR4] = {
124		.subtype = IFM_100G_LR4,
125		.baudrate = IF_Gbps(100ULL),
126	},
127	[MLX5E_100BASE_TX] = {
128		.subtype = IFM_100_TX,
129		.baudrate = IF_Mbps(100ULL),
130	},
131	[MLX5E_1000BASE_T] = {
132		.subtype = IFM_1000_T,
133		.baudrate = IF_Mbps(1000ULL),
134	},
135	[MLX5E_10GBASE_T] = {
136		.subtype = IFM_10G_T,
137		.baudrate = IF_Gbps(10ULL),
138	},
139	[MLX5E_25GBASE_CR] = {
140		.subtype = IFM_25G_CR,
141		.baudrate = IF_Gbps(25ULL),
142	},
143	[MLX5E_25GBASE_KR] = {
144		.subtype = IFM_25G_KR,
145		.baudrate = IF_Gbps(25ULL),
146	},
147	[MLX5E_25GBASE_SR] = {
148		.subtype = IFM_25G_SR,
149		.baudrate = IF_Gbps(25ULL),
150	},
151	[MLX5E_50GBASE_CR2] = {
152		.subtype = IFM_50G_CR2,
153		.baudrate = IF_Gbps(50ULL),
154	},
155	[MLX5E_50GBASE_KR2] = {
156		.subtype = IFM_50G_KR2,
157		.baudrate = IF_Gbps(50ULL),
158	},
159};
160
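/*
 * The table index doubles as the PTYS protocol bit position, so
 * MLX5E_PROT_MASK(i) selects table entry "i" in the bitmask reported
 * by the firmware. As an illustrative sketch (not driver code), a
 * port whose eth_proto_oper reports only 40GBASE-CR4 decodes as:
 *
 *	if (eth_proto_oper & MLX5E_PROT_MASK(MLX5E_40GBASE_CR4)) {
 *		subtype = mlx5e_mode_table[MLX5E_40GBASE_CR4].subtype;
 *		baudrate = mlx5e_mode_table[MLX5E_40GBASE_CR4].baudrate;
 *	}
 *
 * which yields IFM_40G_CR4 at IF_Gbps(40); see mlx5e_update_carrier().
 */
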
161MALLOC_DEFINE(M_MLX5EN, "MLX5EN", "MLX5 Ethernet");
162
163static void
164mlx5e_update_carrier(struct mlx5e_priv *priv)
165{
166	struct mlx5_core_dev *mdev = priv->mdev;
167	u32 out[MLX5_ST_SZ_DW(ptys_reg)];
168	u32 eth_proto_oper;
169	int error;
170	u8 port_state;
171	u8 is_er_type;
172	u8 i;
173
174	port_state = mlx5_query_vport_state(mdev,
175	    MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT, 0);
176
177	if (port_state == VPORT_STATE_UP) {
178		priv->media_status_last |= IFM_ACTIVE;
179	} else {
180		priv->media_status_last &= ~IFM_ACTIVE;
181		priv->media_active_last = IFM_ETHER;
182		if_link_state_change(priv->ifp, LINK_STATE_DOWN);
183		return;
184	}
185
186	error = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, 1);
187	if (error) {
188		priv->media_active_last = IFM_ETHER;
189		priv->ifp->if_baudrate = 1;
190		if_printf(priv->ifp, "%s: query port ptys failed: 0x%x\n",
191		    __func__, error);
192		return;
193	}
194	eth_proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper);
195
196	for (i = 0; i != MLX5E_LINK_MODES_NUMBER; i++) {
197		if (mlx5e_mode_table[i].baudrate == 0)
198			continue;
199		if (MLX5E_PROT_MASK(i) & eth_proto_oper) {
200			u32 subtype = mlx5e_mode_table[i].subtype;
201
202			priv->ifp->if_baudrate =
203			    mlx5e_mode_table[i].baudrate;
204
205			switch (subtype) {
206			case IFM_10G_ER:
207				error = mlx5_query_pddr_range_info(mdev, 1, &is_er_type);
208				if (error != 0) {
209					if_printf(priv->ifp, "%s: query port pddr failed: %d\n",
210					    __func__, error);
211				}
212				if (error != 0 || is_er_type == 0)
213					subtype = IFM_10G_LR;
214				break;
215			case IFM_40G_LR4:
216				error = mlx5_query_pddr_range_info(mdev, 1, &is_er_type);
217				if (error != 0) {
218					if_printf(priv->ifp, "%s: query port pddr failed: %d\n",
219					    __func__, error);
220				}
221				if (error == 0 && is_er_type != 0)
222					subtype = IFM_40G_ER4;
223				break;
224			}
225			priv->media_active_last = subtype | IFM_ETHER | IFM_FDX;
226			break;
227		}
228	}
229	if_link_state_change(priv->ifp, LINK_STATE_UP);
230}
231
232static void
233mlx5e_media_status(struct ifnet *dev, struct ifmediareq *ifmr)
234{
235	struct mlx5e_priv *priv = dev->if_softc;
236
237	ifmr->ifm_status = priv->media_status_last;
238	ifmr->ifm_active = priv->media_active_last |
239	    (priv->params.rx_pauseframe_control ? IFM_ETH_RXPAUSE : 0) |
240	    (priv->params.tx_pauseframe_control ? IFM_ETH_TXPAUSE : 0);
241
242}
243
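/*
 * Map an ifmedia subtype back to a PTYS protocol bitmask. IFM_10G_LR
 * and IFM_40G_ER4 are aliased to the IFM_10G_ER and IFM_40G_LR4 table
 * entries, because the table stores a single subtype per PTYS bit;
 * the LR/ER distinction is recovered from the PDDR register in
 * mlx5e_update_carrier().
 */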
244static u32
245mlx5e_find_link_mode(u32 subtype)
246{
247	u32 i;
248	u32 link_mode = 0;
249
250	switch (subtype) {
251	case IFM_10G_LR:
252		subtype = IFM_10G_ER;
253		break;
254	case IFM_40G_ER4:
255		subtype = IFM_40G_LR4;
256		break;
257	}
258
259	for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) {
260		if (mlx5e_mode_table[i].baudrate == 0)
261			continue;
262		if (mlx5e_mode_table[i].subtype == subtype)
263			link_mode |= MLX5E_PROT_MASK(i);
264	}
265
266	return (link_mode);
267}
268
269static int
270mlx5e_set_port_pause_and_pfc(struct mlx5e_priv *priv)
271{
272	return (mlx5_set_port_pause_and_pfc(priv->mdev, 1,
273	    priv->params.rx_pauseframe_control,
274	    priv->params.tx_pauseframe_control,
275	    priv->params.rx_priority_flow_control,
276	    priv->params.tx_priority_flow_control));
277}
278
279static int
280mlx5e_set_port_pfc(struct mlx5e_priv *priv)
281{
282	int error;
283
284	if (priv->params.rx_pauseframe_control ||
285	    priv->params.tx_pauseframe_control) {
286		if_printf(priv->ifp,
287		    "Global pauseframes must be disabled before enabling PFC.\n");
288		error = -EINVAL;
289	} else {
290		error = mlx5e_set_port_pause_and_pfc(priv);
291	}
292	return (error);
293}
294
295static int
296mlx5e_media_change(struct ifnet *dev)
297{
298	struct mlx5e_priv *priv = dev->if_softc;
299	struct mlx5_core_dev *mdev = priv->mdev;
300	u32 eth_proto_cap;
301	u32 link_mode;
302	int was_opened;
303	int locked;
304	int error;
305
306	locked = PRIV_LOCKED(priv);
307	if (!locked)
308		PRIV_LOCK(priv);
309
310	if (IFM_TYPE(priv->media.ifm_media) != IFM_ETHER) {
311		error = EINVAL;
312		goto done;
313	}
314	link_mode = mlx5e_find_link_mode(IFM_SUBTYPE(priv->media.ifm_media));
315
316	/* query supported capabilities */
317	error = mlx5_query_port_proto_cap(mdev, &eth_proto_cap, MLX5_PTYS_EN);
318	if (error != 0) {
319		if_printf(dev, "Query port media capability failed\n");
320		goto done;
321	}
322	/* check for autoselect */
323	if (IFM_SUBTYPE(priv->media.ifm_media) == IFM_AUTO) {
324		link_mode = eth_proto_cap;
325		if (link_mode == 0) {
326			if_printf(dev, "Port media capability is zero\n");
327			error = EINVAL;
328			goto done;
329		}
330	} else {
331		link_mode = link_mode & eth_proto_cap;
332		if (link_mode == 0) {
333			if_printf(dev, "Unsupported link mode requested\n");
334			error = EINVAL;
335			goto done;
336		}
337	}
338	if (priv->media.ifm_media & (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE)) {
339		/* check if PFC is enabled */
340		if (priv->params.rx_priority_flow_control ||
341		    priv->params.tx_priority_flow_control) {
342			if_printf(dev, "PFC must be disabled before enabling global pauseframes.\n");
343			error = EINVAL;
344			goto done;
345		}
346	}
347	/* update pauseframe control bits */
348	priv->params.rx_pauseframe_control =
349	    (priv->media.ifm_media & IFM_ETH_RXPAUSE) ? 1 : 0;
350	priv->params.tx_pauseframe_control =
351	    (priv->media.ifm_media & IFM_ETH_TXPAUSE) ? 1 : 0;
352
353	/* check if device is opened */
354	was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
355
356	/* reconfigure the hardware */
357	mlx5_set_port_status(mdev, MLX5_PORT_DOWN);
358	mlx5_set_port_proto(mdev, link_mode, MLX5_PTYS_EN);
359	error = -mlx5e_set_port_pause_and_pfc(priv);
360	if (was_opened)
361		mlx5_set_port_status(mdev, MLX5_PORT_UP);
362
363done:
364	if (!locked)
365		PRIV_UNLOCK(priv);
366	return (error);
367}
368
369static void
370mlx5e_update_carrier_work(struct work_struct *work)
371{
372	struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
373	    update_carrier_work);
374
375	PRIV_LOCK(priv);
376	if (test_bit(MLX5E_STATE_OPENED, &priv->state))
377		mlx5e_update_carrier(priv);
378	PRIV_UNLOCK(priv);
379}
380
381/*
382 * This function reads the physical port counters from the firmware
383 * using a pre-defined layout defined by various MLX5E_PPORT_XXX()
384 * macros. The output is converted from big-endian 64-bit values into
385 * host endian ones and stored in the "priv->stats.pport" structure.
386 */
387static void
388mlx5e_update_pport_counters(struct mlx5e_priv *priv)
389{
390	struct mlx5_core_dev *mdev = priv->mdev;
391	struct mlx5e_pport_stats *s = &priv->stats.pport;
392	struct mlx5e_port_stats_debug *s_debug = &priv->stats.port_stats_debug;
393	u32 *in;
394	u32 *out;
395	const u64 *ptr;
396	unsigned sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
397	unsigned x;
398	unsigned y;
399	unsigned z;
400
401	/* allocate firmware request structures */
402	in = mlx5_vzalloc(sz);
403	out = mlx5_vzalloc(sz);
404	if (in == NULL || out == NULL)
405		goto free_out;
406
407	/*
408	 * Get pointer to the 64-bit counter set which is located at a
409	 * fixed offset in the output firmware request structure:
410	 */
411	ptr = (const uint64_t *)MLX5_ADDR_OF(ppcnt_reg, out, counter_set);
412
413	MLX5_SET(ppcnt_reg, in, local_port, 1);
414
415	/* read IEEE802_3 counter group using predefined counter layout */
416	MLX5_SET(ppcnt_reg, in, grp, MLX5_IEEE_802_3_COUNTERS_GROUP);
417	mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
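	/*
	 * NOTE: the per-priority counters occupy the first
	 * MLX5E_PPORT_PER_PRIO_STATS_NUM slots of "s->arg[]", so the
	 * IEEE802.3 group is stored starting at that offset; the "y"
	 * index then carries over from one counter group to the next.
	 */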
418	for (x = 0, y = MLX5E_PPORT_PER_PRIO_STATS_NUM;
419	     x != MLX5E_PPORT_IEEE802_3_STATS_NUM; x++, y++)
420		s->arg[y] = be64toh(ptr[x]);
421
422	/* read RFC2819 counter group using predefined counter layout */
423	MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2819_COUNTERS_GROUP);
424	mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
425	for (x = 0; x != MLX5E_PPORT_RFC2819_STATS_NUM; x++, y++)
426		s->arg[y] = be64toh(ptr[x]);
427	for (y = 0; x != MLX5E_PPORT_RFC2819_STATS_NUM +
428	    MLX5E_PPORT_RFC2819_STATS_DEBUG_NUM; x++, y++)
429		s_debug->arg[y] = be64toh(ptr[x]);
430
431	/* read RFC2863 counter group using predefined counter layout */
432	MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2863_COUNTERS_GROUP);
433	mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
434	for (x = 0; x != MLX5E_PPORT_RFC2863_STATS_DEBUG_NUM; x++, y++)
435		s_debug->arg[y] = be64toh(ptr[x]);
436
437	/* read physical layer stats counter group using predefined counter layout */
438	MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_COUNTERS_GROUP);
439	mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
440	for (x = 0; x != MLX5E_PPORT_PHYSICAL_LAYER_STATS_DEBUG_NUM; x++, y++)
441		s_debug->arg[y] = be64toh(ptr[x]);
442
443	/* read per-priority counters */
444	MLX5_SET(ppcnt_reg, in, grp, MLX5_PER_PRIORITY_COUNTERS_GROUP);
445
446	/* iterate over all the priorities */
447	for (y = z = 0; z != MLX5E_PPORT_PER_PRIO_STATS_NUM_PRIO; z++) {
448		MLX5_SET(ppcnt_reg, in, prio_tc, z);
449		mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
450
451		/* read per priority stats counter group using predefined counter layout */
452		for (x = 0; x != (MLX5E_PPORT_PER_PRIO_STATS_NUM /
453		    MLX5E_PPORT_PER_PRIO_STATS_NUM_PRIO); x++, y++)
454			s->arg[y] = be64toh(ptr[x]);
455	}
456free_out:
457	/* free firmware request structures */
458	kvfree(in);
459	kvfree(out);
460}
461
462/*
463 * This function is called regularly to collect all statistics
464 * counters from the firmware. The values can be viewed through the
465 * sysctl interface. Execution is serialized using the priv's global
466 * configuration lock.
467 */
468static void
469mlx5e_update_stats_work(struct work_struct *work)
470{
471	struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
472	    update_stats_work);
473	struct mlx5_core_dev *mdev = priv->mdev;
474	struct mlx5e_vport_stats *s = &priv->stats.vport;
475	struct mlx5e_sq_stats *sq_stats;
476	struct buf_ring *sq_br;
477#if (__FreeBSD_version < 1100000)
478	struct ifnet *ifp = priv->ifp;
479#endif
480
481	u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)];
482	u32 *out;
483	int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out);
484	u64 tso_packets = 0;
485	u64 tso_bytes = 0;
486	u64 tx_queue_dropped = 0;
487	u64 tx_defragged = 0;
488	u64 tx_offload_none = 0;
489	u64 lro_packets = 0;
490	u64 lro_bytes = 0;
491	u64 sw_lro_queued = 0;
492	u64 sw_lro_flushed = 0;
493	u64 rx_csum_none = 0;
494	u64 rx_wqe_err = 0;
495	u32 rx_out_of_buffer = 0;
496	int i;
497	int j;
498
499	PRIV_LOCK(priv);
500	out = mlx5_vzalloc(outlen);
501	if (out == NULL)
502		goto free_out;
503	if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
504		goto free_out;
505
506	/* Collect the SW counters first and then the HW counters, for consistency */
507	for (i = 0; i < priv->params.num_channels; i++) {
508		struct mlx5e_channel *pch = priv->channel + i;
509		struct mlx5e_rq *rq = &pch->rq;
510		struct mlx5e_rq_stats *rq_stats = &pch->rq.stats;
511
512		/* collect stats from LRO */
513		rq_stats->sw_lro_queued = rq->lro.lro_queued;
514		rq_stats->sw_lro_flushed = rq->lro.lro_flushed;
515		sw_lro_queued += rq_stats->sw_lro_queued;
516		sw_lro_flushed += rq_stats->sw_lro_flushed;
517		lro_packets += rq_stats->lro_packets;
518		lro_bytes += rq_stats->lro_bytes;
519		rx_csum_none += rq_stats->csum_none;
520		rx_wqe_err += rq_stats->wqe_err;
521
522		for (j = 0; j < priv->num_tc; j++) {
523			sq_stats = &pch->sq[j].stats;
524			sq_br = pch->sq[j].br;
525
526			tso_packets += sq_stats->tso_packets;
527			tso_bytes += sq_stats->tso_bytes;
528			tx_queue_dropped += sq_stats->dropped;
529			if (sq_br != NULL)
530				tx_queue_dropped += sq_br->br_drops;
531			tx_defragged += sq_stats->defragged;
532			tx_offload_none += sq_stats->csum_offload_none;
533		}
534	}
535
536	s->tx_jumbo_packets =
537	    priv->stats.port_stats_debug.p1519to2047octets +
538	    priv->stats.port_stats_debug.p2048to4095octets +
539	    priv->stats.port_stats_debug.p4096to8191octets +
540	    priv->stats.port_stats_debug.p8192to10239octets;
541
542	/* update counters */
543	s->tso_packets = tso_packets;
544	s->tso_bytes = tso_bytes;
545	s->tx_queue_dropped = tx_queue_dropped;
546	s->tx_defragged = tx_defragged;
547	s->lro_packets = lro_packets;
548	s->lro_bytes = lro_bytes;
549	s->sw_lro_queued = sw_lro_queued;
550	s->sw_lro_flushed = sw_lro_flushed;
551	s->rx_csum_none = rx_csum_none;
552	s->rx_wqe_err = rx_wqe_err;
553
554	/* HW counters */
555	memset(in, 0, sizeof(in));
556
557	MLX5_SET(query_vport_counter_in, in, opcode,
558	    MLX5_CMD_OP_QUERY_VPORT_COUNTER);
559	MLX5_SET(query_vport_counter_in, in, op_mod, 0);
560	MLX5_SET(query_vport_counter_in, in, other_vport, 0);
561
562	memset(out, 0, outlen);
563
564	/* get number of out-of-buffer drops first */
565	if (mlx5_vport_query_out_of_rx_buffer(mdev, priv->counter_set_id,
566	    &rx_out_of_buffer))
567		goto free_out;
568
569	/* accumulate difference into a 64-bit counter */
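	/*
	 * The firmware counter is only 32 bits wide and may wrap; the
	 * (u32) cast makes the subtraction modulo 2^32, so the delta
	 * is correct across a wrap before being added to the 64-bit
	 * software counter.
	 */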
570	s->rx_out_of_buffer += (u64)(u32)(rx_out_of_buffer - s->rx_out_of_buffer_prev);
571	s->rx_out_of_buffer_prev = rx_out_of_buffer;
572
573	/* get port statistics */
574	if (mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen))
575		goto free_out;
576
577#define	MLX5_GET_CTR(out, x) \
578	MLX5_GET64(query_vport_counter_out, out, x)
579
580	s->rx_error_packets =
581	    MLX5_GET_CTR(out, received_errors.packets);
582	s->rx_error_bytes =
583	    MLX5_GET_CTR(out, received_errors.octets);
584	s->tx_error_packets =
585	    MLX5_GET_CTR(out, transmit_errors.packets);
586	s->tx_error_bytes =
587	    MLX5_GET_CTR(out, transmit_errors.octets);
588
589	s->rx_unicast_packets =
590	    MLX5_GET_CTR(out, received_eth_unicast.packets);
591	s->rx_unicast_bytes =
592	    MLX5_GET_CTR(out, received_eth_unicast.octets);
593	s->tx_unicast_packets =
594	    MLX5_GET_CTR(out, transmitted_eth_unicast.packets);
595	s->tx_unicast_bytes =
596	    MLX5_GET_CTR(out, transmitted_eth_unicast.octets);
597
598	s->rx_multicast_packets =
599	    MLX5_GET_CTR(out, received_eth_multicast.packets);
600	s->rx_multicast_bytes =
601	    MLX5_GET_CTR(out, received_eth_multicast.octets);
602	s->tx_multicast_packets =
603	    MLX5_GET_CTR(out, transmitted_eth_multicast.packets);
604	s->tx_multicast_bytes =
605	    MLX5_GET_CTR(out, transmitted_eth_multicast.octets);
606
607	s->rx_broadcast_packets =
608	    MLX5_GET_CTR(out, received_eth_broadcast.packets);
609	s->rx_broadcast_bytes =
610	    MLX5_GET_CTR(out, received_eth_broadcast.octets);
611	s->tx_broadcast_packets =
612	    MLX5_GET_CTR(out, transmitted_eth_broadcast.packets);
613	s->tx_broadcast_bytes =
614	    MLX5_GET_CTR(out, transmitted_eth_broadcast.octets);
615
616	s->rx_packets =
617	    s->rx_unicast_packets +
618	    s->rx_multicast_packets +
619	    s->rx_broadcast_packets -
620	    s->rx_out_of_buffer;
621	s->rx_bytes =
622	    s->rx_unicast_bytes +
623	    s->rx_multicast_bytes +
624	    s->rx_broadcast_bytes;
625	s->tx_packets =
626	    s->tx_unicast_packets +
627	    s->tx_multicast_packets +
628	    s->tx_broadcast_packets;
629	s->tx_bytes =
630	    s->tx_unicast_bytes +
631	    s->tx_multicast_bytes +
632	    s->tx_broadcast_bytes;
633
634	/* Update calculated offload counters */
635	s->tx_csum_offload = s->tx_packets - tx_offload_none;
636	s->rx_csum_good = s->rx_packets - s->rx_csum_none;
637
638	/* Get physical port counters */
639	mlx5e_update_pport_counters(priv);
640
641#if (__FreeBSD_version < 1100000)
642	/* no get_counters interface in FreeBSD 10 */
643	ifp->if_ipackets = s->rx_packets;
644	ifp->if_ierrors = s->rx_error_packets +
645	    priv->stats.pport.alignment_err +
646	    priv->stats.pport.check_seq_err +
647	    priv->stats.pport.crc_align_errors +
648	    priv->stats.pport.in_range_len_errors +
649	    priv->stats.pport.jabbers +
650	    priv->stats.pport.out_of_range_len +
651	    priv->stats.pport.oversize_pkts +
652	    priv->stats.pport.symbol_err +
653	    priv->stats.pport.too_long_errors +
654	    priv->stats.pport.undersize_pkts +
655	    priv->stats.pport.unsupported_op_rx;
656	ifp->if_iqdrops = s->rx_out_of_buffer +
657	    priv->stats.pport.drop_events;
658	ifp->if_opackets = s->tx_packets;
659	ifp->if_oerrors = s->tx_error_packets;
660	ifp->if_snd.ifq_drops = s->tx_queue_dropped;
661	ifp->if_ibytes = s->rx_bytes;
662	ifp->if_obytes = s->tx_bytes;
663	ifp->if_collisions =
664	    priv->stats.pport.collisions;
665#endif
666
667free_out:
668	kvfree(out);
669
670	/* Update diagnostics, if any */
671	if (priv->params_ethtool.diag_pci_enable ||
672	    priv->params_ethtool.diag_general_enable) {
673		int error = mlx5_core_get_diagnostics_full(mdev,
674		    priv->params_ethtool.diag_pci_enable ? &priv->params_pci : NULL,
675		    priv->params_ethtool.diag_general_enable ? &priv->params_general : NULL);
676		if (error != 0)
677			if_printf(priv->ifp, "Failed reading diagnostics: %d\n", error);
678	}
679	PRIV_UNLOCK(priv);
680}
681
682static void
683mlx5e_update_stats(void *arg)
684{
685	struct mlx5e_priv *priv = arg;
686
687	queue_work(priv->wq, &priv->update_stats_work);
688
689	callout_reset(&priv->watchdog, hz, &mlx5e_update_stats, priv);
690}
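
/*
 * Illustrative sketch (assumed, not part of this file): the watchdog
 * callout is typically primed once when the interface is opened,
 * e.g.:
 *
 *	callout_reset(&priv->watchdog, hz, &mlx5e_update_stats, priv);
 *
 * after which mlx5e_update_stats() re-arms itself every "hz" ticks,
 * i.e. once per second.
 */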
691
692static void
693mlx5e_async_event_sub(struct mlx5e_priv *priv,
694    enum mlx5_dev_event event)
695{
696	switch (event) {
697	case MLX5_DEV_EVENT_PORT_UP:
698	case MLX5_DEV_EVENT_PORT_DOWN:
699		queue_work(priv->wq, &priv->update_carrier_work);
700		break;
701
702	default:
703		break;
704	}
705}
706
707static void
708mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv,
709    enum mlx5_dev_event event, unsigned long param)
710{
711	struct mlx5e_priv *priv = vpriv;
712
713	mtx_lock(&priv->async_events_mtx);
714	if (test_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state))
715		mlx5e_async_event_sub(priv, event);
716	mtx_unlock(&priv->async_events_mtx);
717}
718
719static void
720mlx5e_enable_async_events(struct mlx5e_priv *priv)
721{
722	set_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state);
723}
724
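/*
 * Clearing the bit while holding the mutex ensures that once this
 * function returns, mlx5e_async_event() can no longer dispatch events
 * to the work queue.
 */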
725static void
726mlx5e_disable_async_events(struct mlx5e_priv *priv)
727{
728	mtx_lock(&priv->async_events_mtx);
729	clear_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state);
730	mtx_unlock(&priv->async_events_mtx);
731}
732
733static const char *mlx5e_rq_stats_desc[] = {
734	MLX5E_RQ_STATS(MLX5E_STATS_DESC)
735};
736
737static int
738mlx5e_create_rq(struct mlx5e_channel *c,
739    struct mlx5e_rq_param *param,
740    struct mlx5e_rq *rq)
741{
742	struct mlx5e_priv *priv = c->priv;
743	struct mlx5_core_dev *mdev = priv->mdev;
744	char buffer[16];
745	void *rqc = param->rqc;
746	void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq);
747	int wq_sz;
748	int err;
749	int i;
750	u32 nsegs, wqe_sz;
751
752	err = mlx5e_get_wqe_sz(priv, &wqe_sz, &nsegs);
753	if (err != 0)
754		goto done;
755
756	/* Create DMA descriptor TAG */
757	if ((err = -bus_dma_tag_create(
758	    bus_get_dma_tag(mdev->pdev->dev.bsddev),
759	    1,				/* any alignment */
760	    0,				/* no boundary */
761	    BUS_SPACE_MAXADDR,		/* lowaddr */
762	    BUS_SPACE_MAXADDR,		/* highaddr */
763	    NULL, NULL,			/* filter, filterarg */
764	    nsegs * MLX5E_MAX_RX_BYTES,	/* maxsize */
765	    nsegs,			/* nsegments */
766	    nsegs * MLX5E_MAX_RX_BYTES,	/* maxsegsize */
767	    0,				/* flags */
768	    NULL, NULL,			/* lockfunc, lockfuncarg */
769	    &rq->dma_tag)))
770		goto done;
771
772	err = mlx5_wq_ll_create(mdev, &param->wq, rqc_wq, &rq->wq,
773	    &rq->wq_ctrl);
774	if (err)
775		goto err_free_dma_tag;
776
777	rq->wq.db = &rq->wq.db[MLX5_RCV_DBR];
778
779	err = mlx5e_get_wqe_sz(priv, &rq->wqe_sz, &rq->nsegs);
780	if (err != 0)
781		goto err_rq_wq_destroy;
782
783	wq_sz = mlx5_wq_ll_get_size(&rq->wq);
784
785	err = -tcp_lro_init_args(&rq->lro, c->ifp, TCP_LRO_ENTRIES, wq_sz);
786	if (err)
787		goto err_rq_wq_destroy;
788
789	rq->mbuf = malloc(wq_sz * sizeof(rq->mbuf[0]), M_MLX5EN, M_WAITOK | M_ZERO);
790	for (i = 0; i != wq_sz; i++) {
791		struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, i);
792#if (MLX5E_MAX_RX_SEGS == 1)
793		uint32_t byte_count = rq->wqe_sz - MLX5E_NET_IP_ALIGN;
794#else
795		int j;
796#endif
797
798		err = -bus_dmamap_create(rq->dma_tag, 0, &rq->mbuf[i].dma_map);
799		if (err != 0) {
800			while (i--)
801				bus_dmamap_destroy(rq->dma_tag, rq->mbuf[i].dma_map);
802			goto err_rq_mbuf_free;
803		}
804
805		/* set value for constant fields */
806#if (MLX5E_MAX_RX_SEGS == 1)
807		wqe->data[0].lkey = c->mkey_be;
808		wqe->data[0].byte_count = cpu_to_be32(byte_count | MLX5_HW_START_PADDING);
809#else
810		for (j = 0; j < rq->nsegs; j++)
811			wqe->data[j].lkey = c->mkey_be;
812#endif
813	}
814
815	INIT_WORK(&rq->dim.work, mlx5e_dim_work);
816	if (priv->params.rx_cq_moderation_mode < 2) {
817		rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_DISABLED;
818	} else {
819		void *cqc = container_of(param,
820		    struct mlx5e_channel_param, rq)->rx_cq.cqc;
821
822		switch (MLX5_GET(cqc, cqc, cq_period_mode)) {
823		case MLX5_CQ_PERIOD_MODE_START_FROM_EQE:
824			rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE;
825			break;
826		case MLX5_CQ_PERIOD_MODE_START_FROM_CQE:
827			rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE;
828			break;
829		default:
830			rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_DISABLED;
831			break;
832		}
833	}
834
835	rq->ifp = c->ifp;
836	rq->channel = c;
837	rq->ix = c->ix;
838
839	snprintf(buffer, sizeof(buffer), "rxstat%d", c->ix);
840	mlx5e_create_stats(&rq->stats.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
841	    buffer, mlx5e_rq_stats_desc, MLX5E_RQ_STATS_NUM,
842	    rq->stats.arg);
843	return (0);
844
845err_rq_mbuf_free:
846	free(rq->mbuf, M_MLX5EN);
847	tcp_lro_free(&rq->lro);
848err_rq_wq_destroy:
849	mlx5_wq_destroy(&rq->wq_ctrl);
850err_free_dma_tag:
851	bus_dma_tag_destroy(rq->dma_tag);
852done:
853	return (err);
854}
855
856static void
857mlx5e_destroy_rq(struct mlx5e_rq *rq)
858{
859	int wq_sz;
860	int i;
861
862	/* destroy all sysctl nodes */
863	sysctl_ctx_free(&rq->stats.ctx);
864
865	/* free leftover LRO packets, if any */
866	tcp_lro_free(&rq->lro);
867
868	wq_sz = mlx5_wq_ll_get_size(&rq->wq);
869	for (i = 0; i != wq_sz; i++) {
870		if (rq->mbuf[i].mbuf != NULL) {
871			bus_dmamap_unload(rq->dma_tag, rq->mbuf[i].dma_map);
872			m_freem(rq->mbuf[i].mbuf);
873		}
874		bus_dmamap_destroy(rq->dma_tag, rq->mbuf[i].dma_map);
875	}
876	free(rq->mbuf, M_MLX5EN);
877	mlx5_wq_destroy(&rq->wq_ctrl);
878}
879
880static int
881mlx5e_enable_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param)
882{
883	struct mlx5e_channel *c = rq->channel;
884	struct mlx5e_priv *priv = c->priv;
885	struct mlx5_core_dev *mdev = priv->mdev;
886
887	void *in;
888	void *rqc;
889	void *wq;
890	int inlen;
891	int err;
892
893	inlen = MLX5_ST_SZ_BYTES(create_rq_in) +
894	    sizeof(u64) * rq->wq_ctrl.buf.npages;
895	in = mlx5_vzalloc(inlen);
896	if (in == NULL)
897		return (-ENOMEM);
898
899	rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
900	wq = MLX5_ADDR_OF(rqc, rqc, wq);
901
902	memcpy(rqc, param->rqc, sizeof(param->rqc));
903
904	MLX5_SET(rqc, rqc, cqn, c->rq.cq.mcq.cqn);
905	MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST);
906	MLX5_SET(rqc, rqc, flush_in_error_en, 1);
907	if (priv->counter_set_id >= 0)
908		MLX5_SET(rqc, rqc, counter_set_id, priv->counter_set_id);
909	MLX5_SET(wq, wq, log_wq_pg_sz, rq->wq_ctrl.buf.page_shift -
910	    PAGE_SHIFT);
911	MLX5_SET64(wq, wq, dbr_addr, rq->wq_ctrl.db.dma);
912
913	mlx5_fill_page_array(&rq->wq_ctrl.buf,
914	    (__be64 *) MLX5_ADDR_OF(wq, wq, pas));
915
916	err = mlx5_core_create_rq(mdev, in, inlen, &rq->rqn);
917
918	kvfree(in);
919
920	return (err);
921}
922
923static int
924mlx5e_modify_rq(struct mlx5e_rq *rq, int curr_state, int next_state)
925{
926	struct mlx5e_channel *c = rq->channel;
927	struct mlx5e_priv *priv = c->priv;
928	struct mlx5_core_dev *mdev = priv->mdev;
929
930	void *in;
931	void *rqc;
932	int inlen;
933	int err;
934
935	inlen = MLX5_ST_SZ_BYTES(modify_rq_in);
936	in = mlx5_vzalloc(inlen);
937	if (in == NULL)
938		return (-ENOMEM);
939
940	rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);
941
942	MLX5_SET(modify_rq_in, in, rqn, rq->rqn);
943	MLX5_SET(modify_rq_in, in, rq_state, curr_state);
944	MLX5_SET(rqc, rqc, state, next_state);
945
946	err = mlx5_core_modify_rq(mdev, in, inlen);
947
948	kvfree(in);
949
950	return (err);
951}
952
953static void
954mlx5e_disable_rq(struct mlx5e_rq *rq)
955{
956	struct mlx5e_channel *c = rq->channel;
957	struct mlx5e_priv *priv = c->priv;
958	struct mlx5_core_dev *mdev = priv->mdev;
959
960	mlx5_core_destroy_rq(mdev, rq->rqn);
961}
962
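/*
 * Poll until the RQ has been filled with at least "min_rx_wqes"
 * receive WQEs, giving up after roughly four seconds (1000 iterations
 * of 4 ms each).
 */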
963static int
964mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq)
965{
966	struct mlx5e_channel *c = rq->channel;
967	struct mlx5e_priv *priv = c->priv;
968	struct mlx5_wq_ll *wq = &rq->wq;
969	int i;
970
971	for (i = 0; i < 1000; i++) {
972		if (wq->cur_sz >= priv->params.min_rx_wqes)
973			return (0);
974
975		msleep(4);
976	}
977	return (-ETIMEDOUT);
978}
979
980static int
981mlx5e_open_rq(struct mlx5e_channel *c,
982    struct mlx5e_rq_param *param,
983    struct mlx5e_rq *rq)
984{
985	int err;
986
987	err = mlx5e_create_rq(c, param, rq);
988	if (err)
989		return (err);
990
991	err = mlx5e_enable_rq(rq, param);
992	if (err)
993		goto err_destroy_rq;
994
995	err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY);
996	if (err)
997		goto err_disable_rq;
998
999	c->rq.enabled = 1;
1000
1001	return (0);
1002
1003err_disable_rq:
1004	mlx5e_disable_rq(rq);
1005err_destroy_rq:
1006	mlx5e_destroy_rq(rq);
1007
1008	return (err);
1009}
1010
1011static void
1012mlx5e_close_rq(struct mlx5e_rq *rq)
1013{
1014	mtx_lock(&rq->mtx);
1015	rq->enabled = 0;
1016	callout_stop(&rq->watchdog);
1017	mtx_unlock(&rq->mtx);
1018
1019	callout_drain(&rq->watchdog);
1020
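	/*
	 * Move the RQ from ready to error state; because
	 * flush_in_error_en was set at creation time, the hardware
	 * completes all posted receive WQEs, letting
	 * mlx5e_close_rq_wait() drain the ring.
	 */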
1021	mlx5e_modify_rq(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR);
1022}
1023
1024static void
1025mlx5e_close_rq_wait(struct mlx5e_rq *rq)
1026{
1027	struct mlx5_core_dev *mdev = rq->channel->priv->mdev;
1028
1029	/* wait till RQ is empty */
1030	while (!mlx5_wq_ll_is_empty(&rq->wq) &&
1031	       (mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR)) {
1032		msleep(4);
1033		rq->cq.mcq.comp(&rq->cq.mcq);
1034	}
1035
1036	cancel_work_sync(&rq->dim.work);
1037	mlx5e_disable_rq(rq);
1038	mlx5e_destroy_rq(rq);
1039}
1040
1041void
1042mlx5e_free_sq_db(struct mlx5e_sq *sq)
1043{
1044	int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
1045	int x;
1046
1047	for (x = 0; x != wq_sz; x++)
1048		bus_dmamap_destroy(sq->dma_tag, sq->mbuf[x].dma_map);
1049	free(sq->mbuf, M_MLX5EN);
1050}
1051
1052int
1053mlx5e_alloc_sq_db(struct mlx5e_sq *sq)
1054{
1055	int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
1056	int err;
1057	int x;
1058
1059	sq->mbuf = malloc(wq_sz * sizeof(sq->mbuf[0]), M_MLX5EN, M_WAITOK | M_ZERO);
1060
1061	/* Create DMA descriptor MAPs */
1062	for (x = 0; x != wq_sz; x++) {
1063		err = -bus_dmamap_create(sq->dma_tag, 0, &sq->mbuf[x].dma_map);
1064		if (err != 0) {
1065			while (x--)
1066				bus_dmamap_destroy(sq->dma_tag, sq->mbuf[x].dma_map);
1067			free(sq->mbuf, M_MLX5EN);
1068			return (err);
1069		}
1070	}
1071	return (0);
1072}
1073
1074static const char *mlx5e_sq_stats_desc[] = {
1075	MLX5E_SQ_STATS(MLX5E_STATS_DESC)
1076};
1077
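/*
 * Recompute the transmit header-inlining policy: "min_insert_caps"
 * tells the transmit path which packets may be sent without inlining
 * their L2 header into the WQE; MLX5E_INSERT_VLAN is only advertised
 * when the hardware can insert the VLAN tag itself (wqe_vlan_insert).
 */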
1078void
1079mlx5e_update_sq_inline(struct mlx5e_sq *sq)
1080{
1081	sq->max_inline = sq->priv->params.tx_max_inline;
1082	sq->min_inline_mode = sq->priv->params.tx_min_inline_mode;
1083
1084	/*
1085	 * Check if the trust state is DSCP, or if the inline mode is NONE,
1086	 * which indicates ConnectX-5 or newer hardware.
1087	 */
1088	if (sq->priv->params_ethtool.trust_state != MLX5_QPTS_TRUST_PCP ||
1089	    sq->min_inline_mode == MLX5_INLINE_MODE_NONE) {
1090		if (MLX5_CAP_ETH(sq->priv->mdev, wqe_vlan_insert))
1091			sq->min_insert_caps = MLX5E_INSERT_VLAN | MLX5E_INSERT_NON_VLAN;
1092		else
1093			sq->min_insert_caps = MLX5E_INSERT_NON_VLAN;
1094	} else {
1095		sq->min_insert_caps = 0;
1096	}
1097}
1098
1099static void
1100mlx5e_refresh_sq_inline_sub(struct mlx5e_priv *priv, struct mlx5e_channel *c)
1101{
1102	int i;
1103
1104	for (i = 0; i != c->num_tc; i++) {
1105		mtx_lock(&c->sq[i].lock);
1106		mlx5e_update_sq_inline(&c->sq[i]);
1107		mtx_unlock(&c->sq[i].lock);
1108	}
1109}
1110
1111void
1112mlx5e_refresh_sq_inline(struct mlx5e_priv *priv)
1113{
1114	int i;
1115
1116	/* check if channels are closed */
1117	if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
1118		return;
1119
1120	for (i = 0; i < priv->params.num_channels; i++)
1121		mlx5e_refresh_sq_inline_sub(priv, &priv->channel[i]);
1122}
1123
1124static int
1125mlx5e_create_sq(struct mlx5e_channel *c,
1126    int tc,
1127    struct mlx5e_sq_param *param,
1128    struct mlx5e_sq *sq)
1129{
1130	struct mlx5e_priv *priv = c->priv;
1131	struct mlx5_core_dev *mdev = priv->mdev;
1132	char buffer[16];
1133	void *sqc = param->sqc;
1134	void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq);
1135	int err;
1136
1137	/* Create DMA descriptor TAG */
1138	if ((err = -bus_dma_tag_create(
1139	    bus_get_dma_tag(mdev->pdev->dev.bsddev),
1140	    1,				/* any alignment */
1141	    0,				/* no boundary */
1142	    BUS_SPACE_MAXADDR,		/* lowaddr */
1143	    BUS_SPACE_MAXADDR,		/* highaddr */
1144	    NULL, NULL,			/* filter, filterarg */
1145	    MLX5E_MAX_TX_PAYLOAD_SIZE,	/* maxsize */
1146	    MLX5E_MAX_TX_MBUF_FRAGS,	/* nsegments */
1147	    MLX5E_MAX_TX_MBUF_SIZE,	/* maxsegsize */
1148	    0,				/* flags */
1149	    NULL, NULL,			/* lockfunc, lockfuncarg */
1150	    &sq->dma_tag)))
1151		goto done;
1152
1153	err = mlx5_alloc_map_uar(mdev, &sq->uar);
1154	if (err)
1155		goto err_free_dma_tag;
1156
1157	err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq,
1158	    &sq->wq_ctrl);
1159	if (err)
1160		goto err_unmap_free_uar;
1161
1162	sq->wq.db = &sq->wq.db[MLX5_SND_DBR];
1163	sq->bf_buf_size = (1 << MLX5_CAP_GEN(mdev, log_bf_reg_size)) / 2;
1164
1165	err = mlx5e_alloc_sq_db(sq);
1166	if (err)
1167		goto err_sq_wq_destroy;
1168
1169	sq->mkey_be = c->mkey_be;
1170	sq->ifp = priv->ifp;
1171	sq->priv = priv;
1172	sq->tc = tc;
1173
1174	mlx5e_update_sq_inline(sq);
1175
1176	snprintf(buffer, sizeof(buffer), "txstat%dtc%d", c->ix, tc);
1177	mlx5e_create_stats(&sq->stats.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
1178	    buffer, mlx5e_sq_stats_desc, MLX5E_SQ_STATS_NUM,
1179	    sq->stats.arg);
1180
1181	return (0);
1182
1183err_sq_wq_destroy:
1184	mlx5_wq_destroy(&sq->wq_ctrl);
1185
1186err_unmap_free_uar:
1187	mlx5_unmap_free_uar(mdev, &sq->uar);
1188
1189err_free_dma_tag:
1190	bus_dma_tag_destroy(sq->dma_tag);
1191done:
1192	return (err);
1193}
1194
1195static void
1196mlx5e_destroy_sq(struct mlx5e_sq *sq)
1197{
1198	/* destroy all sysctl nodes */
1199	sysctl_ctx_free(&sq->stats.ctx);
1200
1201	mlx5e_free_sq_db(sq);
1202	mlx5_wq_destroy(&sq->wq_ctrl);
1203	mlx5_unmap_free_uar(sq->priv->mdev, &sq->uar);
1204}
1205
1206int
1207mlx5e_enable_sq(struct mlx5e_sq *sq, struct mlx5e_sq_param *param,
1208    int tis_num)
1209{
1210	void *in;
1211	void *sqc;
1212	void *wq;
1213	int inlen;
1214	int err;
1215
1216	inlen = MLX5_ST_SZ_BYTES(create_sq_in) +
1217	    sizeof(u64) * sq->wq_ctrl.buf.npages;
1218	in = mlx5_vzalloc(inlen);
1219	if (in == NULL)
1220		return (-ENOMEM);
1221
1222	sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
1223	wq = MLX5_ADDR_OF(sqc, sqc, wq);
1224
1225	memcpy(sqc, param->sqc, sizeof(param->sqc));
1226
1227	MLX5_SET(sqc, sqc, tis_num_0, tis_num);
1228	MLX5_SET(sqc, sqc, cqn, sq->cq.mcq.cqn);
1229	MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST);
1230	MLX5_SET(sqc, sqc, tis_lst_sz, 1);
1231	MLX5_SET(sqc, sqc, flush_in_error_en, 1);
1232
1233	MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
1234	MLX5_SET(wq, wq, uar_page, sq->uar.index);
1235	MLX5_SET(wq, wq, log_wq_pg_sz, sq->wq_ctrl.buf.page_shift -
1236	    PAGE_SHIFT);
1237	MLX5_SET64(wq, wq, dbr_addr, sq->wq_ctrl.db.dma);
1238
1239	mlx5_fill_page_array(&sq->wq_ctrl.buf,
1240	    (__be64 *) MLX5_ADDR_OF(wq, wq, pas));
1241
1242	err = mlx5_core_create_sq(sq->priv->mdev, in, inlen, &sq->sqn);
1243
1244	kvfree(in);
1245
1246	return (err);
1247}
1248
1249int
1250mlx5e_modify_sq(struct mlx5e_sq *sq, int curr_state, int next_state)
1251{
1252	void *in;
1253	void *sqc;
1254	int inlen;
1255	int err;
1256
1257	inlen = MLX5_ST_SZ_BYTES(modify_sq_in);
1258	in = mlx5_vzalloc(inlen);
1259	if (in == NULL)
1260		return (-ENOMEM);
1261
1262	sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);
1263
1264	MLX5_SET(modify_sq_in, in, sqn, sq->sqn);
1265	MLX5_SET(modify_sq_in, in, sq_state, curr_state);
1266	MLX5_SET(sqc, sqc, state, next_state);
1267
1268	err = mlx5_core_modify_sq(sq->priv->mdev, in, inlen);
1269
1270	kvfree(in);
1271
1272	return (err);
1273}
1274
1275void
1276mlx5e_disable_sq(struct mlx5e_sq *sq)
1277{
1278
1279	mlx5_core_destroy_sq(sq->priv->mdev, sq->sqn);
1280}
1281
1282static int
1283mlx5e_open_sq(struct mlx5e_channel *c,
1284    int tc,
1285    struct mlx5e_sq_param *param,
1286    struct mlx5e_sq *sq)
1287{
1288	int err;
1289
1290	err = mlx5e_create_sq(c, tc, param, sq);
1291	if (err)
1292		return (err);
1293
1294	err = mlx5e_enable_sq(sq, param, c->priv->tisn[tc]);
1295	if (err)
1296		goto err_destroy_sq;
1297
1298	err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY);
1299	if (err)
1300		goto err_disable_sq;
1301
1302	WRITE_ONCE(sq->running, 1);
1303
1304	return (0);
1305
1306err_disable_sq:
1307	mlx5e_disable_sq(sq);
1308err_destroy_sq:
1309	mlx5e_destroy_sq(sq);
1310
1311	return (err);
1312}
1313
1314static void
1315mlx5e_sq_send_nops_locked(struct mlx5e_sq *sq, int can_sleep)
1316{
1317	/* fill up remainder with NOPs */
1318	while (sq->cev_counter != 0) {
1319		while (!mlx5e_sq_has_room_for(sq, 1)) {
1320			if (can_sleep != 0) {
1321				mtx_unlock(&sq->lock);
1322				msleep(4);
1323				mtx_lock(&sq->lock);
1324			} else {
1325				goto done;
1326			}
1327		}
1328		/* send a single NOP */
1329		mlx5e_send_nop(sq, 1);
1330		atomic_thread_fence_rel();
1331	}
1332done:
1333	/* Check if we need to write the doorbell */
1334	if (likely(sq->doorbell.d64 != 0)) {
1335		mlx5e_tx_notify_hw(sq, sq->doorbell.d32, 0);
1336		sq->doorbell.d64 = 0;
1337	}
1338}
1339
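/*
 * Completion event moderation: to reduce completion interrupts, the
 * send path requests a hardware completion event only once per
 * "cev_factor" transmit work requests. If transmission stops in the
 * middle of such an interval, the timer below pads the ring with NOPs
 * (via mlx5e_sq_send_nops_locked() above) until a completion event is
 * requested, so that outstanding mbufs are eventually reclaimed.
 */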
1340void
1341mlx5e_sq_cev_timeout(void *arg)
1342{
1343	struct mlx5e_sq *sq = arg;
1344
1345	mtx_assert(&sq->lock, MA_OWNED);
1346
1347	/* check next state */
1348	switch (sq->cev_next_state) {
1349	case MLX5E_CEV_STATE_SEND_NOPS:
1350		/* fill TX ring with NOPs, if any */
1351		mlx5e_sq_send_nops_locked(sq, 0);
1352
1353		/* check if completed */
1354		if (sq->cev_counter == 0) {
1355			sq->cev_next_state = MLX5E_CEV_STATE_INITIAL;
1356			return;
1357		}
1358		break;
1359	default:
1360		/* send NOPs on next timeout */
1361		sq->cev_next_state = MLX5E_CEV_STATE_SEND_NOPS;
1362		break;
1363	}
1364
1365	/* restart timer */
1366	callout_reset_curcpu(&sq->cev_callout, hz, mlx5e_sq_cev_timeout, sq);
1367}
1368
1369void
1370mlx5e_drain_sq(struct mlx5e_sq *sq)
1371{
1372	int error;
1373	struct mlx5_core_dev *mdev = sq->priv->mdev;
1374
1375	/*
1376	 * Check if already stopped.
1377	 *
1378	 * NOTE: Serialization of this function is managed by the
1379	 * caller ensuring the priv's state lock is locked or in case
1380	 * of rate limit support, a single thread manages drain and
1381	 * resume of SQs. The "running" variable can therefore safely
1382	 * be read without any locks.
1383	 */
1384	if (READ_ONCE(sq->running) == 0)
1385		return;
1386
1387	/* don't put more packets into the SQ */
1388	WRITE_ONCE(sq->running, 0);
1389
1390	/* serialize access to DMA rings */
1391	mtx_lock(&sq->lock);
1392
1393	/* tear down the event factor timer, if any */
1394	sq->cev_next_state = MLX5E_CEV_STATE_HOLD_NOPS;
1395	callout_stop(&sq->cev_callout);
1396
1397	/* send dummy NOPs in order to flush the transmit ring */
1398	mlx5e_sq_send_nops_locked(sq, 1);
1399	mtx_unlock(&sq->lock);
1400
1401	/* make sure it is safe to free the callout */
1402	callout_drain(&sq->cev_callout);
1403
1404	/* wait till SQ is empty or link is down */
1405	mtx_lock(&sq->lock);
1406	while (sq->cc != sq->pc &&
1407	    (sq->priv->media_status_last & IFM_ACTIVE) != 0 &&
1408	    mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) {
1409		mtx_unlock(&sq->lock);
1410		msleep(1);
1411		sq->cq.mcq.comp(&sq->cq.mcq);
1412		mtx_lock(&sq->lock);
1413	}
1414	mtx_unlock(&sq->lock);
1415
1416	/* error out remaining requests */
1417	error = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY, MLX5_SQC_STATE_ERR);
1418	if (error != 0) {
1419		if_printf(sq->ifp,
1420		    "mlx5e_modify_sq() from RDY to ERR failed: %d\n", error);
1421	}
1422
1423	/* wait till SQ is empty */
1424	mtx_lock(&sq->lock);
1425	while (sq->cc != sq->pc &&
1426	       mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) {
1427		mtx_unlock(&sq->lock);
1428		msleep(1);
1429		sq->cq.mcq.comp(&sq->cq.mcq);
1430		mtx_lock(&sq->lock);
1431	}
1432	mtx_unlock(&sq->lock);
1433}
1434
1435static void
1436mlx5e_close_sq_wait(struct mlx5e_sq *sq)
1437{
1438
1439	mlx5e_drain_sq(sq);
1440	mlx5e_disable_sq(sq);
1441	mlx5e_destroy_sq(sq);
1442}
1443
1444static int
1445mlx5e_create_cq(struct mlx5e_priv *priv,
1446    struct mlx5e_cq_param *param,
1447    struct mlx5e_cq *cq,
1448    mlx5e_cq_comp_t *comp,
1449    int eq_ix)
1450{
1451	struct mlx5_core_dev *mdev = priv->mdev;
1452	struct mlx5_core_cq *mcq = &cq->mcq;
1453	int eqn_not_used;
1454	int irqn;
1455	int err;
1456	u32 i;
1457
1458	param->wq.buf_numa_node = 0;
1459	param->wq.db_numa_node = 0;
1460
1461	err = mlx5_cqwq_create(mdev, &param->wq, param->cqc, &cq->wq,
1462	    &cq->wq_ctrl);
1463	if (err)
1464		return (err);
1465
1466	mlx5_vector2eqn(mdev, eq_ix, &eqn_not_used, &irqn);
1467
1468	mcq->cqe_sz = 64;
1469	mcq->set_ci_db = cq->wq_ctrl.db.db;
1470	mcq->arm_db = cq->wq_ctrl.db.db + 1;
1471	*mcq->set_ci_db = 0;
1472	*mcq->arm_db = 0;
1473	mcq->vector = eq_ix;
1474	mcq->comp = comp;
1475	mcq->event = mlx5e_cq_error_event;
1476	mcq->irqn = irqn;
1477	mcq->uar = &priv->cq_uar;
1478
1479	for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) {
1480		struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, i);
1481
1482		cqe->op_own = 0xf1;
1483	}
1484
1485	cq->priv = priv;
1486
1487	return (0);
1488}
1489
1490static void
1491mlx5e_destroy_cq(struct mlx5e_cq *cq)
1492{
1493	mlx5_wq_destroy(&cq->wq_ctrl);
1494}
1495
1496static int
1497mlx5e_enable_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param, int eq_ix)
1498{
1499	struct mlx5_core_cq *mcq = &cq->mcq;
1500	void *in;
1501	void *cqc;
1502	int inlen;
1503	int irqn_not_used;
1504	int eqn;
1505	int err;
1506
1507	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
1508	    sizeof(u64) * cq->wq_ctrl.buf.npages;
1509	in = mlx5_vzalloc(inlen);
1510	if (in == NULL)
1511		return (-ENOMEM);
1512
1513	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
1514
1515	memcpy(cqc, param->cqc, sizeof(param->cqc));
1516
1517	mlx5_fill_page_array(&cq->wq_ctrl.buf,
1518	    (__be64 *) MLX5_ADDR_OF(create_cq_in, in, pas));
1519
1520	mlx5_vector2eqn(cq->priv->mdev, eq_ix, &eqn, &irqn_not_used);
1521
1522	MLX5_SET(cqc, cqc, c_eqn, eqn);
1523	MLX5_SET(cqc, cqc, uar_page, mcq->uar->index);
1524	MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
1525	    PAGE_SHIFT);
1526	MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma);
1527
1528	err = mlx5_core_create_cq(cq->priv->mdev, mcq, in, inlen);
1529
1530	kvfree(in);
1531
1532	if (err)
1533		return (err);
1534
1535	mlx5e_cq_arm(cq, MLX5_GET_DOORBELL_LOCK(&cq->priv->doorbell_lock));
1536
1537	return (0);
1538}
1539
1540static void
1541mlx5e_disable_cq(struct mlx5e_cq *cq)
1542{
1543
1544	mlx5_core_destroy_cq(cq->priv->mdev, &cq->mcq);
1545}
1546
1547int
1548mlx5e_open_cq(struct mlx5e_priv *priv,
1549    struct mlx5e_cq_param *param,
1550    struct mlx5e_cq *cq,
1551    mlx5e_cq_comp_t *comp,
1552    int eq_ix)
1553{
1554	int err;
1555
1556	err = mlx5e_create_cq(priv, param, cq, comp, eq_ix);
1557	if (err)
1558		return (err);
1559
1560	err = mlx5e_enable_cq(cq, param, eq_ix);
1561	if (err)
1562		goto err_destroy_cq;
1563
1564	return (0);
1565
1566err_destroy_cq:
1567	mlx5e_destroy_cq(cq);
1568
1569	return (err);
1570}
1571
1572void
1573mlx5e_close_cq(struct mlx5e_cq *cq)
1574{
1575	mlx5e_disable_cq(cq);
1576	mlx5e_destroy_cq(cq);
1577}
1578
1579static int
1580mlx5e_open_tx_cqs(struct mlx5e_channel *c,
1581    struct mlx5e_channel_param *cparam)
1582{
1583	int err;
1584	int tc;
1585
1586	for (tc = 0; tc < c->num_tc; tc++) {
1587		/* open completion queue */
1588		err = mlx5e_open_cq(c->priv, &cparam->tx_cq, &c->sq[tc].cq,
1589		    &mlx5e_tx_cq_comp, c->ix);
1590		if (err)
1591			goto err_close_tx_cqs;
1592	}
1593	return (0);
1594
1595err_close_tx_cqs:
1596	for (tc--; tc >= 0; tc--)
1597		mlx5e_close_cq(&c->sq[tc].cq);
1598
1599	return (err);
1600}
1601
1602static void
1603mlx5e_close_tx_cqs(struct mlx5e_channel *c)
1604{
1605	int tc;
1606
1607	for (tc = 0; tc < c->num_tc; tc++)
1608		mlx5e_close_cq(&c->sq[tc].cq);
1609}
1610
1611static int
1612mlx5e_open_sqs(struct mlx5e_channel *c,
1613    struct mlx5e_channel_param *cparam)
1614{
1615	int err;
1616	int tc;
1617
1618	for (tc = 0; tc < c->num_tc; tc++) {
1619		err = mlx5e_open_sq(c, tc, &cparam->sq, &c->sq[tc]);
1620		if (err)
1621			goto err_close_sqs;
1622	}
1623
1624	return (0);
1625
1626err_close_sqs:
1627	for (tc--; tc >= 0; tc--)
1628		mlx5e_close_sq_wait(&c->sq[tc]);
1629
1630	return (err);
1631}
1632
1633static void
1634mlx5e_close_sqs_wait(struct mlx5e_channel *c)
1635{
1636	int tc;
1637
1638	for (tc = 0; tc < c->num_tc; tc++)
1639		mlx5e_close_sq_wait(&c->sq[tc]);
1640}
1641
1642static void
1643mlx5e_chan_mtx_init(struct mlx5e_channel *c)
1644{
1645	int tc;
1646
1647	mtx_init(&c->rq.mtx, "mlx5rx", MTX_NETWORK_LOCK, MTX_DEF);
1648
1649	callout_init_mtx(&c->rq.watchdog, &c->rq.mtx, 0);
1650
1651	for (tc = 0; tc < c->num_tc; tc++) {
1652		struct mlx5e_sq *sq = c->sq + tc;
1653
1654		mtx_init(&sq->lock, "mlx5tx",
1655		    MTX_NETWORK_LOCK " TX", MTX_DEF);
1656		mtx_init(&sq->comp_lock, "mlx5comp",
1657		    MTX_NETWORK_LOCK " TX", MTX_DEF);
1658
1659		callout_init_mtx(&sq->cev_callout, &sq->lock, 0);
1660
1661		sq->cev_factor = c->priv->params_ethtool.tx_completion_fact;
1662
1663		/* ensure the TX completion event factor is not zero */
1664		if (sq->cev_factor == 0)
1665			sq->cev_factor = 1;
1666	}
1667}
1668
1669static void
1670mlx5e_chan_mtx_destroy(struct mlx5e_channel *c)
1671{
1672	int tc;
1673
1674	mtx_destroy(&c->rq.mtx);
1675
1676	for (tc = 0; tc < c->num_tc; tc++) {
1677		mtx_destroy(&c->sq[tc].lock);
1678		mtx_destroy(&c->sq[tc].comp_lock);
1679	}
1680}
1681
1682static int
1683mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
1684    struct mlx5e_channel_param *cparam,
1685    struct mlx5e_channel *c)
1686{
1687	int err;
1688
1689	memset(c, 0, sizeof(*c));
1690
1691	c->priv = priv;
1692	c->ix = ix;
1693	c->ifp = priv->ifp;
1694	c->mkey_be = cpu_to_be32(priv->mr.key);
1695	c->num_tc = priv->num_tc;
1696
1697	/* init mutexes */
1698	mlx5e_chan_mtx_init(c);
1699
1700	/* open transmit completion queue */
1701	err = mlx5e_open_tx_cqs(c, cparam);
1702	if (err)
1703		goto err_free;
1704
1705	/* open receive completion queue */
1706	err = mlx5e_open_cq(c->priv, &cparam->rx_cq, &c->rq.cq,
1707	    &mlx5e_rx_cq_comp, c->ix);
1708	if (err)
1709		goto err_close_tx_cqs;
1710
1711	err = mlx5e_open_sqs(c, cparam);
1712	if (err)
1713		goto err_close_rx_cq;
1714
1715	err = mlx5e_open_rq(c, &cparam->rq, &c->rq);
1716	if (err)
1717		goto err_close_sqs;
1718
1719	/* poll receive queue initially */
1720	c->rq.cq.mcq.comp(&c->rq.cq.mcq);
1721
1722	return (0);
1723
1724err_close_sqs:
1725	mlx5e_close_sqs_wait(c);
1726
1727err_close_rx_cq:
1728	mlx5e_close_cq(&c->rq.cq);
1729
1730err_close_tx_cqs:
1731	mlx5e_close_tx_cqs(c);
1732
1733err_free:
1734	/* destroy mutexes */
1735	mlx5e_chan_mtx_destroy(c);
1736	return (err);
1737}
1738
1739static void
1740mlx5e_close_channel(struct mlx5e_channel *c)
1741{
1742	mlx5e_close_rq(&c->rq);
1743}
1744
1745static void
1746mlx5e_close_channel_wait(struct mlx5e_channel *c)
1747{
1748	mlx5e_close_rq_wait(&c->rq);
1749	mlx5e_close_sqs_wait(c);
1750	mlx5e_close_cq(&c->rq.cq);
1751	mlx5e_close_tx_cqs(c);
1752	/* destroy mutexes */
1753	mlx5e_chan_mtx_destroy(c);
1754}
1755
1756static int
1757mlx5e_get_wqe_sz(struct mlx5e_priv *priv, u32 *wqe_sz, u32 *nsegs)
1758{
1759	u32 r, n;
1760
1761	r = priv->params.hw_lro_en ? priv->params.lro_wqe_sz :
1762	    MLX5E_SW2MB_MTU(priv->ifp->if_mtu);
1763	if (r > MJUM16BYTES)
1764		return (-ENOMEM);
1765
1766	if (r > MJUM9BYTES)
1767		r = MJUM16BYTES;
1768	else if (r > MJUMPAGESIZE)
1769		r = MJUM9BYTES;
1770	else if (r > MCLBYTES)
1771		r = MJUMPAGESIZE;
1772	else
1773		r = MCLBYTES;
1774
1775	/*
1776	 * n + 1 must be a power of two, because stride size must be.
1777	 * Stride size is 16 * (n + 1), as the first segment is
1778	 * control.
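	 *
	 * As an illustrative example (assuming a 4096-byte
	 * MLX5E_MAX_RX_BYTES): r = MJUM9BYTES (9216) would give
	 * howmany() = 3 data segments, and n + 1 = 4 is already a
	 * power of two, so the stride would be 16 * (3 + 1) = 64 bytes.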
1779	 */
1780	for (n = howmany(r, MLX5E_MAX_RX_BYTES); !powerof2(n + 1); n++)
1781		;
1782
1783	*wqe_sz = r;
1784	*nsegs = n;
1785	return (0);
1786}
1787
1788static void
1789mlx5e_build_rq_param(struct mlx5e_priv *priv,
1790    struct mlx5e_rq_param *param)
1791{
1792	void *rqc = param->rqc;
1793	void *wq = MLX5_ADDR_OF(rqc, rqc, wq);
1794	u32 wqe_sz, nsegs;
1795
1796	mlx5e_get_wqe_sz(priv, &wqe_sz, &nsegs);
1797	MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST);
1798	MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN);
1799	MLX5_SET(wq, wq, log_wq_stride, ilog2(sizeof(struct mlx5e_rx_wqe) +
1800	    nsegs * sizeof(struct mlx5_wqe_data_seg)));
1801	MLX5_SET(wq, wq, log_wq_sz, priv->params.log_rq_size);
1802	MLX5_SET(wq, wq, pd, priv->pdn);
1803
1804	param->wq.buf_numa_node = 0;
1805	param->wq.db_numa_node = 0;
1806	param->wq.linear = 1;
1807}
1808
1809static void
1810mlx5e_build_sq_param(struct mlx5e_priv *priv,
1811    struct mlx5e_sq_param *param)
1812{
1813	void *sqc = param->sqc;
1814	void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
1815
1816	MLX5_SET(wq, wq, log_wq_sz, priv->params.log_sq_size);
1817	MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB));
1818	MLX5_SET(wq, wq, pd, priv->pdn);
1819
1820	param->wq.buf_numa_node = 0;
1821	param->wq.db_numa_node = 0;
1822	param->wq.linear = 1;
1823}
1824
1825static void
1826mlx5e_build_common_cq_param(struct mlx5e_priv *priv,
1827    struct mlx5e_cq_param *param)
1828{
1829	void *cqc = param->cqc;
1830
1831	MLX5_SET(cqc, cqc, uar_page, priv->cq_uar.index);
1832}
1833
1834static void
1835mlx5e_get_default_profile(struct mlx5e_priv *priv, int mode, struct net_dim_cq_moder *ptr)
1836{
1837
1838	*ptr = net_dim_get_profile(mode, MLX5E_DIM_DEFAULT_PROFILE);
1839
1840	/* apply LRO restrictions */
1841	if (priv->params.hw_lro_en &&
1842	    ptr->pkts > MLX5E_DIM_MAX_RX_CQ_MODERATION_PKTS_WITH_LRO) {
1843		ptr->pkts = MLX5E_DIM_MAX_RX_CQ_MODERATION_PKTS_WITH_LRO;
1844	}
1845}
1846
1847static void
1848mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
1849    struct mlx5e_cq_param *param)
1850{
1851	struct net_dim_cq_moder curr;
1852	void *cqc = param->cqc;
1853
1854
1855	/*
1856	 * TODO: The sysctl controlling this is a boolean for now, which means
1857	 * we only support CSUM; once HASH is implemented we'll need to address that.
1858	 */
1859	if (priv->params.cqe_zipping_en) {
1860		MLX5_SET(cqc, cqc, mini_cqe_res_format, MLX5_CQE_FORMAT_CSUM);
1861		MLX5_SET(cqc, cqc, cqe_compression_en, 1);
1862	}
1863
1864	MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_rq_size);
1865
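	/*
	 * Moderation modes 0 and 1 apply the static usec/pkts
	 * parameters (EQE- and CQE-based, respectively); modes 2 and 3
	 * start from a default net_dim profile and let dynamic
	 * interrupt moderation tune it at runtime (see
	 * mlx5e_refresh_rq_params()).
	 */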
1866	switch (priv->params.rx_cq_moderation_mode) {
1867	case 0:
1868		MLX5_SET(cqc, cqc, cq_period, priv->params.rx_cq_moderation_usec);
1869		MLX5_SET(cqc, cqc, cq_max_count, priv->params.rx_cq_moderation_pkts);
1870		MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
1871		break;
1872	case 1:
1873		MLX5_SET(cqc, cqc, cq_period, priv->params.rx_cq_moderation_usec);
1874		MLX5_SET(cqc, cqc, cq_max_count, priv->params.rx_cq_moderation_pkts);
1875		if (MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe))
1876			MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
1877		else
1878			MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
1879		break;
1880	case 2:
1881		mlx5e_get_default_profile(priv, NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE, &curr);
1882		MLX5_SET(cqc, cqc, cq_period, curr.usec);
1883		MLX5_SET(cqc, cqc, cq_max_count, curr.pkts);
1884		MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
1885		break;
1886	case 3:
1887		mlx5e_get_default_profile(priv, NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE, &curr);
1888		MLX5_SET(cqc, cqc, cq_period, curr.usec);
1889		MLX5_SET(cqc, cqc, cq_max_count, curr.pkts);
1890		if (MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe))
1891			MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
1892		else
1893			MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
1894		break;
1895	default:
1896		break;
1897	}
1898
1899	mlx5e_dim_build_cq_param(priv, param);
1900
1901	mlx5e_build_common_cq_param(priv, param);
1902}
1903
1904static void
1905mlx5e_build_tx_cq_param(struct mlx5e_priv *priv,
1906    struct mlx5e_cq_param *param)
1907{
1908	void *cqc = param->cqc;
1909
1910	MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_sq_size);
1911	MLX5_SET(cqc, cqc, cq_period, priv->params.tx_cq_moderation_usec);
1912	MLX5_SET(cqc, cqc, cq_max_count, priv->params.tx_cq_moderation_pkts);
1913
1914	switch (priv->params.tx_cq_moderation_mode) {
1915	case 0:
1916		MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
1917		break;
1918	default:
1919		if (MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe))
1920			MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
1921		else
1922			MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
1923		break;
1924	}
1925
1926	mlx5e_build_common_cq_param(priv, param);
1927}
1928
1929static void
1930mlx5e_build_channel_param(struct mlx5e_priv *priv,
1931    struct mlx5e_channel_param *cparam)
1932{
1933	memset(cparam, 0, sizeof(*cparam));
1934
1935	mlx5e_build_rq_param(priv, &cparam->rq);
1936	mlx5e_build_sq_param(priv, &cparam->sq);
1937	mlx5e_build_rx_cq_param(priv, &cparam->rx_cq);
1938	mlx5e_build_tx_cq_param(priv, &cparam->tx_cq);
1939}
1940
1941static int
1942mlx5e_open_channels(struct mlx5e_priv *priv)
1943{
1944	struct mlx5e_channel_param cparam;
1945	int err;
1946	int i;
1947	int j;
1948
1949	mlx5e_build_channel_param(priv, &cparam);
1950	for (i = 0; i < priv->params.num_channels; i++) {
1951		err = mlx5e_open_channel(priv, i, &cparam, &priv->channel[i]);
1952		if (err)
1953			goto err_close_channels;
1954	}
1955
1956	for (j = 0; j < priv->params.num_channels; j++) {
1957		err = mlx5e_wait_for_min_rx_wqes(&priv->channel[j].rq);
1958		if (err)
1959			goto err_close_channels;
1960	}
1961
1962	return (0);
1963
1964err_close_channels:
1965	while (i--) {
1966		mlx5e_close_channel(&priv->channel[i]);
1967		mlx5e_close_channel_wait(&priv->channel[i]);
1968	}
1969	return (err);
1970}
1971
1972static void
1973mlx5e_close_channels(struct mlx5e_priv *priv)
1974{
1975	int i;
1976
1977	for (i = 0; i < priv->params.num_channels; i++)
1978		mlx5e_close_channel(&priv->channel[i]);
1979	for (i = 0; i < priv->params.num_channels; i++)
1980		mlx5e_close_channel_wait(&priv->channel[i]);
1981}
1982
1983static int
1984mlx5e_refresh_sq_params(struct mlx5e_priv *priv, struct mlx5e_sq *sq)
1985{
1986
1987	if (MLX5_CAP_GEN(priv->mdev, cq_period_mode_modify)) {
1988		uint8_t cq_mode;
1989
1990		switch (priv->params.tx_cq_moderation_mode) {
1991		case 0:
1992		case 2:
1993			cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
1994			break;
1995		default:
1996			cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_CQE;
1997			break;
1998		}
1999
2000		return (mlx5_core_modify_cq_moderation_mode(priv->mdev, &sq->cq.mcq,
2001		    priv->params.tx_cq_moderation_usec,
2002		    priv->params.tx_cq_moderation_pkts,
2003		    cq_mode));
2004	}
2005
2006	return (mlx5_core_modify_cq_moderation(priv->mdev, &sq->cq.mcq,
2007	    priv->params.tx_cq_moderation_usec,
2008	    priv->params.tx_cq_moderation_pkts));
2009}
2010
2011static int
2012mlx5e_refresh_rq_params(struct mlx5e_priv *priv, struct mlx5e_rq *rq)
2013{
2014
2015	if (MLX5_CAP_GEN(priv->mdev, cq_period_mode_modify)) {
2016		uint8_t cq_mode;
2017		uint8_t dim_mode;
2018		int retval;
2019
2020		switch (priv->params.rx_cq_moderation_mode) {
2021		case 0:
2022		case 2:
2023			cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
2024			dim_mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE;
2025			break;
2026		default:
2027			cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_CQE;
2028			dim_mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE;
2029			break;
2030		}
2031
2032		/* tear down dynamic interrupt moderation */
2033		mtx_lock(&rq->mtx);
2034		rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_DISABLED;
2035		mtx_unlock(&rq->mtx);
2036
2037		/* wait for dynamic interrupt moderation work task, if any */
2038		cancel_work_sync(&rq->dim.work);
2039
2040		if (priv->params.rx_cq_moderation_mode >= 2) {
2041			struct net_dim_cq_moder curr;
2042
2043			mlx5e_get_default_profile(priv, dim_mode, &curr);
2044
2045			retval = mlx5_core_modify_cq_moderation_mode(priv->mdev, &rq->cq.mcq,
2046			    curr.usec, curr.pkts, cq_mode);
2047
2048			/* set dynamic interrupt moderation mode and zero defaults */
2049			mtx_lock(&rq->mtx);
2050			rq->dim.mode = dim_mode;
2051			rq->dim.state = 0;
2052			rq->dim.profile_ix = MLX5E_DIM_DEFAULT_PROFILE;
2053			mtx_unlock(&rq->mtx);
2054		} else {
2055			retval = mlx5_core_modify_cq_moderation_mode(priv->mdev, &rq->cq.mcq,
2056			    priv->params.rx_cq_moderation_usec,
2057			    priv->params.rx_cq_moderation_pkts,
2058			    cq_mode);
2059		}
2060		return (retval);
2061	}
2062
2063	return (mlx5_core_modify_cq_moderation(priv->mdev, &rq->cq.mcq,
2064	    priv->params.rx_cq_moderation_usec,
2065	    priv->params.rx_cq_moderation_pkts));
2066}
2067
2068static int
2069mlx5e_refresh_channel_params_sub(struct mlx5e_priv *priv, struct mlx5e_channel *c)
2070{
2071	int err;
2072	int i;
2073
2074	err = mlx5e_refresh_rq_params(priv, &c->rq);
2075	if (err)
2076		goto done;
2077
2078	for (i = 0; i != c->num_tc; i++) {
2079		err = mlx5e_refresh_sq_params(priv, &c->sq[i]);
2080		if (err)
2081			goto done;
2082	}
2083done:
2084	return (err);
2085}
2086
2087int
2088mlx5e_refresh_channel_params(struct mlx5e_priv *priv)
2089{
2090	int i;
2091
2092	/* check if channels are closed */
2093	if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
2094		return (EINVAL);
2095
2096	for (i = 0; i < priv->params.num_channels; i++) {
2097		int err;
2098
2099		err = mlx5e_refresh_channel_params_sub(priv, &priv->channel[i]);
2100		if (err)
2101			return (err);
2102	}
2103	return (0);
2104}
2105
2106static int
2107mlx5e_open_tis(struct mlx5e_priv *priv, int tc)
2108{
2109	struct mlx5_core_dev *mdev = priv->mdev;
2110	u32 in[MLX5_ST_SZ_DW(create_tis_in)];
2111	void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
2112
2113	memset(in, 0, sizeof(in));
2114
2115	MLX5_SET(tisc, tisc, prio, tc);
2116	MLX5_SET(tisc, tisc, transport_domain, priv->tdn);
2117
2118	return (mlx5_core_create_tis(mdev, in, sizeof(in), &priv->tisn[tc]));
2119}
2120
2121static void
2122mlx5e_close_tis(struct mlx5e_priv *priv, int tc)
2123{
2124	mlx5_core_destroy_tis(priv->mdev, priv->tisn[tc]);
2125}
2126
2127static int
2128mlx5e_open_tises(struct mlx5e_priv *priv)
2129{
2130	int num_tc = priv->num_tc;
2131	int err;
2132	int tc;
2133
2134	for (tc = 0; tc < num_tc; tc++) {
2135		err = mlx5e_open_tis(priv, tc);
2136		if (err)
2137			goto err_close_tises;
2138	}
2139
2140	return (0);
2141
2142err_close_tises:
2143	for (tc--; tc >= 0; tc--)
2144		mlx5e_close_tis(priv, tc);
2145
2146	return (err);
2147}
2148
2149static void
2150mlx5e_close_tises(struct mlx5e_priv *priv)
2151{
2152	int num_tc = priv->num_tc;
2153	int tc;
2154
2155	for (tc = 0; tc < num_tc; tc++)
2156		mlx5e_close_tis(priv, tc);
2157}
2158
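/*
 * Create the receive queue table, RQT, which maps RSS hash buckets to
 * receive queues. Each of the 2**rx_hash_log_tbl_sz entries is folded
 * into the range of open channels, honoring the kernel RSS
 * indirection table when compiled in, and the configured RSS stride.
 */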
2159static int
2160mlx5e_open_rqt(struct mlx5e_priv *priv)
2161{
2162	struct mlx5_core_dev *mdev = priv->mdev;
2163	u32 *in;
2164	u32 out[MLX5_ST_SZ_DW(create_rqt_out)] = {0};
2165	void *rqtc;
2166	int inlen;
2167	int err;
2168	int sz;
2169	int i;
2170
2171	sz = 1 << priv->params.rx_hash_log_tbl_sz;
2172
2173	inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz;
2174	in = mlx5_vzalloc(inlen);
2175	if (in == NULL)
2176		return (-ENOMEM);
2177	rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
2178
2179	MLX5_SET(rqtc, rqtc, rqt_actual_size, sz);
2180	MLX5_SET(rqtc, rqtc, rqt_max_size, sz);
2181
2182	for (i = 0; i < sz; i++) {
2183		int ix = i;
2184#ifdef RSS
2185		ix = rss_get_indirection_to_bucket(ix);
2186#endif
2187		/* ensure we don't overflow */
2188		ix %= priv->params.num_channels;
2189
2190		/* apply receive side scaling stride, if any */
2191		ix -= ix % (int)priv->params.channels_rsss;
2192
2193		MLX5_SET(rqtc, rqtc, rq_num[i], priv->channel[ix].rq.rqn);
2194	}
2195
2196	MLX5_SET(create_rqt_in, in, opcode, MLX5_CMD_OP_CREATE_RQT);
2197
2198	err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
2199	if (!err)
2200		priv->rqtn = MLX5_GET(create_rqt_out, out, rqtn);
2201
2202	kvfree(in);
2203
2204	return (err);
2205}
2206
2207static void
2208mlx5e_close_rqt(struct mlx5e_priv *priv)
2209{
2210	u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)] = {0};
2211	u32 out[MLX5_ST_SZ_DW(destroy_rqt_out)] = {0};
2212
2213	MLX5_SET(destroy_rqt_in, in, opcode, MLX5_CMD_OP_DESTROY_RQT);
2214	MLX5_SET(destroy_rqt_in, in, rqtn, priv->rqtn);
2215
2216	mlx5_cmd_exec(priv->mdev, in, sizeof(in), out, sizeof(out));
2217}
2218
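/*
 * Build the TIR context for the given traffic type, "tt".
 * MLX5E_TT_ANY dispatches directly to the first receive queue, while
 * all hashing traffic types dispatch indirectly through the RQT using
 * a Toeplitz hash. The hash key is either taken from the kernel RSS
 * subsystem or set to a fixed default, and the set of hashed header
 * fields depends on the traffic type.
 */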
2219static void
2220mlx5e_build_tir_ctx(struct mlx5e_priv *priv, u32 * tirc, int tt)
2221{
2222	void *hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
2223	__be32 *hkey;
2224
2225	MLX5_SET(tirc, tirc, transport_domain, priv->tdn);
2226
2227#define	ROUGH_MAX_L2_L3_HDR_SZ 256
2228
2229#define	MLX5_HASH_IP     (MLX5_HASH_FIELD_SEL_SRC_IP   |\
2230			  MLX5_HASH_FIELD_SEL_DST_IP)
2231
2232#define	MLX5_HASH_ALL    (MLX5_HASH_FIELD_SEL_SRC_IP   |\
2233			  MLX5_HASH_FIELD_SEL_DST_IP   |\
2234			  MLX5_HASH_FIELD_SEL_L4_SPORT |\
2235			  MLX5_HASH_FIELD_SEL_L4_DPORT)
2236
2237#define	MLX5_HASH_IP_IPSEC_SPI	(MLX5_HASH_FIELD_SEL_SRC_IP   |\
2238				 MLX5_HASH_FIELD_SEL_DST_IP   |\
2239				 MLX5_HASH_FIELD_SEL_IPSEC_SPI)
2240
2241	if (priv->params.hw_lro_en) {
2242		MLX5_SET(tirc, tirc, lro_enable_mask,
2243		    MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO |
2244		    MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO);
2245		MLX5_SET(tirc, tirc, lro_max_msg_sz,
2246		    (priv->params.lro_wqe_sz -
2247		    ROUGH_MAX_L2_L3_HDR_SZ) >> 8);
2248		/* TODO: add the option to choose timer value dynamically */
2249		MLX5_SET(tirc, tirc, lro_timeout_period_usecs,
2250		    MLX5_CAP_ETH(priv->mdev,
2251		    lro_timer_supported_periods[2]));
2252	}
2253
2254	/* setup parameters for hashing TIR type, if any */
2255	switch (tt) {
2256	case MLX5E_TT_ANY:
2257		MLX5_SET(tirc, tirc, disp_type,
2258		    MLX5_TIRC_DISP_TYPE_DIRECT);
2259		MLX5_SET(tirc, tirc, inline_rqn,
2260		    priv->channel[0].rq.rqn);
2261		break;
2262	default:
2263		MLX5_SET(tirc, tirc, disp_type,
2264		    MLX5_TIRC_DISP_TYPE_INDIRECT);
2265		MLX5_SET(tirc, tirc, indirect_table,
2266		    priv->rqtn);
2267		MLX5_SET(tirc, tirc, rx_hash_fn,
2268		    MLX5_TIRC_RX_HASH_FN_HASH_TOEPLITZ);
2269		hkey = (__be32 *) MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
2270#ifdef RSS
2271		/*
2272		 * The FreeBSD RSS implementation does not currently
2273		 * support symmetric Toeplitz hashes:
2274		 */
2275		MLX5_SET(tirc, tirc, rx_hash_symmetric, 0);
2276		rss_getkey((uint8_t *)hkey);
2277#else
2278		MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
2279		hkey[0] = cpu_to_be32(0xD181C62C);
2280		hkey[1] = cpu_to_be32(0xF7F4DB5B);
2281		hkey[2] = cpu_to_be32(0x1983A2FC);
2282		hkey[3] = cpu_to_be32(0x943E1ADB);
2283		hkey[4] = cpu_to_be32(0xD9389E6B);
2284		hkey[5] = cpu_to_be32(0xD1039C2C);
2285		hkey[6] = cpu_to_be32(0xA74499AD);
2286		hkey[7] = cpu_to_be32(0x593D56D9);
2287		hkey[8] = cpu_to_be32(0xF3253C06);
2288		hkey[9] = cpu_to_be32(0x2ADC1FFC);
2289#endif
2290		break;
2291	}
2292
2293	switch (tt) {
2294	case MLX5E_TT_IPV4_TCP:
2295		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2296		    MLX5_L3_PROT_TYPE_IPV4);
2297		MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
2298		    MLX5_L4_PROT_TYPE_TCP);
2299#ifdef RSS
2300		if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_TCP_IPV4)) {
2301			MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2302			    MLX5_HASH_IP);
2303		} else
2304#endif
2305		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2306		    MLX5_HASH_ALL);
2307		break;
2308
2309	case MLX5E_TT_IPV6_TCP:
2310		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2311		    MLX5_L3_PROT_TYPE_IPV6);
2312		MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
2313		    MLX5_L4_PROT_TYPE_TCP);
2314#ifdef RSS
2315		if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_TCP_IPV6)) {
2316			MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2317			    MLX5_HASH_IP);
2318		} else
2319#endif
2320		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2321		    MLX5_HASH_ALL);
2322		break;
2323
2324	case MLX5E_TT_IPV4_UDP:
2325		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2326		    MLX5_L3_PROT_TYPE_IPV4);
2327		MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
2328		    MLX5_L4_PROT_TYPE_UDP);
2329#ifdef RSS
2330		if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_UDP_IPV4)) {
2331			MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2332			    MLX5_HASH_IP);
2333		} else
2334#endif
2335		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2336		    MLX5_HASH_ALL);
2337		break;
2338
2339	case MLX5E_TT_IPV6_UDP:
2340		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2341		    MLX5_L3_PROT_TYPE_IPV6);
2342		MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
2343		    MLX5_L4_PROT_TYPE_UDP);
2344#ifdef RSS
2345		if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_UDP_IPV6)) {
2346			MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2347			    MLX5_HASH_IP);
2348		} else
2349#endif
2350		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2351		    MLX5_HASH_ALL);
2352		break;
2353
2354	case MLX5E_TT_IPV4_IPSEC_AH:
2355		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2356		    MLX5_L3_PROT_TYPE_IPV4);
2357		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2358		    MLX5_HASH_IP_IPSEC_SPI);
2359		break;
2360
2361	case MLX5E_TT_IPV6_IPSEC_AH:
2362		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2363		    MLX5_L3_PROT_TYPE_IPV6);
2364		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2365		    MLX5_HASH_IP_IPSEC_SPI);
2366		break;
2367
2368	case MLX5E_TT_IPV4_IPSEC_ESP:
2369		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2370		    MLX5_L3_PROT_TYPE_IPV4);
2371		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2372		    MLX5_HASH_IP_IPSEC_SPI);
2373		break;
2374
2375	case MLX5E_TT_IPV6_IPSEC_ESP:
2376		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2377		    MLX5_L3_PROT_TYPE_IPV6);
2378		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2379		    MLX5_HASH_IP_IPSEC_SPI);
2380		break;
2381
2382	case MLX5E_TT_IPV4:
2383		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2384		    MLX5_L3_PROT_TYPE_IPV4);
2385		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2386		    MLX5_HASH_IP);
2387		break;
2388
2389	case MLX5E_TT_IPV6:
2390		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2391		    MLX5_L3_PROT_TYPE_IPV6);
2392		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2393		    MLX5_HASH_IP);
2394		break;
2395
2396	default:
2397		break;
2398	}
2399}
2400
2401static int
2402mlx5e_open_tir(struct mlx5e_priv *priv, int tt)
2403{
2404	struct mlx5_core_dev *mdev = priv->mdev;
2405	u32 *in;
2406	void *tirc;
2407	int inlen;
2408	int err;
2409
2410	inlen = MLX5_ST_SZ_BYTES(create_tir_in);
2411	in = mlx5_vzalloc(inlen);
2412	if (in == NULL)
2413		return (-ENOMEM);
2414	tirc = MLX5_ADDR_OF(create_tir_in, in, tir_context);
2415
2416	mlx5e_build_tir_ctx(priv, tirc, tt);
2417
2418	err = mlx5_core_create_tir(mdev, in, inlen, &priv->tirn[tt]);
2419
2420	kvfree(in);
2421
2422	return (err);
2423}
2424
2425static void
2426mlx5e_close_tir(struct mlx5e_priv *priv, int tt)
2427{
2428	mlx5_core_destroy_tir(priv->mdev, priv->tirn[tt]);
2429}
2430
2431static int
2432mlx5e_open_tirs(struct mlx5e_priv *priv)
2433{
2434	int err;
2435	int i;
2436
2437	for (i = 0; i < MLX5E_NUM_TT; i++) {
2438		err = mlx5e_open_tir(priv, i);
2439		if (err)
2440			goto err_close_tirs;
2441	}
2442
2443	return (0);
2444
2445err_close_tirs:
2446	for (i--; i >= 0; i--)
2447		mlx5e_close_tir(priv, i);
2448
2449	return (err);
2450}
2451
2452static void
2453mlx5e_close_tirs(struct mlx5e_priv *priv)
2454{
2455	int i;
2456
2457	for (i = 0; i < MLX5E_NUM_TT; i++)
2458		mlx5e_close_tir(priv, i);
2459}
2460
2461/*
2462 * The SW MTU does not include headers, while the HW MTU
2463 * includes all headers and checksums.
2464 */
2465static int
2466mlx5e_set_dev_port_mtu(struct ifnet *ifp, int sw_mtu)
2467{
2468	struct mlx5e_priv *priv = ifp->if_softc;
2469	struct mlx5_core_dev *mdev = priv->mdev;
2470	int hw_mtu;
2471	int err;
2472
2473	hw_mtu = MLX5E_SW2HW_MTU(sw_mtu);
2474
2475	err = mlx5_set_port_mtu(mdev, hw_mtu);
2476	if (err) {
2477		if_printf(ifp, "%s: mlx5_set_port_mtu failed setting %d, err=%d\n",
2478		    __func__, sw_mtu, err);
2479		return (err);
2480	}
2481
2482	/* Update vport context MTU */
2483	err = mlx5_set_vport_mtu(mdev, hw_mtu);
2484	if (err) {
2485		if_printf(ifp, "%s: Failed updating vport context with MTU size, err=%d\n",
2486		    __func__, err);
2487	}
2488
2489	ifp->if_mtu = sw_mtu;
2490
2491	err = mlx5_query_vport_mtu(mdev, &hw_mtu);
2492	if (err || !hw_mtu) {
2493		/* fallback to port oper mtu */
2494		err = mlx5_query_port_oper_mtu(mdev, &hw_mtu);
2495	}
2496	if (err) {
2497		if_printf(ifp, "Querying port MTU after setting the "
2498		    "new MTU value failed\n");
2499		return (err);
2500	} else if (MLX5E_HW2SW_MTU(hw_mtu) < sw_mtu) {
2501		err = -E2BIG;
2502		if_printf(ifp, "Port MTU %d is smaller than "
2503		    "ifp mtu %d\n", hw_mtu, sw_mtu);
2504	} else if (MLX5E_HW2SW_MTU(hw_mtu) > sw_mtu) {
2505		err = -EINVAL;
2506		if_printf(ifp, "Port MTU %d is bigger than "
2507		    "ifp mtu %d\n", hw_mtu, sw_mtu);
2508	}
2509	priv->params_ethtool.hw_mtu = hw_mtu;
2510
2511	return (err);
2512}
2513
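/*
 * Bring up the data path under the priv lock. The setup order is:
 * TISes, queue counter, channels, RQT, TIRs, flow table and finally
 * the VLAN rules; the error paths unwind in the opposite order.
 */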
2514int
2515mlx5e_open_locked(struct ifnet *ifp)
2516{
2517	struct mlx5e_priv *priv = ifp->if_softc;
2518	int err;
2519	u16 set_id;
2520
2521	/* check if already opened */
2522	if (test_bit(MLX5E_STATE_OPENED, &priv->state) != 0)
2523		return (0);
2524
2525#ifdef RSS
2526	if (rss_getnumbuckets() > priv->params.num_channels) {
2527		if_printf(ifp, "NOTE: There are more RSS buckets(%u) than "
2528		    "channels(%u) available\n", rss_getnumbuckets(),
2529		    priv->params.num_channels);
2530	}
2531#endif
2532	err = mlx5e_open_tises(priv);
2533	if (err) {
2534		if_printf(ifp, "%s: mlx5e_open_tises failed, %d\n",
2535		    __func__, err);
2536		return (err);
2537	}
2538	err = mlx5_vport_alloc_q_counter(priv->mdev,
2539	    MLX5_INTERFACE_PROTOCOL_ETH, &set_id);
2540	if (err) {
2541		if_printf(priv->ifp,
2542		    "%s: mlx5_vport_alloc_q_counter failed: %d\n",
2543		    __func__, err);
2544		goto err_close_tises;
2545	}
2546	/* store counter set ID */
2547	priv->counter_set_id = set_id;
2548
2549	err = mlx5e_open_channels(priv);
2550	if (err) {
2551		if_printf(ifp, "%s: mlx5e_open_channels failed, %d\n",
2552		    __func__, err);
2553		goto err_dealloc_q_counter;
2554	}
2555	err = mlx5e_open_rqt(priv);
2556	if (err) {
2557		if_printf(ifp, "%s: mlx5e_open_rqt failed, %d\n",
2558		    __func__, err);
2559		goto err_close_channels;
2560	}
2561	err = mlx5e_open_tirs(priv);
2562	if (err) {
2563		if_printf(ifp, "%s: mlx5e_open_tirs failed, %d\n",
2564		    __func__, err);
2565		goto err_close_rqt;
2566	}
2567	err = mlx5e_open_flow_table(priv);
2568	if (err) {
2569		if_printf(ifp, "%s: mlx5e_open_flow_table failed, %d\n",
2570		    __func__, err);
2571		goto err_close_tirs;
2572	}
2573	err = mlx5e_add_all_vlan_rules(priv);
2574	if (err) {
2575		if_printf(ifp, "%s: mlx5e_add_all_vlan_rules failed, %d\n",
2576		    __func__, err);
2577		goto err_close_flow_table;
2578	}
2579	set_bit(MLX5E_STATE_OPENED, &priv->state);
2580
2581	mlx5e_update_carrier(priv);
2582	mlx5e_set_rx_mode_core(priv);
2583
2584	return (0);
2585
2586err_close_flow_table:
2587	mlx5e_close_flow_table(priv);
2588
2589err_close_tirs:
2590	mlx5e_close_tirs(priv);
2591
2592err_close_rqt:
2593	mlx5e_close_rqt(priv);
2594
2595err_close_channels:
2596	mlx5e_close_channels(priv);
2597
2598err_dealloc_q_counter:
2599	mlx5_vport_dealloc_q_counter(priv->mdev,
2600	    MLX5_INTERFACE_PROTOCOL_ETH, priv->counter_set_id);
2601
2602err_close_tises:
2603	mlx5e_close_tises(priv);
2604
2605	return (err);
2606}
2607
2608static void
2609mlx5e_open(void *arg)
2610{
2611	struct mlx5e_priv *priv = arg;
2612
2613	PRIV_LOCK(priv);
2614	if (mlx5_set_port_status(priv->mdev, MLX5_PORT_UP))
2615		if_printf(priv->ifp,
2616		    "%s: Setting port status to up failed\n",
2617		    __func__);
2618
2619	mlx5e_open_locked(priv->ifp);
2620	priv->ifp->if_drv_flags |= IFF_DRV_RUNNING;
2621	PRIV_UNLOCK(priv);
2622}
2623
2624int
2625mlx5e_close_locked(struct ifnet *ifp)
2626{
2627	struct mlx5e_priv *priv = ifp->if_softc;
2628
2629	/* check if already closed */
2630	if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
2631		return (0);
2632
2633	clear_bit(MLX5E_STATE_OPENED, &priv->state);
2634
2635	mlx5e_set_rx_mode_core(priv);
2636	mlx5e_del_all_vlan_rules(priv);
2637	if_link_state_change(priv->ifp, LINK_STATE_DOWN);
2638	mlx5e_close_flow_table(priv);
2639	mlx5e_close_tirs(priv);
2640	mlx5e_close_rqt(priv);
2641	mlx5e_close_channels(priv);
2642	mlx5_vport_dealloc_q_counter(priv->mdev,
2643	    MLX5_INTERFACE_PROTOCOL_ETH, priv->counter_set_id);
2644	mlx5e_close_tises(priv);
2645
2646	return (0);
2647}
2648
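/*
 * Return interface counters derived from the cached vport and pport
 * statistics. This callback may be invoked in contexts where taking
 * the priv lock is not allowed, so the counters are read unlocked.
 */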
2649#if (__FreeBSD_version >= 1100000)
2650static uint64_t
2651mlx5e_get_counter(struct ifnet *ifp, ift_counter cnt)
2652{
2653	struct mlx5e_priv *priv = ifp->if_softc;
2654	u64 retval;
2655
2656	/* PRIV_LOCK(priv); XXX not allowed */
2657	switch (cnt) {
2658	case IFCOUNTER_IPACKETS:
2659		retval = priv->stats.vport.rx_packets;
2660		break;
2661	case IFCOUNTER_IERRORS:
2662		retval = priv->stats.vport.rx_error_packets +
2663		    priv->stats.pport.alignment_err +
2664		    priv->stats.pport.check_seq_err +
2665		    priv->stats.pport.crc_align_errors +
2666		    priv->stats.pport.in_range_len_errors +
2667		    priv->stats.pport.jabbers +
2668		    priv->stats.pport.out_of_range_len +
2669		    priv->stats.pport.oversize_pkts +
2670		    priv->stats.pport.symbol_err +
2671		    priv->stats.pport.too_long_errors +
2672		    priv->stats.pport.undersize_pkts +
2673		    priv->stats.pport.unsupported_op_rx;
2674		break;
2675	case IFCOUNTER_IQDROPS:
2676		retval = priv->stats.vport.rx_out_of_buffer +
2677		    priv->stats.pport.drop_events;
2678		break;
2679	case IFCOUNTER_OPACKETS:
2680		retval = priv->stats.vport.tx_packets;
2681		break;
2682	case IFCOUNTER_OERRORS:
2683		retval = priv->stats.vport.tx_error_packets;
2684		break;
2685	case IFCOUNTER_IBYTES:
2686		retval = priv->stats.vport.rx_bytes;
2687		break;
2688	case IFCOUNTER_OBYTES:
2689		retval = priv->stats.vport.tx_bytes;
2690		break;
2691	case IFCOUNTER_IMCASTS:
2692		retval = priv->stats.vport.rx_multicast_packets;
2693		break;
2694	case IFCOUNTER_OMCASTS:
2695		retval = priv->stats.vport.tx_multicast_packets;
2696		break;
2697	case IFCOUNTER_OQDROPS:
2698		retval = priv->stats.vport.tx_queue_dropped;
2699		break;
2700	case IFCOUNTER_COLLISIONS:
2701		retval = priv->stats.pport.collisions;
2702		break;
2703	default:
2704		retval = if_get_counter_default(ifp, cnt);
2705		break;
2706	}
2707	/* PRIV_UNLOCK(priv); XXX not allowed */
2708	return (retval);
2709}
2710#endif
2711
2712static void
2713mlx5e_set_rx_mode(struct ifnet *ifp)
2714{
2715	struct mlx5e_priv *priv = ifp->if_softc;
2716
2717	queue_work(priv->wq, &priv->set_rx_mode_work);
2718}
2719
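/*
 * Interface ioctl handler. MTU changes restart the data path,
 * capability changes keep the TSO and checksum offload settings
 * consistent, and SIOCGI2C reads the module EEPROM in up to two
 * chunks.
 */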
2720static int
2721mlx5e_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
2722{
2723	struct mlx5e_priv *priv;
2724	struct ifreq *ifr;
2725	struct ifi2creq i2c;
2726	int error = 0;
2727	int mask = 0;
2728	int size_read = 0;
2729	int module_status;
2730	int module_num;
2731	int max_mtu;
2732	uint8_t read_addr;
2733
2734	priv = ifp->if_softc;
2735
2736	/* check if detaching */
2737	if (priv == NULL || priv->gone != 0)
2738		return (ENXIO);
2739
2740	switch (command) {
2741	case SIOCSIFMTU:
2742		ifr = (struct ifreq *)data;
2743
2744		PRIV_LOCK(priv);
2745		mlx5_query_port_max_mtu(priv->mdev, &max_mtu);
2746
2747		if (ifr->ifr_mtu >= MLX5E_MTU_MIN &&
2748		    ifr->ifr_mtu <= MIN(MLX5E_MTU_MAX, max_mtu)) {
2749			int was_opened;
2750
2751			was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
2752			if (was_opened)
2753				mlx5e_close_locked(ifp);
2754
2755			/* set new MTU */
2756			mlx5e_set_dev_port_mtu(ifp, ifr->ifr_mtu);
2757
2758			if (was_opened)
2759				mlx5e_open_locked(ifp);
2760		} else {
2761			error = EINVAL;
2762			if_printf(ifp, "Invalid MTU value. Min val: %d, Max val: %d\n",
2763			    MLX5E_MTU_MIN, MIN(MLX5E_MTU_MAX, max_mtu));
2764		}
2765		PRIV_UNLOCK(priv);
2766		break;
2767	case SIOCSIFFLAGS:
2768		if ((ifp->if_flags & IFF_UP) &&
2769		    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
2770			mlx5e_set_rx_mode(ifp);
2771			break;
2772		}
2773		PRIV_LOCK(priv);
2774		if (ifp->if_flags & IFF_UP) {
2775			if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
2776				if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
2777					mlx5e_open_locked(ifp);
2778				ifp->if_drv_flags |= IFF_DRV_RUNNING;
2779				mlx5_set_port_status(priv->mdev, MLX5_PORT_UP);
2780			}
2781		} else {
2782			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2783				mlx5_set_port_status(priv->mdev,
2784				    MLX5_PORT_DOWN);
2785				if (test_bit(MLX5E_STATE_OPENED, &priv->state) != 0)
2786					mlx5e_close_locked(ifp);
2787				mlx5e_update_carrier(priv);
2788				ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2789			}
2790		}
2791		PRIV_UNLOCK(priv);
2792		break;
2793	case SIOCADDMULTI:
2794	case SIOCDELMULTI:
2795		mlx5e_set_rx_mode(ifp);
2796		break;
2797	case SIOCSIFMEDIA:
2798	case SIOCGIFMEDIA:
2799	case SIOCGIFXMEDIA:
2800		ifr = (struct ifreq *)data;
2801		error = ifmedia_ioctl(ifp, ifr, &priv->media, command);
2802		break;
2803	case SIOCSIFCAP:
2804		ifr = (struct ifreq *)data;
2805		PRIV_LOCK(priv);
2806		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
2807
2808		if (mask & IFCAP_TXCSUM) {
2809			ifp->if_capenable ^= IFCAP_TXCSUM;
2810			ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);
2811
2812			if (IFCAP_TSO4 & ifp->if_capenable &&
2813			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
2814				ifp->if_capenable &= ~IFCAP_TSO4;
2815				ifp->if_hwassist &= ~CSUM_IP_TSO;
2816				if_printf(ifp,
2817				    "tso4 disabled due to -txcsum.\n");
2818			}
2819		}
2820		if (mask & IFCAP_TXCSUM_IPV6) {
2821			ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
2822			ifp->if_hwassist ^= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);
2823
2824			if (IFCAP_TSO6 & ifp->if_capenable &&
2825			    !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
2826				ifp->if_capenable &= ~IFCAP_TSO6;
2827				ifp->if_hwassist &= ~CSUM_IP6_TSO;
2828				if_printf(ifp,
2829				    "tso6 disabled due to -txcsum6.\n");
2830			}
2831		}
2832		if (mask & IFCAP_RXCSUM)
2833			ifp->if_capenable ^= IFCAP_RXCSUM;
2834		if (mask & IFCAP_RXCSUM_IPV6)
2835			ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
2836		if (mask & IFCAP_TSO4) {
2837			if (!(IFCAP_TSO4 & ifp->if_capenable) &&
2838			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
2839				if_printf(ifp, "enable txcsum first.\n");
2840				error = EAGAIN;
2841				goto out;
2842			}
2843			ifp->if_capenable ^= IFCAP_TSO4;
2844			ifp->if_hwassist ^= CSUM_IP_TSO;
2845		}
2846		if (mask & IFCAP_TSO6) {
2847			if (!(IFCAP_TSO6 & ifp->if_capenable) &&
2848			    !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
2849				if_printf(ifp, "enable txcsum6 first.\n");
2850				error = EAGAIN;
2851				goto out;
2852			}
2853			ifp->if_capenable ^= IFCAP_TSO6;
2854			ifp->if_hwassist ^= CSUM_IP6_TSO;
2855		}
2856		if (mask & IFCAP_VLAN_HWFILTER) {
2857			if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
2858				mlx5e_disable_vlan_filter(priv);
2859			else
2860				mlx5e_enable_vlan_filter(priv);
2861
2862			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
2863		}
2864		if (mask & IFCAP_VLAN_HWTAGGING)
2865			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
2866		if (mask & IFCAP_WOL_MAGIC)
2867			ifp->if_capenable ^= IFCAP_WOL_MAGIC;
2868
2869		VLAN_CAPABILITIES(ifp);
2870		/* turning off LRO also means turning off HW LRO, if enabled */
2871		if (mask & IFCAP_LRO) {
2872			int was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
2873			bool need_restart = false;
2874
2875			ifp->if_capenable ^= IFCAP_LRO;
2876
2877			/* figure out if updating HW LRO is needed */
2878			if (!(ifp->if_capenable & IFCAP_LRO)) {
2879				if (priv->params.hw_lro_en) {
2880					priv->params.hw_lro_en = false;
2881					need_restart = true;
2882				}
2883			} else {
2884				if (priv->params.hw_lro_en == false &&
2885				    priv->params_ethtool.hw_lro != 0) {
2886					priv->params.hw_lro_en = true;
2887					need_restart = true;
2888				}
2889			}
2890			if (was_opened && need_restart) {
2891				mlx5e_close_locked(ifp);
2892				mlx5e_open_locked(ifp);
2893			}
2894		}
2895out:
2896		PRIV_UNLOCK(priv);
2897		break;
2898
2899	case SIOCGI2C:
2900		ifr = (struct ifreq *)data;
2901
2902		/*
2903		 * Copy from the user-space address ifr_data to the
2904		 * kernel-space address i2c
2905		 */
2906		error = copyin(ifr_data_get_ptr(ifr), &i2c, sizeof(i2c));
2907		if (error)
2908			break;
2909
2910		if (i2c.len > sizeof(i2c.data)) {
2911			error = EINVAL;
2912			break;
2913		}
2914
2915		PRIV_LOCK(priv);
2916		/* Get module_num which is required for the query_eeprom */
2917		error = mlx5_query_module_num(priv->mdev, &module_num);
2918		if (error) {
2919			if_printf(ifp, "Query module num failed, eeprom "
2920			    "reading is not supported\n");
2921			error = EINVAL;
2922			goto err_i2c;
2923		}
2924		/* Check if module is present before doing an access */
2925		module_status = mlx5_query_module_status(priv->mdev, module_num);
2926		if (module_status != MLX5_MODULE_STATUS_PLUGGED_ENABLED &&
2927		    module_status != MLX5_MODULE_STATUS_PLUGGED_DISABLED) {
2928			error = EINVAL;
2929			goto err_i2c;
2930		}
2931		/*
2932		 * Currently 0xA0 and 0xA2 are the only addresses permitted.
2933		 * The internal conversion is as follows:
2934		 */
2935		if (i2c.dev_addr == 0xA0)
2936			read_addr = MLX5E_I2C_ADDR_LOW;
2937		else if (i2c.dev_addr == 0xA2)
2938			read_addr = MLX5E_I2C_ADDR_HIGH;
2939		else {
2940			if_printf(ifp, "Query eeprom failed, "
2941			    "Invalid Address: %X\n", i2c.dev_addr);
2942			error = EINVAL;
2943			goto err_i2c;
2944		}
2945		error = mlx5_query_eeprom(priv->mdev,
2946		    read_addr, MLX5E_EEPROM_LOW_PAGE,
2947		    (uint32_t)i2c.offset, (uint32_t)i2c.len, module_num,
2948		    (uint32_t *)i2c.data, &size_read);
2949		if (error) {
2950			if_printf(ifp, "Query eeprom failed, eeprom "
2951			    "reading is not supported\n");
2952			error = EINVAL;
2953			goto err_i2c;
2954		}
2955
2956		if (i2c.len > MLX5_EEPROM_MAX_BYTES) {
2957			error = mlx5_query_eeprom(priv->mdev,
2958			    read_addr, MLX5E_EEPROM_LOW_PAGE,
2959			    (uint32_t)(i2c.offset + size_read),
2960			    (uint32_t)(i2c.len - size_read), module_num,
2961			    (uint32_t *)(i2c.data + size_read), &size_read);
2962		}
2963		if (error) {
2964			if_printf(ifp, "Query eeprom failed, eeprom "
2965			    "reading is not supported\n");
2966			error = EINVAL;
2967			goto err_i2c;
2968		}
2969
2970		error = copyout(&i2c, ifr_data_get_ptr(ifr), sizeof(i2c));
2971err_i2c:
2972		PRIV_UNLOCK(priv);
2973		break;
2974
2975	default:
2976		error = ether_ioctl(ifp, command, data);
2977		break;
2978	}
2979	return (error);
2980}
2981
2982static int
2983mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev)
2984{
2985	/*
2986	 * TODO: uncomment once FW really sets all these bits:
2987	 * if (!mdev->caps.eth.rss_ind_tbl_cap || !mdev->caps.eth.csum_cap ||
2988	 *     !mdev->caps.eth.max_lso_cap || !mdev->caps.eth.vlan_cap ||
2989	 *     !(mdev->caps.gen.flags & MLX5_DEV_CAP_FLAG_SCQE_BRK_MOD))
2990	 *	return (-ENOTSUPP);
2991	 */
2992
2993	/* TODO: add more must-have features */
2994
2995	if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
2996		return (-ENODEV);
2997
2998	return (0);
2999}
3000
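/*
 * Compute the maximum number of payload bytes that can be inlined
 * into a send WQE. The limit is derived from half of the blueflame
 * register size, less the fixed send WQE overhead, and is further
 * clamped by the driver's own MLX5E_MAX_TX_INLINE limit.
 */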
3001static u16
3002mlx5e_get_max_inline_cap(struct mlx5_core_dev *mdev)
3003{
3004	uint32_t bf_buf_size = (1U << MLX5_CAP_GEN(mdev, log_bf_reg_size)) / 2U;
3005
3006	bf_buf_size -= sizeof(struct mlx5e_tx_wqe) - 2;
3007
3008	/* verify against driver hardware limit */
3009	if (bf_buf_size > MLX5E_MAX_TX_INLINE)
3010		bf_buf_size = MLX5E_MAX_TX_INLINE;
3011
3012	return (bf_buf_size);
3013}
3014
3015static int
3016mlx5e_build_ifp_priv(struct mlx5_core_dev *mdev,
3017    struct mlx5e_priv *priv,
3018    int num_comp_vectors)
3019{
3020	int err;
3021
3022	/*
3023	 * TODO: Consider link speed for setting "log_sq_size",
3024	 * "log_rq_size" and "cq_moderation_xxx":
3025	 */
3026	priv->params.log_sq_size =
3027	    MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
3028	priv->params.log_rq_size =
3029	    MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE;
3030	priv->params.rx_cq_moderation_usec =
3031	    MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ?
3032	    MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE :
3033	    MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC;
3034	priv->params.rx_cq_moderation_mode =
3035	    MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ? 1 : 0;
3036	priv->params.rx_cq_moderation_pkts =
3037	    MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS;
3038	priv->params.tx_cq_moderation_usec =
3039	    MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC;
3040	priv->params.tx_cq_moderation_pkts =
3041	    MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS;
3042	priv->params.min_rx_wqes =
3043	    MLX5E_PARAMS_DEFAULT_MIN_RX_WQES;
3044	priv->params.rx_hash_log_tbl_sz =
3045	    (order_base_2(num_comp_vectors) >
3046	    MLX5E_PARAMS_DEFAULT_RX_HASH_LOG_TBL_SZ) ?
3047	    order_base_2(num_comp_vectors) :
3048	    MLX5E_PARAMS_DEFAULT_RX_HASH_LOG_TBL_SZ;
3049	priv->params.num_tc = 1;
3050	priv->params.default_vlan_prio = 0;
3051	priv->counter_set_id = -1;
3052	priv->params.tx_max_inline = mlx5e_get_max_inline_cap(mdev);
3053
3054	err = mlx5_query_min_inline(mdev, &priv->params.tx_min_inline_mode);
3055	if (err)
3056		return (err);
3057
3058	/*
3059	 * HW LRO currently defaults to off. Once that changes, the HW
3060	 * capability "!!MLX5_CAP_ETH(mdev, lro_cap)" should be consulted.
3061	 */
3062	priv->params.hw_lro_en = false;
3063	priv->params.lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ;
3064
3065	priv->params.cqe_zipping_en = !!MLX5_CAP_GEN(mdev, cqe_compression);
3066
3067	priv->mdev = mdev;
3068	priv->params.num_channels = num_comp_vectors;
3069	priv->params.channels_rsss = 1;
3070	priv->order_base_2_num_channels = order_base_2(num_comp_vectors);
3071	priv->queue_mapping_channel_mask =
3072	    roundup_pow_of_two(num_comp_vectors) - 1;
3073	priv->num_tc = priv->params.num_tc;
3074	priv->default_vlan_prio = priv->params.default_vlan_prio;
3075
3076	INIT_WORK(&priv->update_stats_work, mlx5e_update_stats_work);
3077	INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work);
3078	INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work);
3079
3080	return (0);
3081}
3082
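/*
 * Create a memory key with PA access mode and length64 set, covering
 * the whole physical address space of the protection domain, so that
 * DMA buffers can be referenced directly by physical address without
 * per-buffer memory registrations.
 */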
3083static int
3084mlx5e_create_mkey(struct mlx5e_priv *priv, u32 pdn,
3085		  struct mlx5_core_mr *mkey)
3086{
3087	struct ifnet *ifp = priv->ifp;
3088	struct mlx5_core_dev *mdev = priv->mdev;
3089	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
3090	void *mkc;
3091	u32 *in;
3092	int err;
3093
3094	in = mlx5_vzalloc(inlen);
3095	if (in == NULL) {
3096		if_printf(ifp, "%s: failed to allocate inbox\n", __func__);
3097		return (-ENOMEM);
3098	}
3099
3100	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
3101	MLX5_SET(mkc, mkc, access_mode, MLX5_ACCESS_MODE_PA);
3102	MLX5_SET(mkc, mkc, lw, 1);
3103	MLX5_SET(mkc, mkc, lr, 1);
3104
3105	MLX5_SET(mkc, mkc, pd, pdn);
3106	MLX5_SET(mkc, mkc, length64, 1);
3107	MLX5_SET(mkc, mkc, qpn, 0xffffff);
3108
3109	err = mlx5_core_create_mkey(mdev, mkey, in, inlen);
3110	if (err)
3111		if_printf(ifp, "%s: mlx5_core_create_mkey failed, %d\n",
3112		    __func__, err);
3113
3114	kvfree(in);
3115	return (err);
3116}
3117
3118static const char *mlx5e_vport_stats_desc[] = {
3119	MLX5E_VPORT_STATS(MLX5E_STATS_DESC)
3120};
3121
3122static const char *mlx5e_pport_stats_desc[] = {
3123	MLX5E_PPORT_STATS(MLX5E_STATS_DESC)
3124};
3125
3126static void
3127mlx5e_priv_mtx_init(struct mlx5e_priv *priv)
3128{
3129	mtx_init(&priv->async_events_mtx, "mlx5async", MTX_NETWORK_LOCK, MTX_DEF);
3130	sx_init(&priv->state_lock, "mlx5state");
3131	callout_init_mtx(&priv->watchdog, &priv->async_events_mtx, 0);
3132	MLX5_INIT_DOORBELL_LOCK(&priv->doorbell_lock);
3133}
3134
3135static void
3136mlx5e_priv_mtx_destroy(struct mlx5e_priv *priv)
3137{
3138	mtx_destroy(&priv->async_events_mtx);
3139	sx_destroy(&priv->state_lock);
3140}
3141
3142static int
3143sysctl_firmware(SYSCTL_HANDLER_ARGS)
3144{
3145	/*
3146	 * The firmware version string format is "%d.%d.%d".
3147	 * fw_rev_{maj,min,sub} each return a u16; 2^16 = 65536, so at
3148	 * most 5 characters are needed to store each value. Adding the
3149	 * two "." separators and the terminating NUL, at most 18
3150	 * (5*3 + 3) characters are needed.
3151	 */
3152	char fw[18];
3153	struct mlx5e_priv *priv = arg1;
3154	int error;
3155
3156	snprintf(fw, sizeof(fw), "%d.%d.%d", fw_rev_maj(priv->mdev), fw_rev_min(priv->mdev),
3157	    fw_rev_sub(priv->mdev));
3158	error = sysctl_handle_string(oidp, fw, sizeof(fw), req);
3159	return (error);
3160}
3161
3162static void
3163mlx5e_disable_tx_dma(struct mlx5e_channel *ch)
3164{
3165	int i;
3166
3167	for (i = 0; i < ch->num_tc; i++)
3168		mlx5e_drain_sq(&ch->sq[i]);
3169}
3170
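/*
 * Write a NOP doorbell record and ring the doorbell so that the
 * hardware and software producer indices agree before the send queue
 * is moved from the RST to the RDY state.
 */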
3171static void
3172mlx5e_reset_sq_doorbell_record(struct mlx5e_sq *sq)
3173{
3174
3175	sq->doorbell.d32[0] = cpu_to_be32(MLX5_OPCODE_NOP);
3176	sq->doorbell.d32[1] = cpu_to_be32(sq->sqn << 8);
3177	mlx5e_tx_notify_hw(sq, sq->doorbell.d32, 0);
3178	sq->doorbell.d64 = 0;
3179}
3180
3181void
3182mlx5e_resume_sq(struct mlx5e_sq *sq)
3183{
3184	int err;
3185
3186	/* check if already enabled */
3187	if (READ_ONCE(sq->running) != 0)
3188		return;
3189
3190	err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_ERR,
3191	    MLX5_SQC_STATE_RST);
3192	if (err != 0) {
3193		if_printf(sq->ifp,
3194		    "mlx5e_modify_sq() from ERR to RST failed: %d\n", err);
3195	}
3196
3197	sq->cc = 0;
3198	sq->pc = 0;
3199
3200	/* reset doorbell prior to moving from RST to RDY */
3201	mlx5e_reset_sq_doorbell_record(sq);
3202
3203	err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RST,
3204	    MLX5_SQC_STATE_RDY);
3205	if (err != 0) {
3206		if_printf(sq->ifp,
3207		    "mlx5e_modify_sq() from RST to RDY failed: %d\n", err);
3208	}
3209
3210	sq->cev_next_state = MLX5E_CEV_STATE_INITIAL;
3211	WRITE_ONCE(sq->running, 1);
3212}
3213
3214static void
3215mlx5e_enable_tx_dma(struct mlx5e_channel *ch)
3216{
3217	int i;
3218
3219	for (i = 0; i < ch->num_tc; i++)
3220		mlx5e_resume_sq(&ch->sq[i]);
3221}
3222
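/*
 * Stop RX DMA on a channel: mark the receive queue disabled, move it
 * from RDY to ERR so the hardware completes all outstanding WQEs,
 * poll completions until the work queue is empty, and finally park
 * the queue in the RST state.
 */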
3223static void
3224mlx5e_disable_rx_dma(struct mlx5e_channel *ch)
3225{
3226	struct mlx5e_rq *rq = &ch->rq;
3227	int err;
3228
3229	mtx_lock(&rq->mtx);
3230	rq->enabled = 0;
3231	callout_stop(&rq->watchdog);
3232	mtx_unlock(&rq->mtx);
3233
3234	callout_drain(&rq->watchdog);
3235
3236	err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR);
3237	if (err != 0) {
3238		if_printf(rq->ifp,
3239		    "mlx5e_modify_rq() from RDY to ERR failed: %d\n", err);
3240	}
3241
3242	while (!mlx5_wq_ll_is_empty(&rq->wq)) {
3243		msleep(1);
3244		rq->cq.mcq.comp(&rq->cq.mcq);
3245	}
3246
3247	/*
3248	 * Transitioning into the RST state allows the FW to track fewer
3249	 * ERR-state queues, thus reducing the receive queue flushing time.
3250	 */
3251	err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_ERR, MLX5_RQC_STATE_RST);
3252	if (err != 0) {
3253		if_printf(rq->ifp,
3254		    "mlx5e_modify_rq() from ERR to RST failed: %d\n", err);
3255	}
3256}
3257
3258static void
3259mlx5e_enable_rx_dma(struct mlx5e_channel *ch)
3260{
3261	struct mlx5e_rq *rq = &ch->rq;
3262	int err;
3263
3264	rq->wq.wqe_ctr = 0;
3265	mlx5_wq_ll_update_db_record(&rq->wq);
3266	err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY);
3267	if (err != 0) {
3268		if_printf(rq->ifp,
3269		    "mlx5e_modify_rq() from RST to RDY failed: %d\n", err);
3270	}
3271
3272	rq->enabled = 1;
3273
3274	rq->cq.mcq.comp(&rq->cq.mcq);
3275}
3276
3277void
3278mlx5e_modify_tx_dma(struct mlx5e_priv *priv, uint8_t value)
3279{
3280	int i;
3281
3282	if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
3283		return;
3284
3285	for (i = 0; i < priv->params.num_channels; i++) {
3286		if (value)
3287			mlx5e_disable_tx_dma(&priv->channel[i]);
3288		else
3289			mlx5e_enable_tx_dma(&priv->channel[i]);
3290	}
3291}
3292
3293void
3294mlx5e_modify_rx_dma(struct mlx5e_priv *priv, uint8_t value)
3295{
3296	int i;
3297
3298	if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
3299		return;
3300
3301	for (i = 0; i < priv->params.num_channels; i++) {
3302		if (value)
3303			mlx5e_disable_rx_dma(&priv->channel[i]);
3304		else
3305			mlx5e_enable_rx_dma(&priv->channel[i]);
3306	}
3307}
3308
3309static void
3310mlx5e_add_hw_stats(struct mlx5e_priv *priv)
3311{
3312	SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_hw),
3313	    OID_AUTO, "fw_version", CTLTYPE_STRING | CTLFLAG_RD, priv, 0,
3314	    sysctl_firmware, "A", "HCA firmware version");
3315
3316	SYSCTL_ADD_STRING(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_hw),
3317	    OID_AUTO, "board_id", CTLFLAG_RD, priv->mdev->board_id, 0,
3318	    "Board ID");
3319}
3320
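/*
 * Sysctl handler for a single TX PFC priority bit; "arg2" selects the
 * priority. An illustrative usage from userland, where the unit
 * number and priority are examples only:
 *
 *	sysctl dev.mce.0.tx_priority_flow_control_3=1
 *
 * On failure to program the port, the previous setting is restored.
 */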
3321static int
3322mlx5e_sysctl_tx_priority_flow_control(SYSCTL_HANDLER_ARGS)
3323{
3324	struct mlx5e_priv *priv = arg1;
3325	uint32_t tx_pfc;
3326	uint32_t value;
3327	int error;
3328
3329	PRIV_LOCK(priv);
3330
3331	tx_pfc = priv->params.tx_priority_flow_control;
3332
3333	/* get current value */
3334	value = (tx_pfc >> arg2) & 1;
3335
3336	error = sysctl_handle_32(oidp, &value, 0, req);
3337
3338	/* update the bit; any non-zero value counts as set */
3339	if (value != 0)
3340		priv->params.tx_priority_flow_control |= (1 << arg2);
3341	else
3342		priv->params.tx_priority_flow_control &= ~(1 << arg2);
3343
3344	/* check if update is required */
3345	if (error == 0 && priv->gone == 0 &&
3346	    tx_pfc != priv->params.tx_priority_flow_control) {
3347		error = -mlx5e_set_port_pfc(priv);
3348		/* restore previous value */
3349		if (error != 0)
3350			priv->params.tx_priority_flow_control = tx_pfc;
3351	}
3352	PRIV_UNLOCK(priv);
3353
3354	return (error);
3355}
3356
3357static int
3358mlx5e_sysctl_rx_priority_flow_control(SYSCTL_HANDLER_ARGS)
3359{
3360	struct mlx5e_priv *priv = arg1;
3361	uint32_t rx_pfc;
3362	uint32_t value;
3363	int error;
3364
3365	PRIV_LOCK(priv);
3366
3367	rx_pfc = priv->params.rx_priority_flow_control;
3368
3369	/* get current value */
3370	value = (rx_pfc >> arg2) & 1;
3371
3372	error = sysctl_handle_32(oidp, &value, 0, req);
3373
3374	/* update the bit; any non-zero value counts as set */
3375	if (value != 0)
3376		priv->params.rx_priority_flow_control |= (1 << arg2);
3377	else
3378		priv->params.rx_priority_flow_control &= ~(1 << arg2);
3379
3380	/* check if update is required */
3381	if (error == 0 && priv->gone == 0 &&
3382	    rx_pfc != priv->params.rx_priority_flow_control) {
3383		error = -mlx5e_set_port_pfc(priv);
3384		/* restore previous value */
3385		if (error != 0)
3386			priv->params.rx_priority_flow_control = rx_pfc;
3387	}
3388	PRIV_UNLOCK(priv);
3389
3390	return (error);
3391}
3392
3393static void
3394mlx5e_setup_pauseframes(struct mlx5e_priv *priv)
3395{
3396	unsigned int x;
3397	char path[96];
3398	int error;
3399
3400	/* enable pauseframes by default */
3401	priv->params.tx_pauseframe_control = 1;
3402	priv->params.rx_pauseframe_control = 1;
3403
3404	/* disable ports flow control, PFC, by default */
3405	priv->params.tx_priority_flow_control = 0;
3406	priv->params.rx_priority_flow_control = 0;
3407
3408#if (__FreeBSD_version < 1100000)
3409	/* compute path for sysctl */
3410	snprintf(path, sizeof(path), "dev.mce.%d.tx_pauseframe_control",
3411	    device_get_unit(priv->mdev->pdev->dev.bsddev));
3412
3413	/* try to fetch tunable, if any */
3414	TUNABLE_INT_FETCH(path, &priv->params.tx_pauseframe_control);
3415
3416	/* compute path for sysctl */
3417	snprintf(path, sizeof(path), "dev.mce.%d.rx_pauseframe_control",
3418	    device_get_unit(priv->mdev->pdev->dev.bsddev));
3419
3420	/* try to fetch tunable, if any */
3421	TUNABLE_INT_FETCH(path, &priv->params.rx_pauseframe_control);
3422
3423	for (x = 0; x != 8; x++) {
3424		int value;
3425		/* compute path for sysctl */
3426		snprintf(path, sizeof(path), "dev.mce.%d.tx_priority_flow_control_%u",
3427		    device_get_unit(priv->mdev->pdev->dev.bsddev), x);
3428
3429		/* try to fetch tunable, if any */
3430		if (TUNABLE_INT_FETCH(path, &value) != 0 && value != 0)
3431			priv->params.tx_priority_flow_control |= 1 << x;
3432
3433		/* compute path for sysctl */
3434		snprintf(path, sizeof(path), "dev.mce.%d.rx_priority_flow_control_%u",
3435		    device_get_unit(priv->mdev->pdev->dev.bsddev), x);
3436
3437		/* try to fetch tunable, if any */
3438		if (TUNABLE_INT_FETCH(path, &value) != 0 && value != 0)
3439			priv->params.rx_priority_flow_control |= 1 << x;
3440	}
3441#endif
3442
3443	/* register pauseframe SYSCTLs */
3444	SYSCTL_ADD_INT(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3445	    OID_AUTO, "tx_pauseframe_control", CTLFLAG_RDTUN,
3446	    &priv->params.tx_pauseframe_control, 0,
3447	    "Set to enable TX pause frames. Clear to disable.");
3448
3449	SYSCTL_ADD_INT(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3450	    OID_AUTO, "rx_pauseframe_control", CTLFLAG_RDTUN,
3451	    &priv->params.rx_pauseframe_control, 0,
3452	    "Set to enable RX pause frames. Clear to disable.");
3453
3454	/* register priority_flow control, PFC, SYSCTLs */
3455	for (x = 0; x != 8; x++) {
3456		snprintf(path, sizeof(path), "tx_priority_flow_control_%u", x);
3457
3458		SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3459		    OID_AUTO, path, CTLTYPE_UINT | CTLFLAG_RWTUN |
3460		    CTLFLAG_MPSAFE, priv, x, &mlx5e_sysctl_tx_priority_flow_control, "IU",
3461		    "Set to enable TX ports flow control frames for given priority. Clear to disable.");
3462
3463		snprintf(path, sizeof(path), "rx_priority_flow_control_%u", x);
3464
3465		SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3466		    OID_AUTO, path, CTLTYPE_UINT | CTLFLAG_RWTUN |
3467		    CTLFLAG_MPSAFE, priv, x, &mlx5e_sysctl_rx_priority_flow_control, "IU",
3468		    "Set to enable RX ports flow control frames for given priority. Clear to disable.");
3469	}
3470
3471	PRIV_LOCK(priv);
3472
3473	/* range check */
3474	priv->params.tx_pauseframe_control =
3475	    priv->params.tx_pauseframe_control ? 1 : 0;
3476	priv->params.rx_pauseframe_control =
3477	    priv->params.rx_pauseframe_control ? 1 : 0;
3478
3479	/* update firmware */
3480	error = mlx5e_set_port_pause_and_pfc(priv);
3481	if (error == -EINVAL) {
3482		if_printf(priv->ifp,
3483		    "Global pauseframes must be disabled before enabling PFC.\n");
3484		priv->params.rx_priority_flow_control = 0;
3485		priv->params.tx_priority_flow_control = 0;
3486
3487		/* update firmware */
3488		(void) mlx5e_set_port_pause_and_pfc(priv);
3489	}
3490	PRIV_UNLOCK(priv);
3491}
3492
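/*
 * Attach callback: allocate the per-interface private structure with
 * room for the worst-case number of channels, create the ifnet and
 * sysctl trees, allocate the UAR, PD, transport domain and memory
 * key, configure media and pause frames, and finally attach the
 * Ethernet interface and its statistics.
 */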
3493static void *
3494mlx5e_create_ifp(struct mlx5_core_dev *mdev)
3495{
3496	struct ifnet *ifp;
3497	struct mlx5e_priv *priv;
3498	u8 dev_addr[ETHER_ADDR_LEN] __aligned(4);
3499	struct sysctl_oid_list *child;
3500	int ncv = mdev->priv.eq_table.num_comp_vectors;
3501	char unit[16];
3502	int err;
3503	int i;
3504	u32 eth_proto_cap;
3505
3506	if (mlx5e_check_required_hca_cap(mdev)) {
3507		mlx5_core_dbg(mdev, "mlx5e_check_required_hca_cap() failed\n");
3508		return (NULL);
3509	}
3510	/*
3511	 * Try to allocate the priv and make room for worst-case
3512	 * number of channel structures:
3513	 */
3514	priv = malloc(sizeof(*priv) +
3515	    (sizeof(priv->channel[0]) * mdev->priv.eq_table.num_comp_vectors),
3516	    M_MLX5EN, M_WAITOK | M_ZERO);
3517	mlx5e_priv_mtx_init(priv);
3518
3519	ifp = priv->ifp = if_alloc(IFT_ETHER);
3520	if (ifp == NULL) {
3521		mlx5_core_err(mdev, "if_alloc() failed\n");
3522		goto err_free_priv;
3523	}
3524	ifp->if_softc = priv;
3525	if_initname(ifp, "mce", device_get_unit(mdev->pdev->dev.bsddev));
3526	ifp->if_mtu = ETHERMTU;
3527	ifp->if_init = mlx5e_open;
3528	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
3529	ifp->if_ioctl = mlx5e_ioctl;
3530	ifp->if_transmit = mlx5e_xmit;
3531	ifp->if_qflush = if_qflush;
3532#if (__FreeBSD_version >= 1100000)
3533	ifp->if_get_counter = mlx5e_get_counter;
3534#endif
3535	ifp->if_snd.ifq_maxlen = ifqmaxlen;
3536	/*
3537	 * Set driver features
3538	 */
3539	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6;
3540	ifp->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING;
3541	ifp->if_capabilities |= IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWFILTER;
3542	ifp->if_capabilities |= IFCAP_LINKSTATE | IFCAP_JUMBO_MTU;
3543	ifp->if_capabilities |= IFCAP_LRO;
3544	ifp->if_capabilities |= IFCAP_TSO | IFCAP_VLAN_HWTSO;
3545	ifp->if_capabilities |= IFCAP_HWSTATS;
3546
3547	/* set TSO limits so that we don't have to drop TX packets */
3548	ifp->if_hw_tsomax = MLX5E_MAX_TX_PAYLOAD_SIZE - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
3549	ifp->if_hw_tsomaxsegcount = MLX5E_MAX_TX_MBUF_FRAGS - 1 /* hdr */;
3550	ifp->if_hw_tsomaxsegsize = MLX5E_MAX_TX_MBUF_SIZE;
3551
3552	ifp->if_capenable = ifp->if_capabilities;
3553	ifp->if_hwassist = 0;
3554	if (ifp->if_capenable & IFCAP_TSO)
3555		ifp->if_hwassist |= CSUM_TSO;
3556	if (ifp->if_capenable & IFCAP_TXCSUM)
3557		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP | CSUM_IP);
3558	if (ifp->if_capenable & IFCAP_TXCSUM_IPV6)
3559		ifp->if_hwassist |= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);
3560
3561	/* ifnet sysctl tree */
3562	sysctl_ctx_init(&priv->sysctl_ctx);
3563	priv->sysctl_ifnet = SYSCTL_ADD_NODE(&priv->sysctl_ctx, SYSCTL_STATIC_CHILDREN(_dev),
3564	    OID_AUTO, ifp->if_dname, CTLFLAG_RD, 0, "MLX5 ethernet - interface name");
3565	if (priv->sysctl_ifnet == NULL) {
3566		mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n");
3567		goto err_free_sysctl;
3568	}
3569	snprintf(unit, sizeof(unit), "%d", ifp->if_dunit);
3570	priv->sysctl_ifnet = SYSCTL_ADD_NODE(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3571	    OID_AUTO, unit, CTLFLAG_RD, 0, "MLX5 ethernet - interface unit");
3572	if (priv->sysctl_ifnet == NULL) {
3573		mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n");
3574		goto err_free_sysctl;
3575	}
3576
3577	/* HW sysctl tree */
3578	child = SYSCTL_CHILDREN(device_get_sysctl_tree(mdev->pdev->dev.bsddev));
3579	priv->sysctl_hw = SYSCTL_ADD_NODE(&priv->sysctl_ctx, child,
3580	    OID_AUTO, "hw", CTLFLAG_RD, 0, "MLX5 ethernet dev hw");
3581	if (priv->sysctl_hw == NULL) {
3582		mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n");
3583		goto err_free_sysctl;
3584	}
3585
3586	err = mlx5e_build_ifp_priv(mdev, priv, ncv);
3587	if (err) {
3588		mlx5_core_err(mdev, "mlx5e_build_ifp_priv() failed (%d)\n", err);
3589		goto err_free_sysctl;
3590	}
3591
3592	snprintf(unit, sizeof(unit), "mce%u_wq",
3593	    device_get_unit(mdev->pdev->dev.bsddev));
3594	priv->wq = alloc_workqueue(unit, 0, 1);
3595	if (priv->wq == NULL) {
3596		if_printf(ifp, "%s: alloc_workqueue failed\n", __func__);
3597		goto err_free_sysctl;
3598	}
3599
3600	err = mlx5_alloc_map_uar(mdev, &priv->cq_uar);
3601	if (err) {
3602		if_printf(ifp, "%s: mlx5_alloc_map_uar failed, %d\n",
3603		    __func__, err);
3604		goto err_free_wq;
3605	}
3606	err = mlx5_core_alloc_pd(mdev, &priv->pdn);
3607	if (err) {
3608		if_printf(ifp, "%s: mlx5_core_alloc_pd failed, %d\n",
3609		    __func__, err);
3610		goto err_unmap_free_uar;
3611	}
3612	err = mlx5_alloc_transport_domain(mdev, &priv->tdn);
3613	if (err) {
3614		if_printf(ifp, "%s: mlx5_alloc_transport_domain failed, %d\n",
3615		    __func__, err);
3616		goto err_dealloc_pd;
3617	}
3618	err = mlx5e_create_mkey(priv, priv->pdn, &priv->mr);
3619	if (err) {
3620		if_printf(ifp, "%s: mlx5e_create_mkey failed, %d\n",
3621		    __func__, err);
3622		goto err_dealloc_transport_domain;
3623	}
3624	mlx5_query_nic_vport_mac_address(priv->mdev, 0, dev_addr);
3625
3626	/* check if we should generate a random MAC address */
3627	if (MLX5_CAP_GEN(priv->mdev, vport_group_manager) == 0 &&
3628	    is_zero_ether_addr(dev_addr)) {
3629		random_ether_addr(dev_addr);
3630		if_printf(ifp, "Assigned random MAC address\n");
3631	}
3632
3633	/* set default MTU */
3634	mlx5e_set_dev_port_mtu(ifp, ifp->if_mtu);
3635
3636	/* Set default media status */
3637	priv->media_status_last = IFM_AVALID;
3638	priv->media_active_last = IFM_ETHER | IFM_AUTO |
3639	    IFM_ETH_RXPAUSE | IFM_FDX;
3640
3641	/* setup default pauseframes configuration */
3642	mlx5e_setup_pauseframes(priv);
3643
3644	err = mlx5_query_port_proto_cap(mdev, &eth_proto_cap, MLX5_PTYS_EN);
3645	if (err) {
3646		eth_proto_cap = 0;
3647		if_printf(ifp, "%s: Query port media capability failed, %d\n",
3648		    __func__, err);
3649	}
3650
3651	/* Setup supported medias */
3652	ifmedia_init(&priv->media, IFM_IMASK | IFM_ETH_FMASK,
3653	    mlx5e_media_change, mlx5e_media_status);
3654
3655	for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) {
3656		if (mlx5e_mode_table[i].baudrate == 0)
3657			continue;
3658		if (MLX5E_PROT_MASK(i) & eth_proto_cap) {
3659			ifmedia_add(&priv->media,
3660			    mlx5e_mode_table[i].subtype |
3661			    IFM_ETHER, 0, NULL);
3662			ifmedia_add(&priv->media,
3663			    mlx5e_mode_table[i].subtype |
3664			    IFM_ETHER | IFM_FDX |
3665			    IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE, 0, NULL);
3666		}
3667	}
3668
3669	/* Additional supported medias */
3670	ifmedia_add(&priv->media, IFM_10G_LR | IFM_ETHER, 0, NULL);
3671	ifmedia_add(&priv->media, IFM_10G_LR |
3672	    IFM_ETHER | IFM_FDX |
3673	    IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE, 0, NULL);
3674
3675	ifmedia_add(&priv->media, IFM_40G_ER4 | IFM_ETHER, 0, NULL);
3676	ifmedia_add(&priv->media, IFM_40G_ER4 |
3677	    IFM_ETHER | IFM_FDX |
3678	    IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE, 0, NULL);
3679
3680	ifmedia_add(&priv->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3681	ifmedia_add(&priv->media, IFM_ETHER | IFM_AUTO | IFM_FDX |
3682	    IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE, 0, NULL);
3683
3684	/* Set autoselect by default */
3685	ifmedia_set(&priv->media, IFM_ETHER | IFM_AUTO | IFM_FDX |
3686	    IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE);
3687	ether_ifattach(ifp, dev_addr);
3688
3689	/* Register for VLAN events */
3690	priv->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
3691	    mlx5e_vlan_rx_add_vid, priv, EVENTHANDLER_PRI_FIRST);
3692	priv->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
3693	    mlx5e_vlan_rx_kill_vid, priv, EVENTHANDLER_PRI_FIRST);
3694
3695	/* Link is down by default */
3696	if_link_state_change(ifp, LINK_STATE_DOWN);
3697
3698	mlx5e_enable_async_events(priv);
3699
3700	mlx5e_add_hw_stats(priv);
3701
3702	mlx5e_create_stats(&priv->stats.vport.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3703	    "vstats", mlx5e_vport_stats_desc, MLX5E_VPORT_STATS_NUM,
3704	    priv->stats.vport.arg);
3705
3706	mlx5e_create_stats(&priv->stats.pport.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3707	    "pstats", mlx5e_pport_stats_desc, MLX5E_PPORT_STATS_NUM,
3708	    priv->stats.pport.arg);
3709
3710	mlx5e_create_ethtool(priv);
3711
3712	mtx_lock(&priv->async_events_mtx);
3713	mlx5e_update_stats(priv);
3714	mtx_unlock(&priv->async_events_mtx);
3715
3716	return (priv);
3717
3718err_dealloc_transport_domain:
3719	mlx5_dealloc_transport_domain(mdev, priv->tdn);
3720
3721err_dealloc_pd:
3722	mlx5_core_dealloc_pd(mdev, priv->pdn);
3723
3724err_unmap_free_uar:
3725	mlx5_unmap_free_uar(mdev, &priv->cq_uar);
3726
3727err_free_wq:
3728	destroy_workqueue(priv->wq);
3729
3730err_free_sysctl:
3731	sysctl_ctx_free(&priv->sysctl_ctx);
3732	if (priv->sysctl_debug)
3733		sysctl_ctx_free(&priv->stats.port_stats_debug.ctx);
3734	if_free(ifp);
3735
3736err_free_priv:
3737	mlx5e_priv_mtx_destroy(priv);
3738	free(priv, M_MLX5EN);
3739	return (NULL);
3740}
3741
3742static void
3743mlx5e_destroy_ifp(struct mlx5_core_dev *mdev, void *vpriv)
3744{
3745	struct mlx5e_priv *priv = vpriv;
3746	struct ifnet *ifp = priv->ifp;
3747
3748	/* don't allow more IOCTLs */
3749	priv->gone = 1;
3750
3751	/* XXX wait a bit to allow IOCTL handlers to complete */
3752	pause("W", hz);
3753
3754	/* stop watchdog timer */
3755	callout_drain(&priv->watchdog);
3756
3757	if (priv->vlan_attach != NULL)
3758		EVENTHANDLER_DEREGISTER(vlan_config, priv->vlan_attach);
3759	if (priv->vlan_detach != NULL)
3760		EVENTHANDLER_DEREGISTER(vlan_unconfig, priv->vlan_detach);
3761
3762	/* make sure device gets closed */
3763	PRIV_LOCK(priv);
3764	mlx5e_close_locked(ifp);
3765	PRIV_UNLOCK(priv);
3766
3767	/* unregister device */
3768	ifmedia_removeall(&priv->media);
3769	ether_ifdetach(ifp);
3770	if_free(ifp);
3771
3772	/* destroy all remaining sysctl nodes */
3773	sysctl_ctx_free(&priv->stats.vport.ctx);
3774	sysctl_ctx_free(&priv->stats.pport.ctx);
3775	sysctl_ctx_free(&priv->sysctl_ctx);
3776	if (priv->sysctl_debug)
3777		sysctl_ctx_free(&priv->stats.port_stats_debug.ctx);
3778
3779	mlx5_core_destroy_mkey(priv->mdev, &priv->mr);
3780	mlx5_dealloc_transport_domain(priv->mdev, priv->tdn);
3781	mlx5_core_dealloc_pd(priv->mdev, priv->pdn);
3782	mlx5_unmap_free_uar(priv->mdev, &priv->cq_uar);
3783	mlx5e_disable_async_events(priv);
3784	destroy_workqueue(priv->wq);
3785	mlx5e_priv_mtx_destroy(priv);
3786	free(priv, M_MLX5EN);
3787}
3788
3789static void *
3790mlx5e_get_ifp(void *vpriv)
3791{
3792	struct mlx5e_priv *priv = vpriv;
3793
3794	return (priv->ifp);
3795}
3796
3797static struct mlx5_interface mlx5e_interface = {
3798	.add = mlx5e_create_ifp,
3799	.remove = mlx5e_destroy_ifp,
3800	.event = mlx5e_async_event,
3801	.protocol = MLX5_INTERFACE_PROTOCOL_ETH,
3802	.get_dev = mlx5e_get_ifp,
3803};
3804
3805void
3806mlx5e_init(void)
3807{
3808	mlx5_register_interface(&mlx5e_interface);
3809}
3810
3811void
3812mlx5e_cleanup(void)
3813{
3814	mlx5_unregister_interface(&mlx5e_interface);
3815}
3816
3817static void
3818mlx5e_show_version(void __unused *arg)
3819{
3820
3821	printf("%s", mlx5e_version);
3822}
3823SYSINIT(mlx5e_show_version, SI_SUB_DRIVERS, SI_ORDER_ANY, mlx5e_show_version, NULL);
3824
3825module_init_order(mlx5e_init, SI_ORDER_THIRD);
3826module_exit_order(mlx5e_cleanup, SI_ORDER_THIRD);
3827
3828#if (__FreeBSD_version >= 1100000)
3829MODULE_DEPEND(mlx5en, linuxkpi, 1, 1, 1);
3830#endif
3831MODULE_DEPEND(mlx5en, mlx5, 1, 1, 1);
3832MODULE_VERSION(mlx5en, 1);
3833