mlx5_en_main.c revision 347850
1/*-
2 * Copyright (c) 2015-2018 Mellanox Technologies. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 *    notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 *    notice, this list of conditions and the following disclaimer in the
11 *    documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23 * SUCH DAMAGE.
24 *
25 * $FreeBSD: stable/11/sys/dev/mlx5/mlx5_en/mlx5_en_main.c 347850 2019-05-16 18:01:23Z hselasky $
26 */
27
28#include "en.h"
29
30#include <sys/sockio.h>
31#include <machine/atomic.h>
32
33#ifndef ETH_DRIVER_VERSION
34#define	ETH_DRIVER_VERSION	"3.5.0"
35#endif
36#define DRIVER_RELDATE	"November 2018"
37
38static const char mlx5e_version[] = "mlx5en: Mellanox Ethernet driver "
39	ETH_DRIVER_VERSION " (" DRIVER_RELDATE ")\n";
40
41static int mlx5e_get_wqe_sz(struct mlx5e_priv *priv, u32 *wqe_sz, u32 *nsegs);
42
43struct mlx5e_channel_param {
44	struct mlx5e_rq_param rq;
45	struct mlx5e_sq_param sq;
46	struct mlx5e_cq_param rx_cq;
47	struct mlx5e_cq_param tx_cq;
48};
49
50static const struct {
51	u32	subtype;
52	u64	baudrate;
53}	mlx5e_mode_table[MLX5E_LINK_MODES_NUMBER] = {
54
55	[MLX5E_1000BASE_CX_SGMII] = {
56		.subtype = IFM_1000_CX_SGMII,
57		.baudrate = IF_Mbps(1000ULL),
58	},
59	[MLX5E_1000BASE_KX] = {
60		.subtype = IFM_1000_KX,
61		.baudrate = IF_Mbps(1000ULL),
62	},
63	[MLX5E_10GBASE_CX4] = {
64		.subtype = IFM_10G_CX4,
65		.baudrate = IF_Gbps(10ULL),
66	},
67	[MLX5E_10GBASE_KX4] = {
68		.subtype = IFM_10G_KX4,
69		.baudrate = IF_Gbps(10ULL),
70	},
71	[MLX5E_10GBASE_KR] = {
72		.subtype = IFM_10G_KR,
73		.baudrate = IF_Gbps(10ULL),
74	},
75	[MLX5E_20GBASE_KR2] = {
76		.subtype = IFM_20G_KR2,
77		.baudrate = IF_Gbps(20ULL),
78	},
79	[MLX5E_40GBASE_CR4] = {
80		.subtype = IFM_40G_CR4,
81		.baudrate = IF_Gbps(40ULL),
82	},
83	[MLX5E_40GBASE_KR4] = {
84		.subtype = IFM_40G_KR4,
85		.baudrate = IF_Gbps(40ULL),
86	},
87	[MLX5E_56GBASE_R4] = {
88		.subtype = IFM_56G_R4,
89		.baudrate = IF_Gbps(56ULL),
90	},
91	[MLX5E_10GBASE_CR] = {
92		.subtype = IFM_10G_CR1,
93		.baudrate = IF_Gbps(10ULL),
94	},
95	[MLX5E_10GBASE_SR] = {
96		.subtype = IFM_10G_SR,
97		.baudrate = IF_Gbps(10ULL),
98	},
99	[MLX5E_10GBASE_ER] = {
100		.subtype = IFM_10G_ER,
101		.baudrate = IF_Gbps(10ULL),
102	},
103	[MLX5E_40GBASE_SR4] = {
104		.subtype = IFM_40G_SR4,
105		.baudrate = IF_Gbps(40ULL),
106	},
107	[MLX5E_40GBASE_LR4] = {
108		.subtype = IFM_40G_LR4,
109		.baudrate = IF_Gbps(40ULL),
110	},
111	[MLX5E_100GBASE_CR4] = {
112		.subtype = IFM_100G_CR4,
113		.baudrate = IF_Gbps(100ULL),
114	},
115	[MLX5E_100GBASE_SR4] = {
116		.subtype = IFM_100G_SR4,
117		.baudrate = IF_Gbps(100ULL),
118	},
119	[MLX5E_100GBASE_KR4] = {
120		.subtype = IFM_100G_KR4,
121		.baudrate = IF_Gbps(100ULL),
122	},
123	[MLX5E_100GBASE_LR4] = {
124		.subtype = IFM_100G_LR4,
125		.baudrate = IF_Gbps(100ULL),
126	},
127	[MLX5E_100BASE_TX] = {
128		.subtype = IFM_100_TX,
129		.baudrate = IF_Mbps(100ULL),
130	},
131	[MLX5E_1000BASE_T] = {
132		.subtype = IFM_1000_T,
133		.baudrate = IF_Mbps(1000ULL),
134	},
135	[MLX5E_10GBASE_T] = {
136		.subtype = IFM_10G_T,
137		.baudrate = IF_Gbps(10ULL),
138	},
139	[MLX5E_25GBASE_CR] = {
140		.subtype = IFM_25G_CR,
141		.baudrate = IF_Gbps(25ULL),
142	},
143	[MLX5E_25GBASE_KR] = {
144		.subtype = IFM_25G_KR,
145		.baudrate = IF_Gbps(25ULL),
146	},
147	[MLX5E_25GBASE_SR] = {
148		.subtype = IFM_25G_SR,
149		.baudrate = IF_Gbps(25ULL),
150	},
151	[MLX5E_50GBASE_CR2] = {
152		.subtype = IFM_50G_CR2,
153		.baudrate = IF_Gbps(50ULL),
154	},
155	[MLX5E_50GBASE_KR2] = {
156		.subtype = IFM_50G_KR2,
157		.baudrate = IF_Gbps(50ULL),
158	},
159};
160
161MALLOC_DEFINE(M_MLX5EN, "MLX5EN", "MLX5 Ethernet");
162
163static void
164mlx5e_update_carrier(struct mlx5e_priv *priv)
165{
166	struct mlx5_core_dev *mdev = priv->mdev;
167	u32 out[MLX5_ST_SZ_DW(ptys_reg)];
168	u32 eth_proto_oper;
169	int error;
170	u8 port_state;
171	u8 is_er_type;
172	u8 i;
173
174	port_state = mlx5_query_vport_state(mdev,
175	    MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT, 0);
176
177	if (port_state == VPORT_STATE_UP) {
178		priv->media_status_last |= IFM_ACTIVE;
179	} else {
180		priv->media_status_last &= ~IFM_ACTIVE;
181		priv->media_active_last = IFM_ETHER;
182		if_link_state_change(priv->ifp, LINK_STATE_DOWN);
183		return;
184	}
185
186	error = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, 1);
187	if (error) {
188		priv->media_active_last = IFM_ETHER;
189		priv->ifp->if_baudrate = 1;
190		if_printf(priv->ifp, "%s: query port ptys failed: 0x%x\n",
191		    __func__, error);
192		return;
193	}
194	eth_proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper);
195
196	for (i = 0; i != MLX5E_LINK_MODES_NUMBER; i++) {
197		if (mlx5e_mode_table[i].baudrate == 0)
198			continue;
199		if (MLX5E_PROT_MASK(i) & eth_proto_oper) {
200			u32 subtype = mlx5e_mode_table[i].subtype;
201
202			priv->ifp->if_baudrate =
203			    mlx5e_mode_table[i].baudrate;
204
205			switch (subtype) {
206			case IFM_10G_ER:
207				error = mlx5_query_pddr_range_info(mdev, 1, &is_er_type);
208				if (error != 0) {
209					if_printf(priv->ifp, "%s: query port pddr failed: %d\n",
210					    __func__, error);
211				}
212				if (error != 0 || is_er_type == 0)
213					subtype = IFM_10G_LR;
214				break;
215			case IFM_40G_LR4:
216				error = mlx5_query_pddr_range_info(mdev, 1, &is_er_type);
217				if (error != 0) {
218					if_printf(priv->ifp, "%s: query port pddr failed: %d\n",
219					    __func__, error);
220				}
221				if (error == 0 && is_er_type != 0)
222					subtype = IFM_40G_ER4;
223				break;
224			}
225			priv->media_active_last = subtype | IFM_ETHER | IFM_FDX;
226			break;
227		}
228	}
229	if_link_state_change(priv->ifp, LINK_STATE_UP);
230}
231
232static void
233mlx5e_media_status(struct ifnet *dev, struct ifmediareq *ifmr)
234{
235	struct mlx5e_priv *priv = dev->if_softc;
236
237	ifmr->ifm_status = priv->media_status_last;
238	ifmr->ifm_active = priv->media_active_last |
239	    (priv->params.rx_pauseframe_control ? IFM_ETH_RXPAUSE : 0) |
240	    (priv->params.tx_pauseframe_control ? IFM_ETH_TXPAUSE : 0);
241
242}
243
244static u32
245mlx5e_find_link_mode(u32 subtype)
246{
247	u32 i;
248	u32 link_mode = 0;
249
250	switch (subtype) {
251	case IFM_10G_LR:
252		subtype = IFM_10G_ER;
253		break;
254	case IFM_40G_ER4:
255		subtype = IFM_40G_LR4;
256		break;
257	}
258
259	for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) {
260		if (mlx5e_mode_table[i].baudrate == 0)
261			continue;
262		if (mlx5e_mode_table[i].subtype == subtype)
263			link_mode |= MLX5E_PROT_MASK(i);
264	}
265
266	return (link_mode);
267}
268
269static int
270mlx5e_set_port_pause_and_pfc(struct mlx5e_priv *priv)
271{
272	return (mlx5_set_port_pause_and_pfc(priv->mdev, 1,
273	    priv->params.rx_pauseframe_control,
274	    priv->params.tx_pauseframe_control,
275	    priv->params.rx_priority_flow_control,
276	    priv->params.tx_priority_flow_control));
277}
278
279static int
280mlx5e_set_port_pfc(struct mlx5e_priv *priv)
281{
282	int error;
283
284	if (priv->gone != 0) {
285		error = -ENXIO;
286	} else if (priv->params.rx_pauseframe_control ||
287	    priv->params.tx_pauseframe_control) {
288		if_printf(priv->ifp,
289		    "Global pauseframes must be disabled before enabling PFC.\n");
290		error = -EINVAL;
291	} else {
292		error = mlx5e_set_port_pause_and_pfc(priv);
293	}
294	return (error);
295}
296
297static int
298mlx5e_media_change(struct ifnet *dev)
299{
300	struct mlx5e_priv *priv = dev->if_softc;
301	struct mlx5_core_dev *mdev = priv->mdev;
302	u32 eth_proto_cap;
303	u32 link_mode;
304	int was_opened;
305	int locked;
306	int error;
307
308	locked = PRIV_LOCKED(priv);
309	if (!locked)
310		PRIV_LOCK(priv);
311
312	if (IFM_TYPE(priv->media.ifm_media) != IFM_ETHER) {
313		error = EINVAL;
314		goto done;
315	}
316	link_mode = mlx5e_find_link_mode(IFM_SUBTYPE(priv->media.ifm_media));
317
318	/* query supported capabilities */
319	error = mlx5_query_port_proto_cap(mdev, &eth_proto_cap, MLX5_PTYS_EN);
320	if (error != 0) {
321		if_printf(dev, "Query port media capability failed\n");
322		goto done;
323	}
324	/* check for autoselect */
325	if (IFM_SUBTYPE(priv->media.ifm_media) == IFM_AUTO) {
326		link_mode = eth_proto_cap;
327		if (link_mode == 0) {
328			if_printf(dev, "Port media capability is zero\n");
329			error = EINVAL;
330			goto done;
331		}
332	} else {
333		link_mode = link_mode & eth_proto_cap;
334		if (link_mode == 0) {
335			if_printf(dev, "Unsupported link mode requested\n");
336			error = EINVAL;
337			goto done;
338		}
339	}
340	if (priv->media.ifm_media & (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE)) {
341		/* check if PFC is enabled */
342		if (priv->params.rx_priority_flow_control ||
343		    priv->params.tx_priority_flow_control) {
344			if_printf(dev, "PFC must be disabled before enabling global pauseframes.\n");
345			error = EINVAL;
346			goto done;
347		}
348	}
349	/* update pauseframe control bits */
350	priv->params.rx_pauseframe_control =
351	    (priv->media.ifm_media & IFM_ETH_RXPAUSE) ? 1 : 0;
352	priv->params.tx_pauseframe_control =
353	    (priv->media.ifm_media & IFM_ETH_TXPAUSE) ? 1 : 0;
354
355	/* check if device is opened */
356	was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
357
358	/* reconfigure the hardware */
359	mlx5_set_port_status(mdev, MLX5_PORT_DOWN);
360	mlx5_set_port_proto(mdev, link_mode, MLX5_PTYS_EN);
361	error = -mlx5e_set_port_pause_and_pfc(priv);
362	if (was_opened)
363		mlx5_set_port_status(mdev, MLX5_PORT_UP);
364
365done:
366	if (!locked)
367		PRIV_UNLOCK(priv);
368	return (error);
369}
370
371static void
372mlx5e_update_carrier_work(struct work_struct *work)
373{
374	struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
375	    update_carrier_work);
376
377	PRIV_LOCK(priv);
378	if (test_bit(MLX5E_STATE_OPENED, &priv->state))
379		mlx5e_update_carrier(priv);
380	PRIV_UNLOCK(priv);
381}
382
383/*
384 * This function reads the physical port counters from the firmware
385 * using a predefined layout given by various MLX5E_PPORT_XXX()
386 * macros. The output is converted from big-endian 64-bit values into
387 * host endian ones and stored in the "priv->stats.pport" structure.
388 */
389static void
390mlx5e_update_pport_counters(struct mlx5e_priv *priv)
391{
392	struct mlx5_core_dev *mdev = priv->mdev;
393	struct mlx5e_pport_stats *s = &priv->stats.pport;
394	struct mlx5e_port_stats_debug *s_debug = &priv->stats.port_stats_debug;
395	u32 *in;
396	u32 *out;
397	const u64 *ptr;
398	unsigned sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
399	unsigned x;
400	unsigned y;
401	unsigned z;
402
403	/* allocate firmware request structures */
404	in = mlx5_vzalloc(sz);
405	out = mlx5_vzalloc(sz);
406	if (in == NULL || out == NULL)
407		goto free_out;
408
409	/*
410	 * Get a pointer to the 64-bit counter set, which is located
411	 * at a fixed offset in the output firmware request structure:
412	 */
413	ptr = (const uint64_t *)MLX5_ADDR_OF(ppcnt_reg, out, counter_set);
414
415	MLX5_SET(ppcnt_reg, in, local_port, 1);
416
417	/* read IEEE802_3 counter group using predefined counter layout */
418	MLX5_SET(ppcnt_reg, in, grp, MLX5_IEEE_802_3_COUNTERS_GROUP);
419	mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
420	for (x = 0, y = MLX5E_PPORT_PER_PRIO_STATS_NUM;
421	     x != MLX5E_PPORT_IEEE802_3_STATS_NUM; x++, y++)
422		s->arg[y] = be64toh(ptr[x]);
423
424	/* read RFC2819 counter group using predefined counter layout */
425	MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2819_COUNTERS_GROUP);
426	mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
427	for (x = 0; x != MLX5E_PPORT_RFC2819_STATS_NUM; x++, y++)
428		s->arg[y] = be64toh(ptr[x]);
429	for (y = 0; x != MLX5E_PPORT_RFC2819_STATS_NUM +
430	    MLX5E_PPORT_RFC2819_STATS_DEBUG_NUM; x++, y++)
431		s_debug->arg[y] = be64toh(ptr[x]);
432
433	/* read RFC2863 counter group using predefined counter layout */
434	MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2863_COUNTERS_GROUP);
435	mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
436	for (x = 0; x != MLX5E_PPORT_RFC2863_STATS_DEBUG_NUM; x++, y++)
437		s_debug->arg[y] = be64toh(ptr[x]);
438
439	/* read physical layer stats counter group using predefined counter layout */
440	MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_COUNTERS_GROUP);
441	mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
442	for (x = 0; x != MLX5E_PPORT_PHYSICAL_LAYER_STATS_DEBUG_NUM; x++, y++)
443		s_debug->arg[y] = be64toh(ptr[x]);
444
445	/* read Extended Ethernet counter group using predefined counter layout */
446	MLX5_SET(ppcnt_reg, in, grp, MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP);
447	mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
448	for (x = 0; x != MLX5E_PPORT_ETHERNET_EXTENDED_STATS_DEBUG_NUM; x++, y++)
449		s_debug->arg[y] = be64toh(ptr[x]);
450
451	/* read per-priority counters */
452	MLX5_SET(ppcnt_reg, in, grp, MLX5_PER_PRIORITY_COUNTERS_GROUP);
453
454	/* iterate all the priorities */
455	for (y = z = 0; z != MLX5E_PPORT_PER_PRIO_STATS_NUM_PRIO; z++) {
456		MLX5_SET(ppcnt_reg, in, prio_tc, z);
457		mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
458
459		/* read per priority stats counter group using predefined counter layout */
460		for (x = 0; x != (MLX5E_PPORT_PER_PRIO_STATS_NUM /
461		    MLX5E_PPORT_PER_PRIO_STATS_NUM_PRIO); x++, y++)
462			s->arg[y] = be64toh(ptr[x]);
463	}
464
465free_out:
466	/* free firmware request structures */
467	kvfree(in);
468	kvfree(out);
469}
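
/*
 * A minimal sketch (illustrative only; example_copy_counter_block()
 * is a hypothetical helper, not part of revision 347850) of the
 * conversion pattern used above: the firmware returns a fixed-layout
 * block of big-endian 64-bit counters, which is copied into a
 * host-endian array one entry at a time.
 */
#if 0
static void
example_copy_counter_block(const uint64_t *fw_be, uint64_t *host,
    unsigned num)
{
	unsigned i;

	for (i = 0; i != num; i++)
		host[i] = be64toh(fw_be[i]);
}
#endif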
470
471static void
472mlx5e_grp_vnic_env_update_stats(struct mlx5e_priv *priv)
473{
474	u32 out[MLX5_ST_SZ_DW(query_vnic_env_out)] = {};
475	u32 in[MLX5_ST_SZ_DW(query_vnic_env_in)] = {};
476
477	if (!MLX5_CAP_GEN(priv->mdev, nic_receive_steering_discard))
478		return;
479
480	MLX5_SET(query_vnic_env_in, in, opcode,
481	    MLX5_CMD_OP_QUERY_VNIC_ENV);
482	MLX5_SET(query_vnic_env_in, in, op_mod, 0);
483	MLX5_SET(query_vnic_env_in, in, other_vport, 0);
484
485	if (mlx5_cmd_exec(priv->mdev, in, sizeof(in), out, sizeof(out)) != 0)
486		return;
487
488	priv->stats.vport.rx_steer_missed_packets =
489	    MLX5_GET64(query_vnic_env_out, out,
490	    vport_env.nic_receive_steering_discard);
491}
492
493/*
494 * This function is called regularly to collect all statistics
495 * counters from the firmware. The values can be viewed through the
496 * sysctl interface. Execution is serialized using the priv's global
497 * configuration lock.
498 */
499static void
500mlx5e_update_stats_locked(struct mlx5e_priv *priv)
501{
502	struct mlx5_core_dev *mdev = priv->mdev;
503	struct mlx5e_vport_stats *s = &priv->stats.vport;
504	struct mlx5e_sq_stats *sq_stats;
505	struct buf_ring *sq_br;
506#if (__FreeBSD_version < 1100000)
507	struct ifnet *ifp = priv->ifp;
508#endif
509
510	u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)];
511	u32 *out;
512	int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out);
513	u64 tso_packets = 0;
514	u64 tso_bytes = 0;
515	u64 tx_queue_dropped = 0;
516	u64 tx_defragged = 0;
517	u64 tx_offload_none = 0;
518	u64 lro_packets = 0;
519	u64 lro_bytes = 0;
520	u64 sw_lro_queued = 0;
521	u64 sw_lro_flushed = 0;
522	u64 rx_csum_none = 0;
523	u64 rx_wqe_err = 0;
524	u64 rx_packets = 0;
525	u64 rx_bytes = 0;
526	u32 rx_out_of_buffer = 0;
527	int i;
528	int j;
529
530	out = mlx5_vzalloc(outlen);
531	if (out == NULL)
532		goto free_out;
533
534	/* Collect the SW counters first and then the HW counters for consistency */
535	for (i = 0; i < priv->params.num_channels; i++) {
536		struct mlx5e_channel *pch = priv->channel + i;
537		struct mlx5e_rq *rq = &pch->rq;
538		struct mlx5e_rq_stats *rq_stats = &pch->rq.stats;
539
540		/* collect stats from LRO */
541		rq_stats->sw_lro_queued = rq->lro.lro_queued;
542		rq_stats->sw_lro_flushed = rq->lro.lro_flushed;
543		sw_lro_queued += rq_stats->sw_lro_queued;
544		sw_lro_flushed += rq_stats->sw_lro_flushed;
545		lro_packets += rq_stats->lro_packets;
546		lro_bytes += rq_stats->lro_bytes;
547		rx_csum_none += rq_stats->csum_none;
548		rx_wqe_err += rq_stats->wqe_err;
549		rx_packets += rq_stats->packets;
550		rx_bytes += rq_stats->bytes;
551
552		for (j = 0; j < priv->num_tc; j++) {
553			sq_stats = &pch->sq[j].stats;
554			sq_br = pch->sq[j].br;
555
556			tso_packets += sq_stats->tso_packets;
557			tso_bytes += sq_stats->tso_bytes;
558			tx_queue_dropped += sq_stats->dropped;
559			if (sq_br != NULL)
560				tx_queue_dropped += sq_br->br_drops;
561			tx_defragged += sq_stats->defragged;
562			tx_offload_none += sq_stats->csum_offload_none;
563		}
564	}
565
566	/* update counters */
567	s->tso_packets = tso_packets;
568	s->tso_bytes = tso_bytes;
569	s->tx_queue_dropped = tx_queue_dropped;
570	s->tx_defragged = tx_defragged;
571	s->lro_packets = lro_packets;
572	s->lro_bytes = lro_bytes;
573	s->sw_lro_queued = sw_lro_queued;
574	s->sw_lro_flushed = sw_lro_flushed;
575	s->rx_csum_none = rx_csum_none;
576	s->rx_wqe_err = rx_wqe_err;
577	s->rx_packets = rx_packets;
578	s->rx_bytes = rx_bytes;
579
580	mlx5e_grp_vnic_env_update_stats(priv);
581
582	/* HW counters */
583	memset(in, 0, sizeof(in));
584
585	MLX5_SET(query_vport_counter_in, in, opcode,
586	    MLX5_CMD_OP_QUERY_VPORT_COUNTER);
587	MLX5_SET(query_vport_counter_in, in, op_mod, 0);
588	MLX5_SET(query_vport_counter_in, in, other_vport, 0);
589
590	memset(out, 0, outlen);
591
592	/* get number of out-of-buffer drops first */
593	if (test_bit(MLX5E_STATE_OPENED, &priv->state) != 0 &&
594	    mlx5_vport_query_out_of_rx_buffer(mdev, priv->counter_set_id,
595	    &rx_out_of_buffer) == 0) {
596		/* accumulate difference into a 64-bit counter */
597		s->rx_out_of_buffer += (u64)(u32)(rx_out_of_buffer -
598		    s->rx_out_of_buffer_prev);
599		s->rx_out_of_buffer_prev = rx_out_of_buffer;
600	}
601
602	/* get port statistics */
603	if (mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen) == 0) {
604#define	MLX5_GET_CTR(out, x) \
605	MLX5_GET64(query_vport_counter_out, out, x)
606
607		s->rx_error_packets =
608		    MLX5_GET_CTR(out, received_errors.packets);
609		s->rx_error_bytes =
610		    MLX5_GET_CTR(out, received_errors.octets);
611		s->tx_error_packets =
612		    MLX5_GET_CTR(out, transmit_errors.packets);
613		s->tx_error_bytes =
614		    MLX5_GET_CTR(out, transmit_errors.octets);
615
616		s->rx_unicast_packets =
617		    MLX5_GET_CTR(out, received_eth_unicast.packets);
618		s->rx_unicast_bytes =
619		    MLX5_GET_CTR(out, received_eth_unicast.octets);
620		s->tx_unicast_packets =
621		    MLX5_GET_CTR(out, transmitted_eth_unicast.packets);
622		s->tx_unicast_bytes =
623		    MLX5_GET_CTR(out, transmitted_eth_unicast.octets);
624
625		s->rx_multicast_packets =
626		    MLX5_GET_CTR(out, received_eth_multicast.packets);
627		s->rx_multicast_bytes =
628		    MLX5_GET_CTR(out, received_eth_multicast.octets);
629		s->tx_multicast_packets =
630		    MLX5_GET_CTR(out, transmitted_eth_multicast.packets);
631		s->tx_multicast_bytes =
632		    MLX5_GET_CTR(out, transmitted_eth_multicast.octets);
633
634		s->rx_broadcast_packets =
635		    MLX5_GET_CTR(out, received_eth_broadcast.packets);
636		s->rx_broadcast_bytes =
637		    MLX5_GET_CTR(out, received_eth_broadcast.octets);
638		s->tx_broadcast_packets =
639		    MLX5_GET_CTR(out, transmitted_eth_broadcast.packets);
640		s->tx_broadcast_bytes =
641		    MLX5_GET_CTR(out, transmitted_eth_broadcast.octets);
642
643		s->tx_packets = s->tx_unicast_packets +
644		    s->tx_multicast_packets + s->tx_broadcast_packets;
645		s->tx_bytes = s->tx_unicast_bytes + s->tx_multicast_bytes +
646		    s->tx_broadcast_bytes;
647
648		/* Update calculated offload counters */
649		s->tx_csum_offload = s->tx_packets - tx_offload_none;
650		s->rx_csum_good = s->rx_packets - s->rx_csum_none;
651	}
652
653	/* Get physical port counters */
654	mlx5e_update_pport_counters(priv);
655
656	s->tx_jumbo_packets =
657	    priv->stats.port_stats_debug.tx_stat_p1519to2047octets +
658	    priv->stats.port_stats_debug.tx_stat_p2048to4095octets +
659	    priv->stats.port_stats_debug.tx_stat_p4096to8191octets +
660	    priv->stats.port_stats_debug.tx_stat_p8192to10239octets;
661
662#if (__FreeBSD_version < 1100000)
663	/* no get_counters interface in FreeBSD 10 */
664	ifp->if_ipackets = s->rx_packets;
665	ifp->if_ierrors = priv->stats.pport.in_range_len_errors +
666	    priv->stats.pport.out_of_range_len +
667	    priv->stats.pport.too_long_errors +
668	    priv->stats.pport.check_seq_err +
669	    priv->stats.pport.alignment_err;
670	ifp->if_iqdrops = s->rx_out_of_buffer;
671	ifp->if_opackets = s->tx_packets;
672	ifp->if_oerrors = priv->stats.port_stats_debug.out_discards;
673	ifp->if_snd.ifq_drops = s->tx_queue_dropped;
674	ifp->if_ibytes = s->rx_bytes;
675	ifp->if_obytes = s->tx_bytes;
676	ifp->if_collisions =
677	    priv->stats.pport.collisions;
678#endif
679
680free_out:
681	kvfree(out);
682
683	/* Update diagnostics, if any */
684	if (priv->params_ethtool.diag_pci_enable ||
685	    priv->params_ethtool.diag_general_enable) {
686		int error = mlx5_core_get_diagnostics_full(mdev,
687		    priv->params_ethtool.diag_pci_enable ? &priv->params_pci : NULL,
688		    priv->params_ethtool.diag_general_enable ? &priv->params_general : NULL);
689		if (error != 0)
690			if_printf(priv->ifp, "Failed reading diagnostics: %d\n", error);
691	}
692}
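
/*
 * A minimal sketch (illustrative only; example_accumulate_u32() is a
 * hypothetical helper, not part of the driver) of the wrap-safe
 * accumulation used for rx_out_of_buffer above: the hardware counter
 * is only 32 bits wide, so the delta between two samples is computed
 * in 32-bit unsigned arithmetic, which stays correct across a single
 * wrap, and is then added to a 64-bit software total.
 */
#if 0
static void
example_accumulate_u32(u64 *total, u32 *prev, u32 curr)
{
	*total += (u64)(u32)(curr - *prev);
	*prev = curr;
}
#endif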
693
694static void
695mlx5e_update_stats_work(struct work_struct *work)
696{
697	struct mlx5e_priv *priv;
698
699	priv = container_of(work, struct mlx5e_priv, update_stats_work);
700	PRIV_LOCK(priv);
701	if (test_bit(MLX5E_STATE_OPENED, &priv->state) != 0)
702		mlx5e_update_stats_locked(priv);
703	PRIV_UNLOCK(priv);
704}
705
706static void
707mlx5e_update_stats(void *arg)
708{
709	struct mlx5e_priv *priv = arg;
710
711	queue_work(priv->wq, &priv->update_stats_work);
712
713	callout_reset(&priv->watchdog, hz, &mlx5e_update_stats, priv);
714}
715
716static void
717mlx5e_async_event_sub(struct mlx5e_priv *priv,
718    enum mlx5_dev_event event)
719{
720	switch (event) {
721	case MLX5_DEV_EVENT_PORT_UP:
722	case MLX5_DEV_EVENT_PORT_DOWN:
723		queue_work(priv->wq, &priv->update_carrier_work);
724		break;
725
726	default:
727		break;
728	}
729}
730
731static void
732mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv,
733    enum mlx5_dev_event event, unsigned long param)
734{
735	struct mlx5e_priv *priv = vpriv;
736
737	mtx_lock(&priv->async_events_mtx);
738	if (test_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state))
739		mlx5e_async_event_sub(priv, event);
740	mtx_unlock(&priv->async_events_mtx);
741}
742
743static void
744mlx5e_enable_async_events(struct mlx5e_priv *priv)
745{
746	set_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state);
747}
748
749static void
750mlx5e_disable_async_events(struct mlx5e_priv *priv)
751{
752	mtx_lock(&priv->async_events_mtx);
753	clear_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state);
754	mtx_unlock(&priv->async_events_mtx);
755}
756
757static const char *mlx5e_rq_stats_desc[] = {
758	MLX5E_RQ_STATS(MLX5E_STATS_DESC)
759};
760
761static int
762mlx5e_create_rq(struct mlx5e_channel *c,
763    struct mlx5e_rq_param *param,
764    struct mlx5e_rq *rq)
765{
766	struct mlx5e_priv *priv = c->priv;
767	struct mlx5_core_dev *mdev = priv->mdev;
768	char buffer[16];
769	void *rqc = param->rqc;
770	void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq);
771	int wq_sz;
772	int err;
773	int i;
774	u32 nsegs, wqe_sz;
775
776	err = mlx5e_get_wqe_sz(priv, &wqe_sz, &nsegs);
777	if (err != 0)
778		goto done;
779
780	/* Create DMA descriptor TAG */
781	if ((err = -bus_dma_tag_create(
782	    bus_get_dma_tag(mdev->pdev->dev.bsddev),
783	    1,				/* any alignment */
784	    0,				/* no boundary */
785	    BUS_SPACE_MAXADDR,		/* lowaddr */
786	    BUS_SPACE_MAXADDR,		/* highaddr */
787	    NULL, NULL,			/* filter, filterarg */
788	    nsegs * MLX5E_MAX_RX_BYTES,	/* maxsize */
789	    nsegs,			/* nsegments */
790	    nsegs * MLX5E_MAX_RX_BYTES,	/* maxsegsize */
791	    0,				/* flags */
792	    NULL, NULL,			/* lockfunc, lockfuncarg */
793	    &rq->dma_tag)))
794		goto done;
795
796	err = mlx5_wq_ll_create(mdev, &param->wq, rqc_wq, &rq->wq,
797	    &rq->wq_ctrl);
798	if (err)
799		goto err_free_dma_tag;
800
801	rq->wq.db = &rq->wq.db[MLX5_RCV_DBR];
802
803	err = mlx5e_get_wqe_sz(priv, &rq->wqe_sz, &rq->nsegs);
804	if (err != 0)
805		goto err_rq_wq_destroy;
806
807	wq_sz = mlx5_wq_ll_get_size(&rq->wq);
808
809	err = -tcp_lro_init_args(&rq->lro, c->ifp, TCP_LRO_ENTRIES, wq_sz);
810	if (err)
811		goto err_rq_wq_destroy;
812
813	rq->mbuf = malloc(wq_sz * sizeof(rq->mbuf[0]), M_MLX5EN, M_WAITOK | M_ZERO);
814	for (i = 0; i != wq_sz; i++) {
815		struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, i);
816#if (MLX5E_MAX_RX_SEGS == 1)
817		uint32_t byte_count = rq->wqe_sz - MLX5E_NET_IP_ALIGN;
818#else
819		int j;
820#endif
821
822		err = -bus_dmamap_create(rq->dma_tag, 0, &rq->mbuf[i].dma_map);
823		if (err != 0) {
824			while (i--)
825				bus_dmamap_destroy(rq->dma_tag, rq->mbuf[i].dma_map);
826			goto err_rq_mbuf_free;
827		}
828
829		/* set values for the constant fields */
830#if (MLX5E_MAX_RX_SEGS == 1)
831		wqe->data[0].lkey = c->mkey_be;
832		wqe->data[0].byte_count = cpu_to_be32(byte_count | MLX5_HW_START_PADDING);
833#else
834		for (j = 0; j < rq->nsegs; j++)
835			wqe->data[j].lkey = c->mkey_be;
836#endif
837	}
838
839	INIT_WORK(&rq->dim.work, mlx5e_dim_work);
840	if (priv->params.rx_cq_moderation_mode < 2) {
841		rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_DISABLED;
842	} else {
843		void *cqc = container_of(param,
844		    struct mlx5e_channel_param, rq)->rx_cq.cqc;
845
846		switch (MLX5_GET(cqc, cqc, cq_period_mode)) {
847		case MLX5_CQ_PERIOD_MODE_START_FROM_EQE:
848			rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE;
849			break;
850		case MLX5_CQ_PERIOD_MODE_START_FROM_CQE:
851			rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE;
852			break;
853		default:
854			rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_DISABLED;
855			break;
856		}
857	}
858
859	rq->ifp = c->ifp;
860	rq->channel = c;
861	rq->ix = c->ix;
862
863	snprintf(buffer, sizeof(buffer), "rxstat%d", c->ix);
864	mlx5e_create_stats(&rq->stats.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
865	    buffer, mlx5e_rq_stats_desc, MLX5E_RQ_STATS_NUM,
866	    rq->stats.arg);
867	return (0);
868
869err_rq_mbuf_free:
870	free(rq->mbuf, M_MLX5EN);
871	tcp_lro_free(&rq->lro);
872err_rq_wq_destroy:
873	mlx5_wq_destroy(&rq->wq_ctrl);
874err_free_dma_tag:
875	bus_dma_tag_destroy(rq->dma_tag);
876done:
877	return (err);
878}
879
880static void
881mlx5e_destroy_rq(struct mlx5e_rq *rq)
882{
883	int wq_sz;
884	int i;
885
886	/* destroy all sysctl nodes */
887	sysctl_ctx_free(&rq->stats.ctx);
888
889	/* free leftover LRO packets, if any */
890	tcp_lro_free(&rq->lro);
891
892	wq_sz = mlx5_wq_ll_get_size(&rq->wq);
893	for (i = 0; i != wq_sz; i++) {
894		if (rq->mbuf[i].mbuf != NULL) {
895			bus_dmamap_unload(rq->dma_tag, rq->mbuf[i].dma_map);
896			m_freem(rq->mbuf[i].mbuf);
897		}
898		bus_dmamap_destroy(rq->dma_tag, rq->mbuf[i].dma_map);
899	}
900	free(rq->mbuf, M_MLX5EN);
901	mlx5_wq_destroy(&rq->wq_ctrl);
902}
903
904static int
905mlx5e_enable_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param)
906{
907	struct mlx5e_channel *c = rq->channel;
908	struct mlx5e_priv *priv = c->priv;
909	struct mlx5_core_dev *mdev = priv->mdev;
910
911	void *in;
912	void *rqc;
913	void *wq;
914	int inlen;
915	int err;
916
917	inlen = MLX5_ST_SZ_BYTES(create_rq_in) +
918	    sizeof(u64) * rq->wq_ctrl.buf.npages;
919	in = mlx5_vzalloc(inlen);
920	if (in == NULL)
921		return (-ENOMEM);
922
923	rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
924	wq = MLX5_ADDR_OF(rqc, rqc, wq);
925
926	memcpy(rqc, param->rqc, sizeof(param->rqc));
927
928	MLX5_SET(rqc, rqc, cqn, c->rq.cq.mcq.cqn);
929	MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST);
930	MLX5_SET(rqc, rqc, flush_in_error_en, 1);
931	if (priv->counter_set_id >= 0)
932		MLX5_SET(rqc, rqc, counter_set_id, priv->counter_set_id);
933	MLX5_SET(wq, wq, log_wq_pg_sz, rq->wq_ctrl.buf.page_shift -
934	    PAGE_SHIFT);
935	MLX5_SET64(wq, wq, dbr_addr, rq->wq_ctrl.db.dma);
936
937	mlx5_fill_page_array(&rq->wq_ctrl.buf,
938	    (__be64 *) MLX5_ADDR_OF(wq, wq, pas));
939
940	err = mlx5_core_create_rq(mdev, in, inlen, &rq->rqn);
941
942	kvfree(in);
943
944	return (err);
945}
946
947static int
948mlx5e_modify_rq(struct mlx5e_rq *rq, int curr_state, int next_state)
949{
950	struct mlx5e_channel *c = rq->channel;
951	struct mlx5e_priv *priv = c->priv;
952	struct mlx5_core_dev *mdev = priv->mdev;
953
954	void *in;
955	void *rqc;
956	int inlen;
957	int err;
958
959	inlen = MLX5_ST_SZ_BYTES(modify_rq_in);
960	in = mlx5_vzalloc(inlen);
961	if (in == NULL)
962		return (-ENOMEM);
963
964	rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);
965
966	MLX5_SET(modify_rq_in, in, rqn, rq->rqn);
967	MLX5_SET(modify_rq_in, in, rq_state, curr_state);
968	MLX5_SET(rqc, rqc, state, next_state);
969
970	err = mlx5_core_modify_rq(mdev, in, inlen);
971
972	kvfree(in);
973
974	return (err);
975}
976
977static void
978mlx5e_disable_rq(struct mlx5e_rq *rq)
979{
980	struct mlx5e_channel *c = rq->channel;
981	struct mlx5e_priv *priv = c->priv;
982	struct mlx5_core_dev *mdev = priv->mdev;
983
984	mlx5_core_destroy_rq(mdev, rq->rqn);
985}
986
987static int
988mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq)
989{
990	struct mlx5e_channel *c = rq->channel;
991	struct mlx5e_priv *priv = c->priv;
992	struct mlx5_wq_ll *wq = &rq->wq;
993	int i;
994
995	for (i = 0; i < 1000; i++) {
996		if (wq->cur_sz >= priv->params.min_rx_wqes)
997			return (0);
998
999		msleep(4);
1000	}
1001	return (-ETIMEDOUT);
1002}
1003
1004static int
1005mlx5e_open_rq(struct mlx5e_channel *c,
1006    struct mlx5e_rq_param *param,
1007    struct mlx5e_rq *rq)
1008{
1009	int err;
1010
1011	err = mlx5e_create_rq(c, param, rq);
1012	if (err)
1013		return (err);
1014
1015	err = mlx5e_enable_rq(rq, param);
1016	if (err)
1017		goto err_destroy_rq;
1018
1019	err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY);
1020	if (err)
1021		goto err_disable_rq;
1022
1023	c->rq.enabled = 1;
1024
1025	return (0);
1026
1027err_disable_rq:
1028	mlx5e_disable_rq(rq);
1029err_destroy_rq:
1030	mlx5e_destroy_rq(rq);
1031
1032	return (err);
1033}
1034
1035static void
1036mlx5e_close_rq(struct mlx5e_rq *rq)
1037{
1038	mtx_lock(&rq->mtx);
1039	rq->enabled = 0;
1040	callout_stop(&rq->watchdog);
1041	mtx_unlock(&rq->mtx);
1042
1043	callout_drain(&rq->watchdog);
1044
1045	mlx5e_modify_rq(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR);
1046}
1047
1048static void
1049mlx5e_close_rq_wait(struct mlx5e_rq *rq)
1050{
1051
1052	mlx5e_disable_rq(rq);
1053	mlx5e_close_cq(&rq->cq);
1054	cancel_work_sync(&rq->dim.work);
1055	mlx5e_destroy_rq(rq);
1056}
1057
1058void
1059mlx5e_free_sq_db(struct mlx5e_sq *sq)
1060{
1061	int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
1062	int x;
1063
1064	for (x = 0; x != wq_sz; x++) {
1065		if (sq->mbuf[x].mbuf != NULL) {
1066			bus_dmamap_unload(sq->dma_tag, sq->mbuf[x].dma_map);
1067			m_freem(sq->mbuf[x].mbuf);
1068		}
1069		bus_dmamap_destroy(sq->dma_tag, sq->mbuf[x].dma_map);
1070	}
1071	free(sq->mbuf, M_MLX5EN);
1072}
1073
1074int
1075mlx5e_alloc_sq_db(struct mlx5e_sq *sq)
1076{
1077	int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
1078	int err;
1079	int x;
1080
1081	sq->mbuf = malloc(wq_sz * sizeof(sq->mbuf[0]), M_MLX5EN, M_WAITOK | M_ZERO);
1082
1083	/* Create DMA descriptor MAPs */
1084	for (x = 0; x != wq_sz; x++) {
1085		err = -bus_dmamap_create(sq->dma_tag, 0, &sq->mbuf[x].dma_map);
1086		if (err != 0) {
1087			while (x--)
1088				bus_dmamap_destroy(sq->dma_tag, sq->mbuf[x].dma_map);
1089			free(sq->mbuf, M_MLX5EN);
1090			return (err);
1091		}
1092	}
1093	return (0);
1094}
1095
1096static const char *mlx5e_sq_stats_desc[] = {
1097	MLX5E_SQ_STATS(MLX5E_STATS_DESC)
1098};
1099
1100void
1101mlx5e_update_sq_inline(struct mlx5e_sq *sq)
1102{
1103	sq->max_inline = sq->priv->params.tx_max_inline;
1104	sq->min_inline_mode = sq->priv->params.tx_min_inline_mode;
1105
1106	/*
1107	 * Check if the trust state is DSCP or if the inline mode is
1108	 * NONE, which indicates ConnectX-5 or newer hardware.
1109	 */
1110	if (sq->priv->params_ethtool.trust_state != MLX5_QPTS_TRUST_PCP ||
1111	    sq->min_inline_mode == MLX5_INLINE_MODE_NONE) {
1112		if (MLX5_CAP_ETH(sq->priv->mdev, wqe_vlan_insert))
1113			sq->min_insert_caps = MLX5E_INSERT_VLAN | MLX5E_INSERT_NON_VLAN;
1114		else
1115			sq->min_insert_caps = MLX5E_INSERT_NON_VLAN;
1116	} else {
1117		sq->min_insert_caps = 0;
1118	}
1119}
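
/*
 * Summary of the resulting insertion capabilities (illustrative,
 * derived from the logic above):
 *
 *	trust state != PCP or inline mode NONE, wqe_vlan_insert set:
 *		min_insert_caps = MLX5E_INSERT_VLAN | MLX5E_INSERT_NON_VLAN
 *	trust state != PCP or inline mode NONE, wqe_vlan_insert clear:
 *		min_insert_caps = MLX5E_INSERT_NON_VLAN
 *	otherwise (PCP trust state with inlining required):
 *		min_insert_caps = 0
 */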
1120
1121static void
1122mlx5e_refresh_sq_inline_sub(struct mlx5e_priv *priv, struct mlx5e_channel *c)
1123{
1124	int i;
1125
1126	for (i = 0; i != c->num_tc; i++) {
1127		mtx_lock(&c->sq[i].lock);
1128		mlx5e_update_sq_inline(&c->sq[i]);
1129		mtx_unlock(&c->sq[i].lock);
1130	}
1131}
1132
1133void
1134mlx5e_refresh_sq_inline(struct mlx5e_priv *priv)
1135{
1136	int i;
1137
1138	/* check if channels are closed */
1139	if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
1140		return;
1141
1142	for (i = 0; i < priv->params.num_channels; i++)
1143		mlx5e_refresh_sq_inline_sub(priv, &priv->channel[i]);
1144}
1145
1146static int
1147mlx5e_create_sq(struct mlx5e_channel *c,
1148    int tc,
1149    struct mlx5e_sq_param *param,
1150    struct mlx5e_sq *sq)
1151{
1152	struct mlx5e_priv *priv = c->priv;
1153	struct mlx5_core_dev *mdev = priv->mdev;
1154	char buffer[16];
1155	void *sqc = param->sqc;
1156	void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq);
1157	int err;
1158
1159	/* Create DMA descriptor TAG */
1160	if ((err = -bus_dma_tag_create(
1161	    bus_get_dma_tag(mdev->pdev->dev.bsddev),
1162	    1,				/* any alignment */
1163	    0,				/* no boundary */
1164	    BUS_SPACE_MAXADDR,		/* lowaddr */
1165	    BUS_SPACE_MAXADDR,		/* highaddr */
1166	    NULL, NULL,			/* filter, filterarg */
1167	    MLX5E_MAX_TX_PAYLOAD_SIZE,	/* maxsize */
1168	    MLX5E_MAX_TX_MBUF_FRAGS,	/* nsegments */
1169	    MLX5E_MAX_TX_MBUF_SIZE,	/* maxsegsize */
1170	    0,				/* flags */
1171	    NULL, NULL,			/* lockfunc, lockfuncarg */
1172	    &sq->dma_tag)))
1173		goto done;
1174
1175	err = mlx5_alloc_map_uar(mdev, &sq->uar);
1176	if (err)
1177		goto err_free_dma_tag;
1178
1179	err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq,
1180	    &sq->wq_ctrl);
1181	if (err)
1182		goto err_unmap_free_uar;
1183
1184	sq->wq.db = &sq->wq.db[MLX5_SND_DBR];
1185	sq->bf_buf_size = (1 << MLX5_CAP_GEN(mdev, log_bf_reg_size)) / 2;
1186
1187	err = mlx5e_alloc_sq_db(sq);
1188	if (err)
1189		goto err_sq_wq_destroy;
1190
1191	sq->mkey_be = c->mkey_be;
1192	sq->ifp = priv->ifp;
1193	sq->priv = priv;
1194	sq->tc = tc;
1195
1196	mlx5e_update_sq_inline(sq);
1197
1198	snprintf(buffer, sizeof(buffer), "txstat%dtc%d", c->ix, tc);
1199	mlx5e_create_stats(&sq->stats.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
1200	    buffer, mlx5e_sq_stats_desc, MLX5E_SQ_STATS_NUM,
1201	    sq->stats.arg);
1202
1203	return (0);
1204
1205err_sq_wq_destroy:
1206	mlx5_wq_destroy(&sq->wq_ctrl);
1207
1208err_unmap_free_uar:
1209	mlx5_unmap_free_uar(mdev, &sq->uar);
1210
1211err_free_dma_tag:
1212	bus_dma_tag_destroy(sq->dma_tag);
1213done:
1214	return (err);
1215}
1216
1217static void
1218mlx5e_destroy_sq(struct mlx5e_sq *sq)
1219{
1220	/* destroy all sysctl nodes */
1221	sysctl_ctx_free(&sq->stats.ctx);
1222
1223	mlx5e_free_sq_db(sq);
1224	mlx5_wq_destroy(&sq->wq_ctrl);
1225	mlx5_unmap_free_uar(sq->priv->mdev, &sq->uar);
1226}
1227
1228int
1229mlx5e_enable_sq(struct mlx5e_sq *sq, struct mlx5e_sq_param *param,
1230    int tis_num)
1231{
1232	void *in;
1233	void *sqc;
1234	void *wq;
1235	int inlen;
1236	int err;
1237
1238	inlen = MLX5_ST_SZ_BYTES(create_sq_in) +
1239	    sizeof(u64) * sq->wq_ctrl.buf.npages;
1240	in = mlx5_vzalloc(inlen);
1241	if (in == NULL)
1242		return (-ENOMEM);
1243
1244	sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
1245	wq = MLX5_ADDR_OF(sqc, sqc, wq);
1246
1247	memcpy(sqc, param->sqc, sizeof(param->sqc));
1248
1249	MLX5_SET(sqc, sqc, tis_num_0, tis_num);
1250	MLX5_SET(sqc, sqc, cqn, sq->cq.mcq.cqn);
1251	MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST);
1252	MLX5_SET(sqc, sqc, tis_lst_sz, 1);
1253	MLX5_SET(sqc, sqc, flush_in_error_en, 1);
1254
1255	MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
1256	MLX5_SET(wq, wq, uar_page, sq->uar.index);
1257	MLX5_SET(wq, wq, log_wq_pg_sz, sq->wq_ctrl.buf.page_shift -
1258	    PAGE_SHIFT);
1259	MLX5_SET64(wq, wq, dbr_addr, sq->wq_ctrl.db.dma);
1260
1261	mlx5_fill_page_array(&sq->wq_ctrl.buf,
1262	    (__be64 *) MLX5_ADDR_OF(wq, wq, pas));
1263
1264	err = mlx5_core_create_sq(sq->priv->mdev, in, inlen, &sq->sqn);
1265
1266	kvfree(in);
1267
1268	return (err);
1269}
1270
1271int
1272mlx5e_modify_sq(struct mlx5e_sq *sq, int curr_state, int next_state)
1273{
1274	void *in;
1275	void *sqc;
1276	int inlen;
1277	int err;
1278
1279	inlen = MLX5_ST_SZ_BYTES(modify_sq_in);
1280	in = mlx5_vzalloc(inlen);
1281	if (in == NULL)
1282		return (-ENOMEM);
1283
1284	sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);
1285
1286	MLX5_SET(modify_sq_in, in, sqn, sq->sqn);
1287	MLX5_SET(modify_sq_in, in, sq_state, curr_state);
1288	MLX5_SET(sqc, sqc, state, next_state);
1289
1290	err = mlx5_core_modify_sq(sq->priv->mdev, in, inlen);
1291
1292	kvfree(in);
1293
1294	return (err);
1295}
1296
1297void
1298mlx5e_disable_sq(struct mlx5e_sq *sq)
1299{
1300
1301	mlx5_core_destroy_sq(sq->priv->mdev, sq->sqn);
1302}
1303
1304static int
1305mlx5e_open_sq(struct mlx5e_channel *c,
1306    int tc,
1307    struct mlx5e_sq_param *param,
1308    struct mlx5e_sq *sq)
1309{
1310	int err;
1311
1312	err = mlx5e_create_sq(c, tc, param, sq);
1313	if (err)
1314		return (err);
1315
1316	err = mlx5e_enable_sq(sq, param, c->priv->tisn[tc]);
1317	if (err)
1318		goto err_destroy_sq;
1319
1320	err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY);
1321	if (err)
1322		goto err_disable_sq;
1323
1324	WRITE_ONCE(sq->running, 1);
1325
1326	return (0);
1327
1328err_disable_sq:
1329	mlx5e_disable_sq(sq);
1330err_destroy_sq:
1331	mlx5e_destroy_sq(sq);
1332
1333	return (err);
1334}
1335
1336static void
1337mlx5e_sq_send_nops_locked(struct mlx5e_sq *sq, int can_sleep)
1338{
1339	/* fill up remainder with NOPs */
1340	while (sq->cev_counter != 0) {
1341		while (!mlx5e_sq_has_room_for(sq, 1)) {
1342			if (can_sleep != 0) {
1343				mtx_unlock(&sq->lock);
1344				msleep(4);
1345				mtx_lock(&sq->lock);
1346			} else {
1347				goto done;
1348			}
1349		}
1350		/* send a single NOP */
1351		mlx5e_send_nop(sq, 1);
1352		atomic_thread_fence_rel();
1353	}
1354done:
1355	/* Check if we need to write the doorbell */
1356	if (likely(sq->doorbell.d64 != 0)) {
1357		mlx5e_tx_notify_hw(sq, sq->doorbell.d32, 0);
1358		sq->doorbell.d64 = 0;
1359	}
1360}
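
/*
 * A note on the mechanism above (illustrative): completion events
 * are requested only once per "cev_factor" WQEs. Padding the ring
 * with NOPs advances the WQE count to the next completion request,
 * at which point sq->cev_counter returns to zero, so the final
 * completion event is generated even when no further traffic
 * arrives.
 */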
1361
1362void
1363mlx5e_sq_cev_timeout(void *arg)
1364{
1365	struct mlx5e_sq *sq = arg;
1366
1367	mtx_assert(&sq->lock, MA_OWNED);
1368
1369	/* check next state */
1370	switch (sq->cev_next_state) {
1371	case MLX5E_CEV_STATE_SEND_NOPS:
1372		/* fill TX ring with NOPs, if any */
1373		mlx5e_sq_send_nops_locked(sq, 0);
1374
1375		/* check if completed */
1376		if (sq->cev_counter == 0) {
1377			sq->cev_next_state = MLX5E_CEV_STATE_INITIAL;
1378			return;
1379		}
1380		break;
1381	default:
1382		/* send NOPs on next timeout */
1383		sq->cev_next_state = MLX5E_CEV_STATE_SEND_NOPS;
1384		break;
1385	}
1386
1387	/* restart timer */
1388	callout_reset_curcpu(&sq->cev_callout, hz, mlx5e_sq_cev_timeout, sq);
1389}
1390
1391void
1392mlx5e_drain_sq(struct mlx5e_sq *sq)
1393{
1394	int error;
1395	struct mlx5_core_dev *mdev = sq->priv->mdev;
1396
1397	/*
1398	 * Check if already stopped.
1399	 *
1400	 * NOTE: Serialization of this function is managed by the
1401	 * caller ensuring the priv's state lock is locked or in case
1402	 * of rate limit support, a single thread manages drain and
1403	 * resume of SQs. The "running" variable can therefore safely
1404	 * be read without any locks.
1405	 */
1406	if (READ_ONCE(sq->running) == 0)
1407		return;
1408
1409	/* don't put more packets into the SQ */
1410	WRITE_ONCE(sq->running, 0);
1411
1412	/* serialize access to DMA rings */
1413	mtx_lock(&sq->lock);
1414
1415	/* teardown event factor timer, if any */
1416	sq->cev_next_state = MLX5E_CEV_STATE_HOLD_NOPS;
1417	callout_stop(&sq->cev_callout);
1418
1419	/* send dummy NOPs in order to flush the transmit ring */
1420	mlx5e_sq_send_nops_locked(sq, 1);
1421	mtx_unlock(&sq->lock);
1422
1423	/* make sure it is safe to free the callout */
1424	callout_drain(&sq->cev_callout);
1425
1426	/* wait till SQ is empty or link is down */
1427	mtx_lock(&sq->lock);
1428	while (sq->cc != sq->pc &&
1429	    (sq->priv->media_status_last & IFM_ACTIVE) != 0 &&
1430	    mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) {
1431		mtx_unlock(&sq->lock);
1432		msleep(1);
1433		sq->cq.mcq.comp(&sq->cq.mcq);
1434		mtx_lock(&sq->lock);
1435	}
1436	mtx_unlock(&sq->lock);
1437
1438	/* error out remaining requests */
1439	error = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY, MLX5_SQC_STATE_ERR);
1440	if (error != 0) {
1441		if_printf(sq->ifp,
1442		    "mlx5e_modify_sq() from RDY to ERR failed: %d\n", error);
1443	}
1444
1445	/* wait till SQ is empty */
1446	mtx_lock(&sq->lock);
1447	while (sq->cc != sq->pc &&
1448	       mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) {
1449		mtx_unlock(&sq->lock);
1450		msleep(1);
1451		sq->cq.mcq.comp(&sq->cq.mcq);
1452		mtx_lock(&sq->lock);
1453	}
1454	mtx_unlock(&sq->lock);
1455}
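
/*
 * A minimal usage sketch (illustrative only): per the NOTE above,
 * callers serialize this function through the priv's state lock,
 * which is what makes the unlocked read of "running" safe.
 */
#if 0
	PRIV_LOCK(priv);
	mlx5e_drain_sq(sq);
	PRIV_UNLOCK(priv);
#endif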
1456
1457static void
1458mlx5e_close_sq_wait(struct mlx5e_sq *sq)
1459{
1460
1461	mlx5e_drain_sq(sq);
1462	mlx5e_disable_sq(sq);
1463	mlx5e_destroy_sq(sq);
1464}
1465
1466static int
1467mlx5e_create_cq(struct mlx5e_priv *priv,
1468    struct mlx5e_cq_param *param,
1469    struct mlx5e_cq *cq,
1470    mlx5e_cq_comp_t *comp,
1471    int eq_ix)
1472{
1473	struct mlx5_core_dev *mdev = priv->mdev;
1474	struct mlx5_core_cq *mcq = &cq->mcq;
1475	int eqn_not_used;
1476	int irqn;
1477	int err;
1478	u32 i;
1479
1480	param->wq.buf_numa_node = 0;
1481	param->wq.db_numa_node = 0;
1482
1483	err = mlx5_cqwq_create(mdev, &param->wq, param->cqc, &cq->wq,
1484	    &cq->wq_ctrl);
1485	if (err)
1486		return (err);
1487
1488	mlx5_vector2eqn(mdev, eq_ix, &eqn_not_used, &irqn);
1489
1490	mcq->cqe_sz = 64;
1491	mcq->set_ci_db = cq->wq_ctrl.db.db;
1492	mcq->arm_db = cq->wq_ctrl.db.db + 1;
1493	*mcq->set_ci_db = 0;
1494	*mcq->arm_db = 0;
1495	mcq->vector = eq_ix;
1496	mcq->comp = comp;
1497	mcq->event = mlx5e_cq_error_event;
1498	mcq->irqn = irqn;
1499	mcq->uar = &priv->cq_uar;
1500
1501	for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) {
1502		struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, i);
1503
1504		cqe->op_own = 0xf1;
1505	}
1506
1507	cq->priv = priv;
1508
1509	return (0);
1510}
1511
1512static void
1513mlx5e_destroy_cq(struct mlx5e_cq *cq)
1514{
1515	mlx5_wq_destroy(&cq->wq_ctrl);
1516}
1517
1518static int
1519mlx5e_enable_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param, int eq_ix)
1520{
1521	struct mlx5_core_cq *mcq = &cq->mcq;
1522	void *in;
1523	void *cqc;
1524	int inlen;
1525	int irqn_not_used;
1526	int eqn;
1527	int err;
1528
1529	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
1530	    sizeof(u64) * cq->wq_ctrl.buf.npages;
1531	in = mlx5_vzalloc(inlen);
1532	if (in == NULL)
1533		return (-ENOMEM);
1534
1535	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
1536
1537	memcpy(cqc, param->cqc, sizeof(param->cqc));
1538
1539	mlx5_fill_page_array(&cq->wq_ctrl.buf,
1540	    (__be64 *) MLX5_ADDR_OF(create_cq_in, in, pas));
1541
1542	mlx5_vector2eqn(cq->priv->mdev, eq_ix, &eqn, &irqn_not_used);
1543
1544	MLX5_SET(cqc, cqc, c_eqn, eqn);
1545	MLX5_SET(cqc, cqc, uar_page, mcq->uar->index);
1546	MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
1547	    PAGE_SHIFT);
1548	MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma);
1549
1550	err = mlx5_core_create_cq(cq->priv->mdev, mcq, in, inlen);
1551
1552	kvfree(in);
1553
1554	if (err)
1555		return (err);
1556
1557	mlx5e_cq_arm(cq, MLX5_GET_DOORBELL_LOCK(&cq->priv->doorbell_lock));
1558
1559	return (0);
1560}
1561
1562static void
1563mlx5e_disable_cq(struct mlx5e_cq *cq)
1564{
1565
1566	mlx5_core_destroy_cq(cq->priv->mdev, &cq->mcq);
1567}
1568
1569int
1570mlx5e_open_cq(struct mlx5e_priv *priv,
1571    struct mlx5e_cq_param *param,
1572    struct mlx5e_cq *cq,
1573    mlx5e_cq_comp_t *comp,
1574    int eq_ix)
1575{
1576	int err;
1577
1578	err = mlx5e_create_cq(priv, param, cq, comp, eq_ix);
1579	if (err)
1580		return (err);
1581
1582	err = mlx5e_enable_cq(cq, param, eq_ix);
1583	if (err)
1584		goto err_destroy_cq;
1585
1586	return (0);
1587
1588err_destroy_cq:
1589	mlx5e_destroy_cq(cq);
1590
1591	return (err);
1592}
1593
1594void
1595mlx5e_close_cq(struct mlx5e_cq *cq)
1596{
1597	mlx5e_disable_cq(cq);
1598	mlx5e_destroy_cq(cq);
1599}
1600
1601static int
1602mlx5e_open_tx_cqs(struct mlx5e_channel *c,
1603    struct mlx5e_channel_param *cparam)
1604{
1605	int err;
1606	int tc;
1607
1608	for (tc = 0; tc < c->num_tc; tc++) {
1609		/* open completion queue */
1610		err = mlx5e_open_cq(c->priv, &cparam->tx_cq, &c->sq[tc].cq,
1611		    &mlx5e_tx_cq_comp, c->ix);
1612		if (err)
1613			goto err_close_tx_cqs;
1614	}
1615	return (0);
1616
1617err_close_tx_cqs:
1618	for (tc--; tc >= 0; tc--)
1619		mlx5e_close_cq(&c->sq[tc].cq);
1620
1621	return (err);
1622}
1623
1624static void
1625mlx5e_close_tx_cqs(struct mlx5e_channel *c)
1626{
1627	int tc;
1628
1629	for (tc = 0; tc < c->num_tc; tc++)
1630		mlx5e_close_cq(&c->sq[tc].cq);
1631}
1632
1633static int
1634mlx5e_open_sqs(struct mlx5e_channel *c,
1635    struct mlx5e_channel_param *cparam)
1636{
1637	int err;
1638	int tc;
1639
1640	for (tc = 0; tc < c->num_tc; tc++) {
1641		err = mlx5e_open_sq(c, tc, &cparam->sq, &c->sq[tc]);
1642		if (err)
1643			goto err_close_sqs;
1644	}
1645
1646	return (0);
1647
1648err_close_sqs:
1649	for (tc--; tc >= 0; tc--)
1650		mlx5e_close_sq_wait(&c->sq[tc]);
1651
1652	return (err);
1653}
1654
1655static void
1656mlx5e_close_sqs_wait(struct mlx5e_channel *c)
1657{
1658	int tc;
1659
1660	for (tc = 0; tc < c->num_tc; tc++)
1661		mlx5e_close_sq_wait(&c->sq[tc]);
1662}
1663
1664static void
1665mlx5e_chan_mtx_init(struct mlx5e_channel *c)
1666{
1667	int tc;
1668
1669	mtx_init(&c->rq.mtx, "mlx5rx", MTX_NETWORK_LOCK, MTX_DEF);
1670
1671	callout_init_mtx(&c->rq.watchdog, &c->rq.mtx, 0);
1672
1673	for (tc = 0; tc < c->num_tc; tc++) {
1674		struct mlx5e_sq *sq = c->sq + tc;
1675
1676		mtx_init(&sq->lock, "mlx5tx",
1677		    MTX_NETWORK_LOCK " TX", MTX_DEF);
1678		mtx_init(&sq->comp_lock, "mlx5comp",
1679		    MTX_NETWORK_LOCK " TX", MTX_DEF);
1680
1681		callout_init_mtx(&sq->cev_callout, &sq->lock, 0);
1682
1683		sq->cev_factor = c->priv->params_ethtool.tx_completion_fact;
1684
1685		/* ensure the TX completion event factor is not zero */
1686		if (sq->cev_factor == 0)
1687			sq->cev_factor = 1;
1688	}
1689}
1690
1691static void
1692mlx5e_chan_mtx_destroy(struct mlx5e_channel *c)
1693{
1694	int tc;
1695
1696	mtx_destroy(&c->rq.mtx);
1697
1698	for (tc = 0; tc < c->num_tc; tc++) {
1699		mtx_destroy(&c->sq[tc].lock);
1700		mtx_destroy(&c->sq[tc].comp_lock);
1701	}
1702}
1703
1704static int
1705mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
1706    struct mlx5e_channel_param *cparam,
1707    struct mlx5e_channel *c)
1708{
1709	int err;
1710
1711	memset(c, 0, sizeof(*c));
1712
1713	c->priv = priv;
1714	c->ix = ix;
1715	c->ifp = priv->ifp;
1716	c->mkey_be = cpu_to_be32(priv->mr.key);
1717	c->num_tc = priv->num_tc;
1718
1719	/* init mutexes */
1720	mlx5e_chan_mtx_init(c);
1721
1722	/* open transmit completion queue */
1723	err = mlx5e_open_tx_cqs(c, cparam);
1724	if (err)
1725		goto err_free;
1726
1727	/* open receive completion queue */
1728	err = mlx5e_open_cq(c->priv, &cparam->rx_cq, &c->rq.cq,
1729	    &mlx5e_rx_cq_comp, c->ix);
1730	if (err)
1731		goto err_close_tx_cqs;
1732
1733	err = mlx5e_open_sqs(c, cparam);
1734	if (err)
1735		goto err_close_rx_cq;
1736
1737	err = mlx5e_open_rq(c, &cparam->rq, &c->rq);
1738	if (err)
1739		goto err_close_sqs;
1740
1741	/* poll receive queue initially */
1742	c->rq.cq.mcq.comp(&c->rq.cq.mcq);
1743
1744	return (0);
1745
1746err_close_sqs:
1747	mlx5e_close_sqs_wait(c);
1748
1749err_close_rx_cq:
1750	mlx5e_close_cq(&c->rq.cq);
1751
1752err_close_tx_cqs:
1753	mlx5e_close_tx_cqs(c);
1754
1755err_free:
1756	/* destroy mutexes */
1757	mlx5e_chan_mtx_destroy(c);
1758	return (err);
1759}
1760
1761static void
1762mlx5e_close_channel(struct mlx5e_channel *c)
1763{
1764	mlx5e_close_rq(&c->rq);
1765}
1766
1767static void
1768mlx5e_close_channel_wait(struct mlx5e_channel *c)
1769{
1770	mlx5e_close_rq_wait(&c->rq);
1771	mlx5e_close_sqs_wait(c);
1772	mlx5e_close_tx_cqs(c);
1773	/* destroy mutexes */
1774	mlx5e_chan_mtx_destroy(c);
1775}
1776
1777static int
1778mlx5e_get_wqe_sz(struct mlx5e_priv *priv, u32 *wqe_sz, u32 *nsegs)
1779{
1780	u32 r, n;
1781
1782	r = priv->params.hw_lro_en ? priv->params.lro_wqe_sz :
1783	    MLX5E_SW2MB_MTU(priv->ifp->if_mtu);
1784	if (r > MJUM16BYTES)
1785		return (-ENOMEM);
1786
1787	if (r > MJUM9BYTES)
1788		r = MJUM16BYTES;
1789	else if (r > MJUMPAGESIZE)
1790		r = MJUM9BYTES;
1791	else if (r > MCLBYTES)
1792		r = MJUMPAGESIZE;
1793	else
1794		r = MCLBYTES;
1795
1796	/*
1797	 * n + 1 must be a power of two, because the stride size must
1798	 * be. The stride size is 16 * (n + 1), as the first segment
1799	 * is the control segment.
1800	 */
1801	for (n = howmany(r, MLX5E_MAX_RX_BYTES); !powerof2(n + 1); n++)
1802		;
1803
1804	*wqe_sz = r;
1805	*nsegs = n;
1806	return (0);
1807}
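
/*
 * Worked example (illustrative only, assuming MLX5E_MAX_RX_BYTES is
 * MCLBYTES, i.e. 2048): for r = MJUM9BYTES (9216) the howmany() call
 * yields n = 5. Neither 6 nor 7 is a power of two, so the loop stops
 * at n = 7, giving a stride size of 16 * (7 + 1) = 128 bytes: one
 * control segment plus seven data segments.
 */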
1808
1809static void
1810mlx5e_build_rq_param(struct mlx5e_priv *priv,
1811    struct mlx5e_rq_param *param)
1812{
1813	void *rqc = param->rqc;
1814	void *wq = MLX5_ADDR_OF(rqc, rqc, wq);
1815	u32 wqe_sz, nsegs;
1816
1817	mlx5e_get_wqe_sz(priv, &wqe_sz, &nsegs);
1818	MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST);
1819	MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN);
1820	MLX5_SET(wq, wq, log_wq_stride, ilog2(sizeof(struct mlx5e_rx_wqe) +
1821	    nsegs * sizeof(struct mlx5_wqe_data_seg)));
1822	MLX5_SET(wq, wq, log_wq_sz, priv->params.log_rq_size);
1823	MLX5_SET(wq, wq, pd, priv->pdn);
1824
1825	param->wq.buf_numa_node = 0;
1826	param->wq.db_numa_node = 0;
1827	param->wq.linear = 1;
1828}
1829
1830static void
1831mlx5e_build_sq_param(struct mlx5e_priv *priv,
1832    struct mlx5e_sq_param *param)
1833{
1834	void *sqc = param->sqc;
1835	void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
1836
1837	MLX5_SET(wq, wq, log_wq_sz, priv->params.log_sq_size);
1838	MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB));
1839	MLX5_SET(wq, wq, pd, priv->pdn);
1840
1841	param->wq.buf_numa_node = 0;
1842	param->wq.db_numa_node = 0;
1843	param->wq.linear = 1;
1844}
1845
1846static void
1847mlx5e_build_common_cq_param(struct mlx5e_priv *priv,
1848    struct mlx5e_cq_param *param)
1849{
1850	void *cqc = param->cqc;
1851
1852	MLX5_SET(cqc, cqc, uar_page, priv->cq_uar.index);
1853}
1854
1855static void
1856mlx5e_get_default_profile(struct mlx5e_priv *priv, int mode, struct net_dim_cq_moder *ptr)
1857{
1858
1859	*ptr = net_dim_get_profile(mode, MLX5E_DIM_DEFAULT_PROFILE);
1860
1861	/* apply LRO restrictions */
1862	if (priv->params.hw_lro_en &&
1863	    ptr->pkts > MLX5E_DIM_MAX_RX_CQ_MODERATION_PKTS_WITH_LRO) {
1864		ptr->pkts = MLX5E_DIM_MAX_RX_CQ_MODERATION_PKTS_WITH_LRO;
1865	}
1866}
1867
1868static void
1869mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
1870    struct mlx5e_cq_param *param)
1871{
1872	struct net_dim_cq_moder curr;
1873	void *cqc = param->cqc;
1874
1875	/*
1876	 * We use MLX5_CQE_FORMAT_HASH because the RX hash mini CQE
1877	 * format is more beneficial for the FreeBSD use case.
1878	 *
1879	 * Adding support for MLX5_CQE_FORMAT_CSUM will require changes
1880	 * in mlx5e_decompress_cqe.
1881	 */
1882	if (priv->params.cqe_zipping_en) {
1883		MLX5_SET(cqc, cqc, mini_cqe_res_format, MLX5_CQE_FORMAT_HASH);
1884		MLX5_SET(cqc, cqc, cqe_compression_en, 1);
1885	}
1886
1887	MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_rq_size);
1888
1889	switch (priv->params.rx_cq_moderation_mode) {
1890	case 0:
1891		MLX5_SET(cqc, cqc, cq_period, priv->params.rx_cq_moderation_usec);
1892		MLX5_SET(cqc, cqc, cq_max_count, priv->params.rx_cq_moderation_pkts);
1893		MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
1894		break;
1895	case 1:
1896		MLX5_SET(cqc, cqc, cq_period, priv->params.rx_cq_moderation_usec);
1897		MLX5_SET(cqc, cqc, cq_max_count, priv->params.rx_cq_moderation_pkts);
1898		if (MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe))
1899			MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
1900		else
1901			MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
1902		break;
1903	case 2:
1904		mlx5e_get_default_profile(priv, NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE, &curr);
1905		MLX5_SET(cqc, cqc, cq_period, curr.usec);
1906		MLX5_SET(cqc, cqc, cq_max_count, curr.pkts);
1907		MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
1908		break;
1909	case 3:
1910		mlx5e_get_default_profile(priv, NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE, &curr);
1911		MLX5_SET(cqc, cqc, cq_period, curr.usec);
1912		MLX5_SET(cqc, cqc, cq_max_count, curr.pkts);
1913		if (MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe))
1914			MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
1915		else
1916			MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
1917		break;
1918	default:
1919		break;
1920	}
1921
1922	mlx5e_dim_build_cq_param(priv, param);
1923
1924	mlx5e_build_common_cq_param(priv, param);
1925}
1926
1927static void
1928mlx5e_build_tx_cq_param(struct mlx5e_priv *priv,
1929    struct mlx5e_cq_param *param)
1930{
1931	void *cqc = param->cqc;
1932
1933	MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_sq_size);
1934	MLX5_SET(cqc, cqc, cq_period, priv->params.tx_cq_moderation_usec);
1935	MLX5_SET(cqc, cqc, cq_max_count, priv->params.tx_cq_moderation_pkts);
1936
1937	switch (priv->params.tx_cq_moderation_mode) {
1938	case 0:
1939		MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
1940		break;
1941	default:
1942		if (MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe))
1943			MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
1944		else
1945			MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
1946		break;
1947	}
1948
1949	mlx5e_build_common_cq_param(priv, param);
1950}
1951
1952static void
1953mlx5e_build_channel_param(struct mlx5e_priv *priv,
1954    struct mlx5e_channel_param *cparam)
1955{
1956	memset(cparam, 0, sizeof(*cparam));
1957
1958	mlx5e_build_rq_param(priv, &cparam->rq);
1959	mlx5e_build_sq_param(priv, &cparam->sq);
1960	mlx5e_build_rx_cq_param(priv, &cparam->rx_cq);
1961	mlx5e_build_tx_cq_param(priv, &cparam->tx_cq);
1962}
1963
1964static int
1965mlx5e_open_channels(struct mlx5e_priv *priv)
1966{
1967	struct mlx5e_channel_param cparam;
1968	int err;
1969	int i;
1970	int j;
1971
1972	mlx5e_build_channel_param(priv, &cparam);
1973	for (i = 0; i < priv->params.num_channels; i++) {
1974		err = mlx5e_open_channel(priv, i, &cparam, &priv->channel[i]);
1975		if (err)
1976			goto err_close_channels;
1977	}
1978
1979	for (j = 0; j < priv->params.num_channels; j++) {
1980		err = mlx5e_wait_for_min_rx_wqes(&priv->channel[j].rq);
1981		if (err)
1982			goto err_close_channels;
1983	}
1984
1985	return (0);
1986
1987err_close_channels:
1988	while (i--) {
1989		mlx5e_close_channel(&priv->channel[i]);
1990		mlx5e_close_channel_wait(&priv->channel[i]);
1991	}
1992	return (err);
1993}
1994
1995static void
1996mlx5e_close_channels(struct mlx5e_priv *priv)
1997{
1998	int i;
1999
2000	for (i = 0; i < priv->params.num_channels; i++)
2001		mlx5e_close_channel(&priv->channel[i]);
2002	for (i = 0; i < priv->params.num_channels; i++)
2003		mlx5e_close_channel_wait(&priv->channel[i]);
2004}
2005
2006static int
2007mlx5e_refresh_sq_params(struct mlx5e_priv *priv, struct mlx5e_sq *sq)
2008{
2009
2010	if (MLX5_CAP_GEN(priv->mdev, cq_period_mode_modify)) {
2011		uint8_t cq_mode;
2012
2013		switch (priv->params.tx_cq_moderation_mode) {
2014		case 0:
2015		case 2:
2016			cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
2017			break;
2018		default:
2019			cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_CQE;
2020			break;
2021		}
2022
2023		return (mlx5_core_modify_cq_moderation_mode(priv->mdev, &sq->cq.mcq,
2024		    priv->params.tx_cq_moderation_usec,
2025		    priv->params.tx_cq_moderation_pkts,
2026		    cq_mode));
2027	}
2028
2029	return (mlx5_core_modify_cq_moderation(priv->mdev, &sq->cq.mcq,
2030	    priv->params.tx_cq_moderation_usec,
2031	    priv->params.tx_cq_moderation_pkts));
2032}
2033
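/*
 * Push the current RX moderation parameters to an existing receive
 * queue CQ. Dynamic interrupt moderation is disabled and its work task
 * drained first, so the CQ can be reprogrammed without racing against
 * a concurrent moderation update.
 */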
2034static int
2035mlx5e_refresh_rq_params(struct mlx5e_priv *priv, struct mlx5e_rq *rq)
2036{
2037
2038	if (MLX5_CAP_GEN(priv->mdev, cq_period_mode_modify)) {
2039		uint8_t cq_mode;
2040		uint8_t dim_mode;
2041		int retval;
2042
2043		switch (priv->params.rx_cq_moderation_mode) {
2044		case 0:
2045		case 2:
2046			cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
2047			dim_mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE;
2048			break;
2049		default:
2050			cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_CQE;
2051			dim_mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE;
2052			break;
2053		}
2054
2055		/* tear down dynamic interrupt moderation */
2056		mtx_lock(&rq->mtx);
2057		rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_DISABLED;
2058		mtx_unlock(&rq->mtx);
2059
2060		/* wait for dynamic interrupt moderation work task, if any */
2061		cancel_work_sync(&rq->dim.work);
2062
2063		if (priv->params.rx_cq_moderation_mode >= 2) {
2064			struct net_dim_cq_moder curr;
2065
2066			mlx5e_get_default_profile(priv, dim_mode, &curr);
2067
2068			retval = mlx5_core_modify_cq_moderation_mode(priv->mdev, &rq->cq.mcq,
2069			    curr.usec, curr.pkts, cq_mode);
2070
2071			/* set dynamic interrupt moderation mode and zero defaults */
2072			mtx_lock(&rq->mtx);
2073			rq->dim.mode = dim_mode;
2074			rq->dim.state = 0;
2075			rq->dim.profile_ix = MLX5E_DIM_DEFAULT_PROFILE;
2076			mtx_unlock(&rq->mtx);
2077		} else {
2078			retval = mlx5_core_modify_cq_moderation_mode(priv->mdev, &rq->cq.mcq,
2079			    priv->params.rx_cq_moderation_usec,
2080			    priv->params.rx_cq_moderation_pkts,
2081			    cq_mode);
2082		}
2083		return (retval);
2084	}
2085
2086	return (mlx5_core_modify_cq_moderation(priv->mdev, &rq->cq.mcq,
2087	    priv->params.rx_cq_moderation_usec,
2088	    priv->params.rx_cq_moderation_pkts));
2089}
2090
2091static int
2092mlx5e_refresh_channel_params_sub(struct mlx5e_priv *priv, struct mlx5e_channel *c)
2093{
2094	int err;
2095	int i;
2096
2097	err = mlx5e_refresh_rq_params(priv, &c->rq);
2098	if (err)
2099		goto done;
2100
2101	for (i = 0; i != c->num_tc; i++) {
2102		err = mlx5e_refresh_sq_params(priv, &c->sq[i]);
2103		if (err)
2104			goto done;
2105	}
2106done:
2107	return (err);
2108}
2109
2110int
2111mlx5e_refresh_channel_params(struct mlx5e_priv *priv)
2112{
2113	int i;
2114
2115	/* check if channels are closed */
2116	if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
2117		return (EINVAL);
2118
2119	for (i = 0; i < priv->params.num_channels; i++) {
2120		int err;
2121
2122		err = mlx5e_refresh_channel_params_sub(priv, &priv->channel[i]);
2123		if (err)
2124			return (err);
2125	}
2126	return (0);
2127}
2128
2129static int
2130mlx5e_open_tis(struct mlx5e_priv *priv, int tc)
2131{
2132	struct mlx5_core_dev *mdev = priv->mdev;
2133	u32 in[MLX5_ST_SZ_DW(create_tis_in)];
2134	void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
2135
2136	memset(in, 0, sizeof(in));
2137
2138	MLX5_SET(tisc, tisc, prio, tc);
2139	MLX5_SET(tisc, tisc, transport_domain, priv->tdn);
2140
2141	return (mlx5_core_create_tis(mdev, in, sizeof(in), &priv->tisn[tc]));
2142}
2143
2144static void
2145mlx5e_close_tis(struct mlx5e_priv *priv, int tc)
2146{
2147	mlx5_core_destroy_tis(priv->mdev, priv->tisn[tc]);
2148}
2149
2150static int
2151mlx5e_open_tises(struct mlx5e_priv *priv)
2152{
2153	int num_tc = priv->num_tc;
2154	int err;
2155	int tc;
2156
2157	for (tc = 0; tc < num_tc; tc++) {
2158		err = mlx5e_open_tis(priv, tc);
2159		if (err)
2160			goto err_close_tises;
2161	}
2162
2163	return (0);
2164
2165err_close_tises:
2166	for (tc--; tc >= 0; tc--)
2167		mlx5e_close_tis(priv, tc);
2168
2169	return (err);
2170}
2171
2172static void
2173mlx5e_close_tises(struct mlx5e_priv *priv)
2174{
2175	int num_tc = priv->num_tc;
2176	int tc;
2177
2178	for (tc = 0; tc < num_tc; tc++)
2179		mlx5e_close_tis(priv, tc);
2180}
2181
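/*
 * Create the receive queue indirection table, RQT, used for RSS. Each
 * entry is reduced modulo the number of channels and then rounded down
 * to a multiple of the RSS stride. As an illustration, with four
 * channels and channels_rsss equal to two, the entries map to RQs
 * 0, 0, 2, 2, 0, 0, 2, 2 and so on.
 */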
2182static int
2183mlx5e_open_rqt(struct mlx5e_priv *priv)
2184{
2185	struct mlx5_core_dev *mdev = priv->mdev;
2186	u32 *in;
2187	u32 out[MLX5_ST_SZ_DW(create_rqt_out)] = {0};
2188	void *rqtc;
2189	int inlen;
2190	int err;
2191	int sz;
2192	int i;
2193
2194	sz = 1 << priv->params.rx_hash_log_tbl_sz;
2195
2196	inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz;
2197	in = mlx5_vzalloc(inlen);
2198	if (in == NULL)
2199		return (-ENOMEM);
2200	rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
2201
2202	MLX5_SET(rqtc, rqtc, rqt_actual_size, sz);
2203	MLX5_SET(rqtc, rqtc, rqt_max_size, sz);
2204
2205	for (i = 0; i < sz; i++) {
2206		int ix = i;
2207#ifdef RSS
2208		ix = rss_get_indirection_to_bucket(ix);
2209#endif
2210		/* ensure we don't overflow */
2211		ix %= priv->params.num_channels;
2212
2213		/* apply receive side scaling stride, if any */
2214		ix -= ix % (int)priv->params.channels_rsss;
2215
2216		MLX5_SET(rqtc, rqtc, rq_num[i], priv->channel[ix].rq.rqn);
2217	}
2218
2219	MLX5_SET(create_rqt_in, in, opcode, MLX5_CMD_OP_CREATE_RQT);
2220
2221	err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
2222	if (!err)
2223		priv->rqtn = MLX5_GET(create_rqt_out, out, rqtn);
2224
2225	kvfree(in);
2226
2227	return (err);
2228}
2229
2230static void
2231mlx5e_close_rqt(struct mlx5e_priv *priv)
2232{
2233	u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)] = {0};
2234	u32 out[MLX5_ST_SZ_DW(destroy_rqt_out)] = {0};
2235
2236	MLX5_SET(destroy_rqt_in, in, opcode, MLX5_CMD_OP_DESTROY_RQT);
2237	MLX5_SET(destroy_rqt_in, in, rqtn, priv->rqtn);
2238
2239	mlx5_cmd_exec(priv->mdev, in, sizeof(in), out, sizeof(out));
2240}
2241
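/*
 * Fill out a TIR context for traffic type "tt". MLX5E_TT_ANY dispatches
 * directly to the first channel's RQ, while all other traffic types
 * dispatch indirectly through the RQT using a Toeplitz hash over the
 * fields selected below. Unless the kernel RSS code supplies a key, a
 * fixed default key is programmed with symmetric hashing enabled.
 */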
2242static void
2243mlx5e_build_tir_ctx(struct mlx5e_priv *priv, u32 * tirc, int tt)
2244{
2245	void *hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
2246	__be32 *hkey;
2247
2248	MLX5_SET(tirc, tirc, transport_domain, priv->tdn);
2249
2250#define	ROUGH_MAX_L2_L3_HDR_SZ 256
2251
2252#define	MLX5_HASH_IP     (MLX5_HASH_FIELD_SEL_SRC_IP   |\
2253			  MLX5_HASH_FIELD_SEL_DST_IP)
2254
2255#define	MLX5_HASH_ALL    (MLX5_HASH_FIELD_SEL_SRC_IP   |\
2256			  MLX5_HASH_FIELD_SEL_DST_IP   |\
2257			  MLX5_HASH_FIELD_SEL_L4_SPORT |\
2258			  MLX5_HASH_FIELD_SEL_L4_DPORT)
2259
2260#define	MLX5_HASH_IP_IPSEC_SPI	(MLX5_HASH_FIELD_SEL_SRC_IP   |\
2261				 MLX5_HASH_FIELD_SEL_DST_IP   |\
2262				 MLX5_HASH_FIELD_SEL_IPSEC_SPI)
2263
2264	if (priv->params.hw_lro_en) {
2265		MLX5_SET(tirc, tirc, lro_enable_mask,
2266		    MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO |
2267		    MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO);
2268		MLX5_SET(tirc, tirc, lro_max_msg_sz,
2269		    (priv->params.lro_wqe_sz -
2270		    ROUGH_MAX_L2_L3_HDR_SZ) >> 8);
2271		/* TODO: add the option to choose timer value dynamically */
2272		MLX5_SET(tirc, tirc, lro_timeout_period_usecs,
2273		    MLX5_CAP_ETH(priv->mdev,
2274		    lro_timer_supported_periods[2]));
2275	}
2276
2277	/* setup parameters for hashing TIR type, if any */
2278	switch (tt) {
2279	case MLX5E_TT_ANY:
2280		MLX5_SET(tirc, tirc, disp_type,
2281		    MLX5_TIRC_DISP_TYPE_DIRECT);
2282		MLX5_SET(tirc, tirc, inline_rqn,
2283		    priv->channel[0].rq.rqn);
2284		break;
2285	default:
2286		MLX5_SET(tirc, tirc, disp_type,
2287		    MLX5_TIRC_DISP_TYPE_INDIRECT);
2288		MLX5_SET(tirc, tirc, indirect_table,
2289		    priv->rqtn);
2290		MLX5_SET(tirc, tirc, rx_hash_fn,
2291		    MLX5_TIRC_RX_HASH_FN_HASH_TOEPLITZ);
2292		hkey = (__be32 *) MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
2293#ifdef RSS
2294		/*
2295		 * The FreeBSD RSS implementation does not currently
2296		 * support symmetric Toeplitz hashes:
2297		 */
2298		MLX5_SET(tirc, tirc, rx_hash_symmetric, 0);
2299		rss_getkey((uint8_t *)hkey);
2300#else
2301		MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
2302		hkey[0] = cpu_to_be32(0xD181C62C);
2303		hkey[1] = cpu_to_be32(0xF7F4DB5B);
2304		hkey[2] = cpu_to_be32(0x1983A2FC);
2305		hkey[3] = cpu_to_be32(0x943E1ADB);
2306		hkey[4] = cpu_to_be32(0xD9389E6B);
2307		hkey[5] = cpu_to_be32(0xD1039C2C);
2308		hkey[6] = cpu_to_be32(0xA74499AD);
2309		hkey[7] = cpu_to_be32(0x593D56D9);
2310		hkey[8] = cpu_to_be32(0xF3253C06);
2311		hkey[9] = cpu_to_be32(0x2ADC1FFC);
2312#endif
2313		break;
2314	}
2315
2316	switch (tt) {
2317	case MLX5E_TT_IPV4_TCP:
2318		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2319		    MLX5_L3_PROT_TYPE_IPV4);
2320		MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
2321		    MLX5_L4_PROT_TYPE_TCP);
2322#ifdef RSS
2323		if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_TCP_IPV4)) {
2324			MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2325			    MLX5_HASH_IP);
2326		} else
2327#endif
2328		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2329		    MLX5_HASH_ALL);
2330		break;
2331
2332	case MLX5E_TT_IPV6_TCP:
2333		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2334		    MLX5_L3_PROT_TYPE_IPV6);
2335		MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
2336		    MLX5_L4_PROT_TYPE_TCP);
2337#ifdef RSS
2338		if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_TCP_IPV6)) {
2339			MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2340			    MLX5_HASH_IP);
2341		} else
2342#endif
2343		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2344		    MLX5_HASH_ALL);
2345		break;
2346
2347	case MLX5E_TT_IPV4_UDP:
2348		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2349		    MLX5_L3_PROT_TYPE_IPV4);
2350		MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
2351		    MLX5_L4_PROT_TYPE_UDP);
2352#ifdef RSS
2353		if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_UDP_IPV4)) {
2354			MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2355			    MLX5_HASH_IP);
2356		} else
2357#endif
2358		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2359		    MLX5_HASH_ALL);
2360		break;
2361
2362	case MLX5E_TT_IPV6_UDP:
2363		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2364		    MLX5_L3_PROT_TYPE_IPV6);
2365		MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
2366		    MLX5_L4_PROT_TYPE_UDP);
2367#ifdef RSS
2368		if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_UDP_IPV6)) {
2369			MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2370			    MLX5_HASH_IP);
2371		} else
2372#endif
2373		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2374		    MLX5_HASH_ALL);
2375		break;
2376
2377	case MLX5E_TT_IPV4_IPSEC_AH:
2378		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2379		    MLX5_L3_PROT_TYPE_IPV4);
2380		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2381		    MLX5_HASH_IP_IPSEC_SPI);
2382		break;
2383
2384	case MLX5E_TT_IPV6_IPSEC_AH:
2385		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2386		    MLX5_L3_PROT_TYPE_IPV6);
2387		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2388		    MLX5_HASH_IP_IPSEC_SPI);
2389		break;
2390
2391	case MLX5E_TT_IPV4_IPSEC_ESP:
2392		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2393		    MLX5_L3_PROT_TYPE_IPV4);
2394		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2395		    MLX5_HASH_IP_IPSEC_SPI);
2396		break;
2397
2398	case MLX5E_TT_IPV6_IPSEC_ESP:
2399		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2400		    MLX5_L3_PROT_TYPE_IPV6);
2401		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2402		    MLX5_HASH_IP_IPSEC_SPI);
2403		break;
2404
2405	case MLX5E_TT_IPV4:
2406		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2407		    MLX5_L3_PROT_TYPE_IPV4);
2408		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2409		    MLX5_HASH_IP);
2410		break;
2411
2412	case MLX5E_TT_IPV6:
2413		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2414		    MLX5_L3_PROT_TYPE_IPV6);
2415		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2416		    MLX5_HASH_IP);
2417		break;
2418
2419	default:
2420		break;
2421	}
2422}
2423
2424static int
2425mlx5e_open_tir(struct mlx5e_priv *priv, int tt)
2426{
2427	struct mlx5_core_dev *mdev = priv->mdev;
2428	u32 *in;
2429	void *tirc;
2430	int inlen;
2431	int err;
2432
2433	inlen = MLX5_ST_SZ_BYTES(create_tir_in);
2434	in = mlx5_vzalloc(inlen);
2435	if (in == NULL)
2436		return (-ENOMEM);
2437	tirc = MLX5_ADDR_OF(create_tir_in, in, tir_context);
2438
2439	mlx5e_build_tir_ctx(priv, tirc, tt);
2440
2441	err = mlx5_core_create_tir(mdev, in, inlen, &priv->tirn[tt]);
2442
2443	kvfree(in);
2444
2445	return (err);
2446}
2447
2448static void
2449mlx5e_close_tir(struct mlx5e_priv *priv, int tt)
2450{
2451	mlx5_core_destroy_tir(priv->mdev, priv->tirn[tt]);
2452}
2453
2454static int
2455mlx5e_open_tirs(struct mlx5e_priv *priv)
2456{
2457	int err;
2458	int i;
2459
2460	for (i = 0; i < MLX5E_NUM_TT; i++) {
2461		err = mlx5e_open_tir(priv, i);
2462		if (err)
2463			goto err_close_tirs;
2464	}
2465
2466	return (0);
2467
2468err_close_tirs:
2469	for (i--; i >= 0; i--)
2470		mlx5e_close_tir(priv, i);
2471
2472	return (err);
2473}
2474
2475static void
2476mlx5e_close_tirs(struct mlx5e_priv *priv)
2477{
2478	int i;
2479
2480	for (i = 0; i < MLX5E_NUM_TT; i++)
2481		mlx5e_close_tir(priv, i);
2482}
2483
2484/*
2485 * SW MTU does not include headers,
2486 * HW MTU includes all headers and checksums.
2487 */
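/*
 * As a rough example, assuming MLX5E_SW2HW_MTU() adds the Ethernet plus
 * VLAN header and the FCS, a 1500 byte SW MTU corresponds to a
 * 1522 byte HW MTU.
 */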
2488static int
2489mlx5e_set_dev_port_mtu(struct ifnet *ifp, int sw_mtu)
2490{
2491	struct mlx5e_priv *priv = ifp->if_softc;
2492	struct mlx5_core_dev *mdev = priv->mdev;
2493	int hw_mtu;
2494	int err;
2495
2496	hw_mtu = MLX5E_SW2HW_MTU(sw_mtu);
2497
2498	err = mlx5_set_port_mtu(mdev, hw_mtu);
2499	if (err) {
2500		if_printf(ifp, "%s: mlx5_set_port_mtu failed setting %d, err=%d\n",
2501		    __func__, sw_mtu, err);
2502		return (err);
2503	}
2504
2505	/* Update vport context MTU */
2506	err = mlx5_set_vport_mtu(mdev, hw_mtu);
2507	if (err) {
2508		if_printf(ifp, "%s: Failed updating vport context with MTU size, err=%d\n",
2509		    __func__, err);
2510	}
2511
2512	ifp->if_mtu = sw_mtu;
2513
2514	err = mlx5_query_vport_mtu(mdev, &hw_mtu);
2515	if (err || !hw_mtu) {
2516		/* fallback to port oper mtu */
2517		err = mlx5_query_port_oper_mtu(mdev, &hw_mtu);
2518	}
2519	if (err) {
2520		if_printf(ifp, "Querying port MTU after setting the "
2521		    "new MTU value failed\n");
2522		return (err);
2523	} else if (MLX5E_HW2SW_MTU(hw_mtu) < sw_mtu) {
2524		err = -E2BIG;
2525		if_printf(ifp, "Port MTU %d is smaller than "
2526		    "ifp mtu %d\n", hw_mtu, sw_mtu);
2527	} else if (MLX5E_HW2SW_MTU(hw_mtu) > sw_mtu) {
2528		err = -EINVAL;
2529		if_printf(ifp, "Port MTU %d is bigger than "
2530		    "ifp mtu %d\n", hw_mtu, sw_mtu);
2531	}
2532	priv->params_ethtool.hw_mtu = hw_mtu;
2533
2534	return (err);
2535}
2536
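/*
 * Bring the interface up: TISes, a queue counter, the channels, the
 * RQT, the TIRs, the flow table and the VLAN rules are created in that
 * order, and are torn down again in reverse order on failure.
 */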
2537int
2538mlx5e_open_locked(struct ifnet *ifp)
2539{
2540	struct mlx5e_priv *priv = ifp->if_softc;
2541	int err;
2542	u16 set_id;
2543
2544	/* check if already opened */
2545	if (test_bit(MLX5E_STATE_OPENED, &priv->state) != 0)
2546		return (0);
2547
2548#ifdef RSS
2549	if (rss_getnumbuckets() > priv->params.num_channels) {
2550		if_printf(ifp, "NOTE: There are more RSS buckets(%u) than "
2551		    "channels(%u) available\n", rss_getnumbuckets(),
2552		    priv->params.num_channels);
2553	}
2554#endif
2555	err = mlx5e_open_tises(priv);
2556	if (err) {
2557		if_printf(ifp, "%s: mlx5e_open_tises failed, %d\n",
2558		    __func__, err);
2559		return (err);
2560	}
2561	err = mlx5_vport_alloc_q_counter(priv->mdev,
2562	    MLX5_INTERFACE_PROTOCOL_ETH, &set_id);
2563	if (err) {
2564		if_printf(priv->ifp,
2565		    "%s: mlx5_vport_alloc_q_counter failed: %d\n",
2566		    __func__, err);
2567		goto err_close_tises;
2568	}
2569	/* store counter set ID */
2570	priv->counter_set_id = set_id;
2571
2572	err = mlx5e_open_channels(priv);
2573	if (err) {
2574		if_printf(ifp, "%s: mlx5e_open_channels failed, %d\n",
2575		    __func__, err);
2576		goto err_dalloc_q_counter;
2577	}
2578	err = mlx5e_open_rqt(priv);
2579	if (err) {
2580		if_printf(ifp, "%s: mlx5e_open_rqt failed, %d\n",
2581		    __func__, err);
2582		goto err_close_channels;
2583	}
2584	err = mlx5e_open_tirs(priv);
2585	if (err) {
2586		if_printf(ifp, "%s: mlx5e_open_tir failed, %d\n",
2587		    __func__, err);
2588		goto err_close_rqls;
2589	}
2590	err = mlx5e_open_flow_table(priv);
2591	if (err) {
2592		if_printf(ifp, "%s: mlx5e_open_flow_table failed, %d\n",
2593		    __func__, err);
2594		goto err_close_tirs;
2595	}
2596	err = mlx5e_add_all_vlan_rules(priv);
2597	if (err) {
2598		if_printf(ifp, "%s: mlx5e_add_all_vlan_rules failed, %d\n",
2599		    __func__, err);
2600		goto err_close_flow_table;
2601	}
2602	set_bit(MLX5E_STATE_OPENED, &priv->state);
2603
2604	mlx5e_update_carrier(priv);
2605	mlx5e_set_rx_mode_core(priv);
2606
2607	return (0);
2608
2609err_close_flow_table:
2610	mlx5e_close_flow_table(priv);
2611
2612err_close_tirs:
2613	mlx5e_close_tirs(priv);
2614
2615err_close_rqls:
2616	mlx5e_close_rqt(priv);
2617
2618err_close_channels:
2619	mlx5e_close_channels(priv);
2620
2621err_dalloc_q_counter:
2622	mlx5_vport_dealloc_q_counter(priv->mdev,
2623	    MLX5_INTERFACE_PROTOCOL_ETH, priv->counter_set_id);
2624
2625err_close_tises:
2626	mlx5e_close_tises(priv);
2627
2628	return (err);
2629}
2630
2631static void
2632mlx5e_open(void *arg)
2633{
2634	struct mlx5e_priv *priv = arg;
2635
2636	PRIV_LOCK(priv);
2637	if (mlx5_set_port_status(priv->mdev, MLX5_PORT_UP))
2638		if_printf(priv->ifp,
2639		    "%s: Setting port status to up failed\n",
2640		    __func__);
2641
2642	mlx5e_open_locked(priv->ifp);
2643	priv->ifp->if_drv_flags |= IFF_DRV_RUNNING;
2644	PRIV_UNLOCK(priv);
2645}
2646
2647int
2648mlx5e_close_locked(struct ifnet *ifp)
2649{
2650	struct mlx5e_priv *priv = ifp->if_softc;
2651
2652	/* check if already closed */
2653	if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
2654		return (0);
2655
2656	clear_bit(MLX5E_STATE_OPENED, &priv->state);
2657
2658	mlx5e_set_rx_mode_core(priv);
2659	mlx5e_del_all_vlan_rules(priv);
2660	if_link_state_change(priv->ifp, LINK_STATE_DOWN);
2661	mlx5e_close_flow_table(priv);
2662	mlx5e_close_tirs(priv);
2663	mlx5e_close_rqt(priv);
2664	mlx5e_close_channels(priv);
2665	mlx5_vport_dealloc_q_counter(priv->mdev,
2666	    MLX5_INTERFACE_PROTOCOL_ETH, priv->counter_set_id);
2667	mlx5e_close_tises(priv);
2668
2669	return (0);
2670}
2671
2672#if (__FreeBSD_version >= 1100000)
2673static uint64_t
2674mlx5e_get_counter(struct ifnet *ifp, ift_counter cnt)
2675{
2676	struct mlx5e_priv *priv = ifp->if_softc;
2677	u64 retval;
2678
2679	/* PRIV_LOCK(priv); XXX not allowed */
2680	switch (cnt) {
2681	case IFCOUNTER_IPACKETS:
2682		retval = priv->stats.vport.rx_packets;
2683		break;
2684	case IFCOUNTER_IERRORS:
2685		retval = priv->stats.pport.in_range_len_errors +
2686		    priv->stats.pport.out_of_range_len +
2687		    priv->stats.pport.too_long_errors +
2688		    priv->stats.pport.check_seq_err +
2689		    priv->stats.pport.alignment_err;
2690		break;
2691	case IFCOUNTER_IQDROPS:
2692		retval = priv->stats.vport.rx_out_of_buffer;
2693		break;
2694	case IFCOUNTER_OPACKETS:
2695		retval = priv->stats.vport.tx_packets;
2696		break;
2697	case IFCOUNTER_OERRORS:
2698		retval = priv->stats.port_stats_debug.out_discards;
2699		break;
2700	case IFCOUNTER_IBYTES:
2701		retval = priv->stats.vport.rx_bytes;
2702		break;
2703	case IFCOUNTER_OBYTES:
2704		retval = priv->stats.vport.tx_bytes;
2705		break;
2706	case IFCOUNTER_IMCASTS:
2707		retval = priv->stats.vport.rx_multicast_packets;
2708		break;
2709	case IFCOUNTER_OMCASTS:
2710		retval = priv->stats.vport.tx_multicast_packets;
2711		break;
2712	case IFCOUNTER_OQDROPS:
2713		retval = priv->stats.vport.tx_queue_dropped;
2714		break;
2715	case IFCOUNTER_COLLISIONS:
2716		retval = priv->stats.pport.collisions;
2717		break;
2718	default:
2719		retval = if_get_counter_default(ifp, cnt);
2720		break;
2721	}
2722	/* PRIV_UNLOCK(priv); XXX not allowed */
2723	return (retval);
2724}
2725#endif
2726
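/*
 * The receive filter update issues firmware commands, which presumably
 * may sleep, so it is deferred to the per-interface workqueue instead
 * of being run directly from the caller.
 */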
2727static void
2728mlx5e_set_rx_mode(struct ifnet *ifp)
2729{
2730	struct mlx5e_priv *priv = ifp->if_softc;
2731
2732	queue_work(priv->wq, &priv->set_rx_mode_work);
2733}
2734
2735static int
2736mlx5e_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
2737{
2738	struct mlx5e_priv *priv;
2739	struct ifreq *ifr;
2740	struct ifi2creq i2c;
2741	int error = 0;
2742	int mask = 0;
2743	int size_read = 0;
2744	int module_status;
2745	int module_num;
2746	int max_mtu;
2747	uint8_t read_addr;
2748
2749	priv = ifp->if_softc;
2750
2751	/* check if detaching */
2752	if (priv == NULL || priv->gone != 0)
2753		return (ENXIO);
2754
2755	switch (command) {
2756	case SIOCSIFMTU:
2757		ifr = (struct ifreq *)data;
2758
2759		PRIV_LOCK(priv);
2760		mlx5_query_port_max_mtu(priv->mdev, &max_mtu);
2761
2762		if (ifr->ifr_mtu >= MLX5E_MTU_MIN &&
2763		    ifr->ifr_mtu <= MIN(MLX5E_MTU_MAX, max_mtu)) {
2764			int was_opened;
2765
2766			was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
2767			if (was_opened)
2768				mlx5e_close_locked(ifp);
2769
2770			/* set new MTU */
2771			mlx5e_set_dev_port_mtu(ifp, ifr->ifr_mtu);
2772
2773			if (was_opened)
2774				mlx5e_open_locked(ifp);
2775		} else {
2776			error = EINVAL;
2777			if_printf(ifp, "Invalid MTU value. Min val: %d, Max val: %d\n",
2778			    MLX5E_MTU_MIN, MIN(MLX5E_MTU_MAX, max_mtu));
2779		}
2780		PRIV_UNLOCK(priv);
2781		break;
2782	case SIOCSIFFLAGS:
2783		if ((ifp->if_flags & IFF_UP) &&
2784		    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
2785			mlx5e_set_rx_mode(ifp);
2786			break;
2787		}
2788		PRIV_LOCK(priv);
2789		if (ifp->if_flags & IFF_UP) {
2790			if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
2791				if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
2792					mlx5e_open_locked(ifp);
2793				ifp->if_drv_flags |= IFF_DRV_RUNNING;
2794				mlx5_set_port_status(priv->mdev, MLX5_PORT_UP);
2795			}
2796		} else {
2797			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2798				mlx5_set_port_status(priv->mdev,
2799				    MLX5_PORT_DOWN);
2800				if (test_bit(MLX5E_STATE_OPENED, &priv->state) != 0)
2801					mlx5e_close_locked(ifp);
2802				mlx5e_update_carrier(priv);
2803				ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2804			}
2805		}
2806		PRIV_UNLOCK(priv);
2807		break;
2808	case SIOCADDMULTI:
2809	case SIOCDELMULTI:
2810		mlx5e_set_rx_mode(ifp);
2811		break;
2812	case SIOCSIFMEDIA:
2813	case SIOCGIFMEDIA:
2814	case SIOCGIFXMEDIA:
2815		ifr = (struct ifreq *)data;
2816		error = ifmedia_ioctl(ifp, ifr, &priv->media, command);
2817		break;
2818	case SIOCSIFCAP:
2819		ifr = (struct ifreq *)data;
2820		PRIV_LOCK(priv);
2821		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
2822
2823		if (mask & IFCAP_TXCSUM) {
2824			ifp->if_capenable ^= IFCAP_TXCSUM;
2825			ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);
2826
2827			if (IFCAP_TSO4 & ifp->if_capenable &&
2828			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
2829				ifp->if_capenable &= ~IFCAP_TSO4;
2830				ifp->if_hwassist &= ~CSUM_IP_TSO;
2831				if_printf(ifp,
2832				    "tso4 disabled due to -txcsum.\n");
2833			}
2834		}
2835		if (mask & IFCAP_TXCSUM_IPV6) {
2836			ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
2837			ifp->if_hwassist ^= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);
2838
2839			if (IFCAP_TSO6 & ifp->if_capenable &&
2840			    !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
2841				ifp->if_capenable &= ~IFCAP_TSO6;
2842				ifp->if_hwassist &= ~CSUM_IP6_TSO;
2843				if_printf(ifp,
2844				    "tso6 disabled due to -txcsum6.\n");
2845			}
2846		}
2847		if (mask & IFCAP_RXCSUM)
2848			ifp->if_capenable ^= IFCAP_RXCSUM;
2849		if (mask & IFCAP_RXCSUM_IPV6)
2850			ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
2851		if (mask & IFCAP_TSO4) {
2852			if (!(IFCAP_TSO4 & ifp->if_capenable) &&
2853			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
2854				if_printf(ifp, "enable txcsum first.\n");
2855				error = EAGAIN;
2856				goto out;
2857			}
2858			ifp->if_capenable ^= IFCAP_TSO4;
2859			ifp->if_hwassist ^= CSUM_IP_TSO;
2860		}
2861		if (mask & IFCAP_TSO6) {
2862			if (!(IFCAP_TSO6 & ifp->if_capenable) &&
2863			    !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
2864				if_printf(ifp, "enable txcsum6 first.\n");
2865				error = EAGAIN;
2866				goto out;
2867			}
2868			ifp->if_capenable ^= IFCAP_TSO6;
2869			ifp->if_hwassist ^= CSUM_IP6_TSO;
2870		}
2871		if (mask & IFCAP_VLAN_HWFILTER) {
2872			if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
2873				mlx5e_disable_vlan_filter(priv);
2874			else
2875				mlx5e_enable_vlan_filter(priv);
2876
2877			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
2878		}
2879		if (mask & IFCAP_VLAN_HWTAGGING)
2880			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
2881		if (mask & IFCAP_WOL_MAGIC)
2882			ifp->if_capenable ^= IFCAP_WOL_MAGIC;
2883
2884		VLAN_CAPABILITIES(ifp);
2885		/* turn off LRO means also turn of HW LRO - if it's on */
2886		/* turning off LRO also turns off HW LRO, if it is on */
2887			int was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
2888			bool need_restart = false;
2889
2890			ifp->if_capenable ^= IFCAP_LRO;
2891
2892			/* figure out if updating HW LRO is needed */
2893			if (!(ifp->if_capenable & IFCAP_LRO)) {
2894				if (priv->params.hw_lro_en) {
2895					priv->params.hw_lro_en = false;
2896					need_restart = true;
2897				}
2898			} else {
2899				if (priv->params.hw_lro_en == false &&
2900				    priv->params_ethtool.hw_lro != 0) {
2901					priv->params.hw_lro_en = true;
2902					need_restart = true;
2903				}
2904			}
2905			if (was_opened && need_restart) {
2906				mlx5e_close_locked(ifp);
2907				mlx5e_open_locked(ifp);
2908			}
2909		}
2910out:
2911		PRIV_UNLOCK(priv);
2912		break;
2913
2914	case SIOCGI2C:
2915		ifr = (struct ifreq *)data;
2916
2917		/*
2918		 * Copy from the user-space address ifr_data to the
2919		 * kernel-space address i2c
2920		 */
2921		error = copyin(ifr_data_get_ptr(ifr), &i2c, sizeof(i2c));
2922		if (error)
2923			break;
2924
2925		if (i2c.len > sizeof(i2c.data)) {
2926			error = EINVAL;
2927			break;
2928		}
2929
2930		PRIV_LOCK(priv);
2931		/* Get module_num which is required for the query_eeprom */
2932		error = mlx5_query_module_num(priv->mdev, &module_num);
2933		if (error) {
2934			if_printf(ifp, "Query module num failed, eeprom "
2935			    "reading is not supported\n");
2936			error = EINVAL;
2937			goto err_i2c;
2938		}
2939		/* Check if module is present before doing an access */
2940		module_status = mlx5_query_module_status(priv->mdev, module_num);
2941		if (module_status != MLX5_MODULE_STATUS_PLUGGED_ENABLED &&
2942		    module_status != MLX5_MODULE_STATUS_PLUGGED_DISABLED) {
2943			error = EINVAL;
2944			goto err_i2c;
2945		}
2946		/*
2947		 * Currently 0xA0 and 0xA2 are the only addresses permitted.
2948		 * The internal conversion is as follows:
2949		 */
2950		if (i2c.dev_addr == 0xA0)
2951			read_addr = MLX5E_I2C_ADDR_LOW;
2952		else if (i2c.dev_addr == 0xA2)
2953			read_addr = MLX5E_I2C_ADDR_HIGH;
2954		else {
2955			if_printf(ifp, "Query eeprom failed, "
2956			    "Invalid Address: %X\n", i2c.dev_addr);
2957			error = EINVAL;
2958			goto err_i2c;
2959		}
2960		error = mlx5_query_eeprom(priv->mdev,
2961		    read_addr, MLX5E_EEPROM_LOW_PAGE,
2962		    (uint32_t)i2c.offset, (uint32_t)i2c.len, module_num,
2963		    (uint32_t *)i2c.data, &size_read);
2964		if (error) {
2965			if_printf(ifp, "Query eeprom failed, eeprom "
2966			    "reading is not supported\n");
2967			error = EINVAL;
2968			goto err_i2c;
2969		}
2970
2971		if (i2c.len > MLX5_EEPROM_MAX_BYTES) {
2972			error = mlx5_query_eeprom(priv->mdev,
2973			    read_addr, MLX5E_EEPROM_LOW_PAGE,
2974			    (uint32_t)(i2c.offset + size_read),
2975			    (uint32_t)(i2c.len - size_read), module_num,
2976			    (uint32_t *)(i2c.data + size_read), &size_read);
2977		}
2978		if (error) {
2979			if_printf(ifp, "Query eeprom failed, eeprom "
2980			    "reading is not supported\n");
2981			error = EINVAL;
2982			goto err_i2c;
2983		}
2984
2985		error = copyout(&i2c, ifr_data_get_ptr(ifr), sizeof(i2c));
2986err_i2c:
2987		PRIV_UNLOCK(priv);
2988		break;
2989
2990	default:
2991		error = ether_ioctl(ifp, command, data);
2992		break;
2993	}
2994	return (error);
2995}
2996
2997static int
2998mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev)
2999{
3000	/*
3001	 * TODO: uncomment once FW really sets all these bits if
3002	 * (!mdev->caps.eth.rss_ind_tbl_cap || !mdev->caps.eth.csum_cap ||
3003	 * !mdev->caps.eth.max_lso_cap || !mdev->caps.eth.vlan_cap ||
3004	 * !(mdev->caps.gen.flags & MLX5_DEV_CAP_FLAG_SCQE_BRK_MOD)) return
3005	 * -ENOTSUPP;
3006	 */
3007
3008	/* TODO: add more must-have features */
3009
3010	if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
3011		return (-ENODEV);
3012
3013	return (0);
3014}
3015
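/*
 * Derive the maximum TX inline size from half of the BlueFlame
 * register size, less the static send WQE overhead, clamped to the
 * driver's own MLX5E_MAX_TX_INLINE limit.
 */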
3016static u16
3017mlx5e_get_max_inline_cap(struct mlx5_core_dev *mdev)
3018{
3019	uint32_t bf_buf_size = (1U << MLX5_CAP_GEN(mdev, log_bf_reg_size)) / 2U;
3020
3021	bf_buf_size -= sizeof(struct mlx5e_tx_wqe) - 2;
3022
3023	/* verify against driver hardware limit */
3024	if (bf_buf_size > MLX5E_MAX_TX_INLINE)
3025		bf_buf_size = MLX5E_MAX_TX_INLINE;
3026
3027	return (bf_buf_size);
3028}
3029
3030static int
3031mlx5e_build_ifp_priv(struct mlx5_core_dev *mdev,
3032    struct mlx5e_priv *priv,
3033    int num_comp_vectors)
3034{
3035	int err;
3036
3037	/*
3038	 * TODO: Consider link speed for setting "log_sq_size",
3039	 * "log_rq_size" and "cq_moderation_xxx":
3040	 */
3041	priv->params.log_sq_size =
3042	    MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
3043	priv->params.log_rq_size =
3044	    MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE;
3045	priv->params.rx_cq_moderation_usec =
3046	    MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ?
3047	    MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE :
3048	    MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC;
3049	priv->params.rx_cq_moderation_mode =
3050	    MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ? 1 : 0;
3051	priv->params.rx_cq_moderation_pkts =
3052	    MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS;
3053	priv->params.tx_cq_moderation_usec =
3054	    MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC;
3055	priv->params.tx_cq_moderation_pkts =
3056	    MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS;
3057	priv->params.min_rx_wqes =
3058	    MLX5E_PARAMS_DEFAULT_MIN_RX_WQES;
3059	priv->params.rx_hash_log_tbl_sz =
3060	    (order_base_2(num_comp_vectors) >
3061	    MLX5E_PARAMS_DEFAULT_RX_HASH_LOG_TBL_SZ) ?
3062	    order_base_2(num_comp_vectors) :
3063	    MLX5E_PARAMS_DEFAULT_RX_HASH_LOG_TBL_SZ;
3064	priv->params.num_tc = 1;
3065	priv->params.default_vlan_prio = 0;
3066	priv->counter_set_id = -1;
3067	priv->params.tx_max_inline = mlx5e_get_max_inline_cap(mdev);
3068
3069	err = mlx5_query_min_inline(mdev, &priv->params.tx_min_inline_mode);
3070	if (err)
3071		return (err);
3072
3073	/*
3074	 * HW LRO currently defaults to off. Once that changes, the HW
3075	 * capability "!!MLX5_CAP_ETH(mdev, lro_cap)" will be considered.
3076	 */
3077	priv->params.hw_lro_en = false;
3078	priv->params.lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ;
3079
3080	/*
3081	 * CQE zipping currently defaults to off. Once that changes,
3082	 * the HW capability
3083	 * "!!MLX5_CAP_GEN(mdev, cqe_compression)" will be considered.
3084	 */
3085	priv->params.cqe_zipping_en = false;
3086
3087	priv->mdev = mdev;
3088	priv->params.num_channels = num_comp_vectors;
3089	priv->params.channels_rsss = 1;
3090	priv->order_base_2_num_channels = order_base_2(num_comp_vectors);
3091	priv->queue_mapping_channel_mask =
3092	    roundup_pow_of_two(num_comp_vectors) - 1;
3093	priv->num_tc = priv->params.num_tc;
3094	priv->default_vlan_prio = priv->params.default_vlan_prio;
3095
3096	INIT_WORK(&priv->update_stats_work, mlx5e_update_stats_work);
3097	INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work);
3098	INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work);
3099
3100	return (0);
3101}
3102
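/*
 * Create a physical address memory key with local read/write access
 * covering the whole address space (length64). The resulting key is
 * what the send and receive queues use when posting DMA addresses.
 */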
3103static int
3104mlx5e_create_mkey(struct mlx5e_priv *priv, u32 pdn,
3105		  struct mlx5_core_mr *mkey)
3106{
3107	struct ifnet *ifp = priv->ifp;
3108	struct mlx5_core_dev *mdev = priv->mdev;
3109	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
3110	void *mkc;
3111	u32 *in;
3112	int err;
3113
3114	in = mlx5_vzalloc(inlen);
3115	if (in == NULL) {
3116		if_printf(ifp, "%s: failed to allocate inbox\n", __func__);
3117		return (-ENOMEM);
3118	}
3119
3120	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
3121	MLX5_SET(mkc, mkc, access_mode, MLX5_ACCESS_MODE_PA);
3122	MLX5_SET(mkc, mkc, lw, 1);
3123	MLX5_SET(mkc, mkc, lr, 1);
3124
3125	MLX5_SET(mkc, mkc, pd, pdn);
3126	MLX5_SET(mkc, mkc, length64, 1);
3127	MLX5_SET(mkc, mkc, qpn, 0xffffff);
3128
3129	err = mlx5_core_create_mkey(mdev, mkey, in, inlen);
3130	if (err)
3131		if_printf(ifp, "%s: mlx5_core_create_mkey failed, %d\n",
3132		    __func__, err);
3133
3134	kvfree(in);
3135	return (err);
3136}
3137
3138static const char *mlx5e_vport_stats_desc[] = {
3139	MLX5E_VPORT_STATS(MLX5E_STATS_DESC)
3140};
3141
3142static const char *mlx5e_pport_stats_desc[] = {
3143	MLX5E_PPORT_STATS(MLX5E_STATS_DESC)
3144};
3145
3146static void
3147mlx5e_priv_mtx_init(struct mlx5e_priv *priv)
3148{
3149	mtx_init(&priv->async_events_mtx, "mlx5async", MTX_NETWORK_LOCK, MTX_DEF);
3150	sx_init(&priv->state_lock, "mlx5state");
3151	callout_init_mtx(&priv->watchdog, &priv->async_events_mtx, 0);
3152	MLX5_INIT_DOORBELL_LOCK(&priv->doorbell_lock);
3153}
3154
3155static void
3156mlx5e_priv_mtx_destroy(struct mlx5e_priv *priv)
3157{
3158	mtx_destroy(&priv->async_events_mtx);
3159	sx_destroy(&priv->state_lock);
3160}
3161
3162static int
3163sysctl_firmware(SYSCTL_HANDLER_ARGS)
3164{
3165	/*
3166	 * %d.%d.%d is the string format.
3167	 * fw_rev_{maj,min,sub} return u16, 2^16 = 65536.
3168	 * We need at most 5 chars to store that.
3169	 * It also has: two "." and NULL at the end, which means we need 18
3170	 * It also has two "." separators and a NUL at the end, which means we need 18
3171	 */
3172	char fw[18];
3173	struct mlx5e_priv *priv = arg1;
3174	int error;
3175
3176	snprintf(fw, sizeof(fw), "%d.%d.%d", fw_rev_maj(priv->mdev), fw_rev_min(priv->mdev),
3177	    fw_rev_sub(priv->mdev));
3178	error = sysctl_handle_string(oidp, fw, sizeof(fw), req);
3179	return (error);
3180}
3181
3182static void
3183mlx5e_disable_tx_dma(struct mlx5e_channel *ch)
3184{
3185	int i;
3186
3187	for (i = 0; i < ch->num_tc; i++)
3188		mlx5e_drain_sq(&ch->sq[i]);
3189}
3190
3191static void
3192mlx5e_reset_sq_doorbell_record(struct mlx5e_sq *sq)
3193{
3194
3195	sq->doorbell.d32[0] = cpu_to_be32(MLX5_OPCODE_NOP);
3196	sq->doorbell.d32[1] = cpu_to_be32(sq->sqn << 8);
3197	mlx5e_tx_notify_hw(sq, sq->doorbell.d32, 0);
3198	sq->doorbell.d64 = 0;
3199}
3200
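/*
 * Recover a stopped send queue: move it from ERR back to RST, reset
 * the doorbell record while in RST, and only then transition to RDY
 * and mark the queue running again.
 */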
3201void
3202mlx5e_resume_sq(struct mlx5e_sq *sq)
3203{
3204	int err;
3205
3206	/* check if already enabled */
3207	if (READ_ONCE(sq->running) != 0)
3208		return;
3209
3210	err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_ERR,
3211	    MLX5_SQC_STATE_RST);
3212	if (err != 0) {
3213		if_printf(sq->ifp,
3214		    "mlx5e_modify_sq() from ERR to RST failed: %d\n", err);
3215	}
3216
3217	sq->cc = 0;
3218	sq->pc = 0;
3219
3220	/* reset doorbell prior to moving from RST to RDY */
3221	mlx5e_reset_sq_doorbell_record(sq);
3222
3223	err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RST,
3224	    MLX5_SQC_STATE_RDY);
3225	if (err != 0) {
3226		if_printf(sq->ifp,
3227		    "mlx5e_modify_sq() from RST to RDY failed: %d\n", err);
3228	}
3229
3230	sq->cev_next_state = MLX5E_CEV_STATE_INITIAL;
3231	WRITE_ONCE(sq->running, 1);
3232}
3233
3234static void
3235mlx5e_enable_tx_dma(struct mlx5e_channel *ch)
3236{
3237	int i;
3238
3239	for (i = 0; i < ch->num_tc; i++)
3240		mlx5e_resume_sq(&ch->sq[i]);
3241}
3242
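/*
 * Stop RX DMA on a channel: disable the RQ and its watchdog, move the
 * queue to ERR, poll completions until the work queue is empty and
 * finally park the queue in RST.
 */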
3243static void
3244mlx5e_disable_rx_dma(struct mlx5e_channel *ch)
3245{
3246	struct mlx5e_rq *rq = &ch->rq;
3247	int err;
3248
3249	mtx_lock(&rq->mtx);
3250	rq->enabled = 0;
3251	callout_stop(&rq->watchdog);
3252	mtx_unlock(&rq->mtx);
3253
3254	callout_drain(&rq->watchdog);
3255
3256	err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR);
3257	if (err != 0) {
3258		if_printf(rq->ifp,
3259		    "mlx5e_modify_rq() from RDY to ERR failed: %d\n", err);
3260	}
3261
3262	while (!mlx5_wq_ll_is_empty(&rq->wq)) {
3263		msleep(1);
3264		rq->cq.mcq.comp(&rq->cq.mcq);
3265	}
3266
3267	/*
3268	 * Transitioning into RST state will allow the FW to track fewer
3269	 * ERR-state queues, thus reducing the receive queue flushing time.
3270	 */
3271	err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_ERR, MLX5_RQC_STATE_RST);
3272	if (err != 0) {
3273		if_printf(rq->ifp,
3274		    "mlx5e_modify_rq() from ERR to RST failed: %d\n", err);
3275	}
3276}
3277
3278static void
3279mlx5e_enable_rx_dma(struct mlx5e_channel *ch)
3280{
3281	struct mlx5e_rq *rq = &ch->rq;
3282	int err;
3283
3284	rq->wq.wqe_ctr = 0;
3285	mlx5_wq_ll_update_db_record(&rq->wq);
3286	err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY);
3287	if (err != 0) {
3288		if_printf(rq->ifp,
3289		    "mlx5e_modify_rq() from RST to RDY failed: %d\n", err);
3290	}
3291
3292	rq->enabled = 1;
3293
3294	rq->cq.mcq.comp(&rq->cq.mcq);
3295}
3296
3297void
3298mlx5e_modify_tx_dma(struct mlx5e_priv *priv, uint8_t value)
3299{
3300	int i;
3301
3302	if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
3303		return;
3304
3305	for (i = 0; i < priv->params.num_channels; i++) {
3306		if (value)
3307			mlx5e_disable_tx_dma(&priv->channel[i]);
3308		else
3309			mlx5e_enable_tx_dma(&priv->channel[i]);
3310	}
3311}
3312
3313void
3314mlx5e_modify_rx_dma(struct mlx5e_priv *priv, uint8_t value)
3315{
3316	int i;
3317
3318	if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
3319		return;
3320
3321	for (i = 0; i < priv->params.num_channels; i++) {
3322		if (value)
3323			mlx5e_disable_rx_dma(&priv->channel[i]);
3324		else
3325			mlx5e_enable_rx_dma(&priv->channel[i]);
3326	}
3327}
3328
3329static void
3330mlx5e_add_hw_stats(struct mlx5e_priv *priv)
3331{
3332	SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_hw),
3333	    OID_AUTO, "fw_version", CTLTYPE_STRING | CTLFLAG_RD, priv, 0,
3334	    sysctl_firmware, "A", "HCA firmware version");
3335
3336	SYSCTL_ADD_STRING(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_hw),
3337	    OID_AUTO, "board_id", CTLFLAG_RD, priv->mdev->board_id, 0,
3338	    "Board ID");
3339}
3340
3341static int
3342mlx5e_sysctl_tx_priority_flow_control(SYSCTL_HANDLER_ARGS)
3343{
3344	struct mlx5e_priv *priv = arg1;
3345	uint8_t temp[MLX5E_MAX_PRIORITY];
3346	uint32_t tx_pfc;
3347	int err;
3348	int i;
3349
3350	PRIV_LOCK(priv);
3351
3352	tx_pfc = priv->params.tx_priority_flow_control;
3353
3354	for (i = 0; i != MLX5E_MAX_PRIORITY; i++)
3355		temp[i] = (tx_pfc >> i) & 1;
3356
3357	err = SYSCTL_OUT(req, temp, MLX5E_MAX_PRIORITY);
3358	if (err || !req->newptr)
3359		goto done;
3360	err = SYSCTL_IN(req, temp, MLX5E_MAX_PRIORITY);
3361	if (err)
3362		goto done;
3363
3364	priv->params.tx_priority_flow_control = 0;
3365
3366	/* range check input value */
3367	for (i = 0; i != MLX5E_MAX_PRIORITY; i++) {
3368		if (temp[i] > 1) {
3369			err = ERANGE;
3370			goto done;
3371		}
3372		priv->params.tx_priority_flow_control |= (temp[i] << i);
3373	}
3374
3375	/* check if update is required */
3376	if (tx_pfc != priv->params.tx_priority_flow_control)
3377		err = -mlx5e_set_port_pfc(priv);
3378done:
3379	if (err != 0)
3380		priv->params.tx_priority_flow_control = tx_pfc;
3381	PRIV_UNLOCK(priv);
3382
3383	return (err);
3384}
3385
3386static int
3387mlx5e_sysctl_rx_priority_flow_control(SYSCTL_HANDLER_ARGS)
3388{
3389	struct mlx5e_priv *priv = arg1;
3390	uint8_t temp[MLX5E_MAX_PRIORITY];
3391	uint32_t rx_pfc;
3392	int err;
3393	int i;
3394
3395	PRIV_LOCK(priv);
3396
3397	rx_pfc = priv->params.rx_priority_flow_control;
3398
3399	for (i = 0; i != MLX5E_MAX_PRIORITY; i++)
3400		temp[i] = (rx_pfc >> i) & 1;
3401
3402	err = SYSCTL_OUT(req, temp, MLX5E_MAX_PRIORITY);
3403	if (err || !req->newptr)
3404		goto done;
3405	err = SYSCTL_IN(req, temp, MLX5E_MAX_PRIORITY);
3406	if (err)
3407		goto done;
3408
3409	priv->params.rx_priority_flow_control = 0;
3410
3411	/* range check input value */
3412	for (i = 0; i != MLX5E_MAX_PRIORITY; i++) {
3413		if (temp[i] > 1) {
3414			err = ERANGE;
3415			goto done;
3416		}
3417		priv->params.rx_priority_flow_control |= (temp[i] << i);
3418	}
3419
3420	/* check if update is required */
3421	if (rx_pfc != priv->params.rx_priority_flow_control)
3422		err = -mlx5e_set_port_pfc(priv);
3423done:
3424	if (err != 0)
3425		priv->params.rx_priority_flow_control = rx_pfc;
3426	PRIV_UNLOCK(priv);
3427
3428	return (err);
3429}
3430
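/*
 * The PFC sysctls registered below expose one byte per priority. As a
 * hypothetical usage example, with interface unit 0 assumed:
 *
 *	sysctl dev.mce.0.rx_priority_flow_control
 *
 * would report eight 0/1 values, one for each priority 0..7.
 */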
3431static void
3432mlx5e_setup_pauseframes(struct mlx5e_priv *priv)
3433{
3434#if (__FreeBSD_version < 1100000)
3435	char path[96];
3436#endif
3437	int error;
3438
3439	/* enable pauseframes by default */
3440	priv->params.tx_pauseframe_control = 1;
3441	priv->params.rx_pauseframe_control = 1;
3442
3443	/* disable priority flow control (PFC) by default */
3444	priv->params.tx_priority_flow_control = 0;
3445	priv->params.rx_priority_flow_control = 0;
3446
3447#if (__FreeBSD_version < 1100000)
3448	/* compute path for sysctl */
3449	snprintf(path, sizeof(path), "dev.mce.%d.tx_pauseframe_control",
3450	    device_get_unit(priv->mdev->pdev->dev.bsddev));
3451
3452	/* try to fetch tunable, if any */
3453	TUNABLE_INT_FETCH(path, &priv->params.tx_pauseframe_control);
3454
3455	/* compute path for sysctl */
3456	snprintf(path, sizeof(path), "dev.mce.%d.rx_pauseframe_control",
3457	    device_get_unit(priv->mdev->pdev->dev.bsddev));
3458
3459	/* try to fetch tunable, if any */
3460	TUNABLE_INT_FETCH(path, &priv->params.rx_pauseframe_control);
3461#endif
3462
3463	/* register pauseframe SYSCTLs */
3464	SYSCTL_ADD_INT(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3465	    OID_AUTO, "tx_pauseframe_control", CTLFLAG_RDTUN,
3466	    &priv->params.tx_pauseframe_control, 0,
3467	    "Set to enable TX pause frames. Clear to disable.");
3468
3469	SYSCTL_ADD_INT(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3470	    OID_AUTO, "rx_pauseframe_control", CTLFLAG_RDTUN,
3471	    &priv->params.rx_pauseframe_control, 0,
3472	    "Set to enable RX pause frames. Clear to disable.");
3473
3474	/* register priority flow control, PFC, SYSCTLs */
3475	SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3476	    OID_AUTO, "tx_priority_flow_control", CTLTYPE_U8 | CTLFLAG_RWTUN |
3477	    CTLFLAG_MPSAFE, priv, 0, &mlx5e_sysctl_tx_priority_flow_control, "CU",
3478	    "Set to enable TX priority flow control frames for priorities 0..7. Clear to disable.");
3479
3480	SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3481	    OID_AUTO, "rx_priority_flow_control", CTLTYPE_U8 | CTLFLAG_RWTUN |
3482	    CTLFLAG_MPSAFE, priv, 0, &mlx5e_sysctl_rx_priority_flow_control, "CU",
3483	    "Set to enable RX priority flow control frames for priorities 0..7. Clear to disable.");
3484
3485	PRIV_LOCK(priv);
3486
3487	/* range check */
3488	priv->params.tx_pauseframe_control =
3489	    priv->params.tx_pauseframe_control ? 1 : 0;
3490	priv->params.rx_pauseframe_control =
3491	    priv->params.rx_pauseframe_control ? 1 : 0;
3492
3493	/* update firmware */
3494	error = mlx5e_set_port_pause_and_pfc(priv);
3495	if (error == -EINVAL) {
3496		if_printf(priv->ifp,
3497		    "Global pauseframes must be disabled before enabling PFC.\n");
3498		priv->params.rx_priority_flow_control = 0;
3499		priv->params.tx_priority_flow_control = 0;
3500
3501		/* update firmware */
3502		(void) mlx5e_set_port_pause_and_pfc(priv);
3503	}
3504	PRIV_UNLOCK(priv);
3505}
3506
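/*
 * Interface "add" callback: allocate the private structure with room
 * for the worst-case number of channels, create the ifnet and sysctl
 * trees, set up the UAR, PD, transport domain and memory key, and
 * finally attach the Ethernet interface with its supported media.
 */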
3507static void *
3508mlx5e_create_ifp(struct mlx5_core_dev *mdev)
3509{
3510	struct ifnet *ifp;
3511	struct mlx5e_priv *priv;
3512	u8 dev_addr[ETHER_ADDR_LEN] __aligned(4);
3513	struct sysctl_oid_list *child;
3514	int ncv = mdev->priv.eq_table.num_comp_vectors;
3515	char unit[16];
3516	int err;
3517	int i;
3518	u32 eth_proto_cap;
3519
3520	if (mlx5e_check_required_hca_cap(mdev)) {
3521		mlx5_core_dbg(mdev, "mlx5e_check_required_hca_cap() failed\n");
3522		return (NULL);
3523	}
3524	/*
3525	 * Try to allocate the priv and make room for worst-case
3526	 * number of channel structures:
3527	 */
3528	priv = malloc(sizeof(*priv) +
3529	    (sizeof(priv->channel[0]) * mdev->priv.eq_table.num_comp_vectors),
3530	    M_MLX5EN, M_WAITOK | M_ZERO);
3531	mlx5e_priv_mtx_init(priv);
3532
3533	ifp = priv->ifp = if_alloc(IFT_ETHER);
3534	if (ifp == NULL) {
3535		mlx5_core_err(mdev, "if_alloc() failed\n");
3536		goto err_free_priv;
3537	}
3538	ifp->if_softc = priv;
3539	if_initname(ifp, "mce", device_get_unit(mdev->pdev->dev.bsddev));
3540	ifp->if_mtu = ETHERMTU;
3541	ifp->if_init = mlx5e_open;
3542	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
3543	ifp->if_ioctl = mlx5e_ioctl;
3544	ifp->if_transmit = mlx5e_xmit;
3545	ifp->if_qflush = if_qflush;
3546#if (__FreeBSD_version >= 1100000)
3547	ifp->if_get_counter = mlx5e_get_counter;
3548#endif
3549	ifp->if_snd.ifq_maxlen = ifqmaxlen;
3550	/*
3551	 * Set driver features
3552	 */
3553	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6;
3554	ifp->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING;
3555	ifp->if_capabilities |= IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWFILTER;
3556	ifp->if_capabilities |= IFCAP_LINKSTATE | IFCAP_JUMBO_MTU;
3557	ifp->if_capabilities |= IFCAP_LRO;
3558	ifp->if_capabilities |= IFCAP_TSO | IFCAP_VLAN_HWTSO;
3559	ifp->if_capabilities |= IFCAP_HWSTATS;
3560
3561	/* set TSO limits so that we don't have to drop TX packets */
3562	ifp->if_hw_tsomax = MLX5E_MAX_TX_PAYLOAD_SIZE - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
3563	ifp->if_hw_tsomaxsegcount = MLX5E_MAX_TX_MBUF_FRAGS - 1 /* hdr */;
3564	ifp->if_hw_tsomaxsegsize = MLX5E_MAX_TX_MBUF_SIZE;
3565
3566	ifp->if_capenable = ifp->if_capabilities;
3567	ifp->if_hwassist = 0;
3568	if (ifp->if_capenable & IFCAP_TSO)
3569		ifp->if_hwassist |= CSUM_TSO;
3570	if (ifp->if_capenable & IFCAP_TXCSUM)
3571		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP | CSUM_IP);
3572	if (ifp->if_capenable & IFCAP_TXCSUM_IPV6)
3573		ifp->if_hwassist |= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);
3574
3575	/* ifnet sysctl tree */
3576	sysctl_ctx_init(&priv->sysctl_ctx);
3577	priv->sysctl_ifnet = SYSCTL_ADD_NODE(&priv->sysctl_ctx, SYSCTL_STATIC_CHILDREN(_dev),
3578	    OID_AUTO, ifp->if_dname, CTLFLAG_RD, 0, "MLX5 ethernet - interface name");
3579	if (priv->sysctl_ifnet == NULL) {
3580		mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n");
3581		goto err_free_sysctl;
3582	}
3583	snprintf(unit, sizeof(unit), "%d", ifp->if_dunit);
3584	priv->sysctl_ifnet = SYSCTL_ADD_NODE(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3585	    OID_AUTO, unit, CTLFLAG_RD, 0, "MLX5 ethernet - interface unit");
3586	if (priv->sysctl_ifnet == NULL) {
3587		mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n");
3588		goto err_free_sysctl;
3589	}
3590
3591	/* HW sysctl tree */
3592	child = SYSCTL_CHILDREN(device_get_sysctl_tree(mdev->pdev->dev.bsddev));
3593	priv->sysctl_hw = SYSCTL_ADD_NODE(&priv->sysctl_ctx, child,
3594	    OID_AUTO, "hw", CTLFLAG_RD, 0, "MLX5 ethernet dev hw");
3595	if (priv->sysctl_hw == NULL) {
3596		mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n");
3597		goto err_free_sysctl;
3598	}
3599
3600	err = mlx5e_build_ifp_priv(mdev, priv, ncv);
3601	if (err) {
3602		mlx5_core_err(mdev, "mlx5e_build_ifp_priv() failed (%d)\n", err);
3603		goto err_free_sysctl;
3604	}
3605
3606	snprintf(unit, sizeof(unit), "mce%u_wq",
3607	    device_get_unit(mdev->pdev->dev.bsddev));
3608	priv->wq = alloc_workqueue(unit, 0, 1);
3609	if (priv->wq == NULL) {
3610		if_printf(ifp, "%s: alloc_workqueue failed\n", __func__);
3611		goto err_free_sysctl;
3612	}
3613
3614	err = mlx5_alloc_map_uar(mdev, &priv->cq_uar);
3615	if (err) {
3616		if_printf(ifp, "%s: mlx5_alloc_map_uar failed, %d\n",
3617		    __func__, err);
3618		goto err_free_wq;
3619	}
3620	err = mlx5_core_alloc_pd(mdev, &priv->pdn);
3621	if (err) {
3622		if_printf(ifp, "%s: mlx5_core_alloc_pd failed, %d\n",
3623		    __func__, err);
3624		goto err_unmap_free_uar;
3625	}
3626	err = mlx5_alloc_transport_domain(mdev, &priv->tdn);
3627	if (err) {
3628		if_printf(ifp, "%s: mlx5_alloc_transport_domain failed, %d\n",
3629		    __func__, err);
3630		goto err_dealloc_pd;
3631	}
3632	err = mlx5e_create_mkey(priv, priv->pdn, &priv->mr);
3633	if (err) {
3634		if_printf(ifp, "%s: mlx5e_create_mkey failed, %d\n",
3635		    __func__, err);
3636		goto err_dealloc_transport_domain;
3637	}
3638	mlx5_query_nic_vport_mac_address(priv->mdev, 0, dev_addr);
3639
3640	/* check if we should generate a random MAC address */
3641	if (MLX5_CAP_GEN(priv->mdev, vport_group_manager) == 0 &&
3642	    is_zero_ether_addr(dev_addr)) {
3643		random_ether_addr(dev_addr);
3644		if_printf(ifp, "Assigned random MAC address\n");
3645	}
3646
3647	/* set default MTU */
3648	mlx5e_set_dev_port_mtu(ifp, ifp->if_mtu);
3649
3650	/* Set default media status */
3651	priv->media_status_last = IFM_AVALID;
3652	priv->media_active_last = IFM_ETHER | IFM_AUTO |
3653	    IFM_ETH_RXPAUSE | IFM_FDX;
3654
3655	/* setup default pauseframes configuration */
3656	mlx5e_setup_pauseframes(priv);
3657
3658	err = mlx5_query_port_proto_cap(mdev, &eth_proto_cap, MLX5_PTYS_EN);
3659	if (err) {
3660		eth_proto_cap = 0;
3661		if_printf(ifp, "%s: Query port media capability failed, %d\n",
3662		    __func__, err);
3663	}
3664
3665	/* Setup supported medias */
3666	ifmedia_init(&priv->media, IFM_IMASK | IFM_ETH_FMASK,
3667	    mlx5e_media_change, mlx5e_media_status);
3668
3669	for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) {
3670		if (mlx5e_mode_table[i].baudrate == 0)
3671			continue;
3672		if (MLX5E_PROT_MASK(i) & eth_proto_cap) {
3673			ifmedia_add(&priv->media,
3674			    mlx5e_mode_table[i].subtype |
3675			    IFM_ETHER, 0, NULL);
3676			ifmedia_add(&priv->media,
3677			    mlx5e_mode_table[i].subtype |
3678			    IFM_ETHER | IFM_FDX |
3679			    IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE, 0, NULL);
3680		}
3681	}
3682
3683	/* Additional supported medias */
3684	ifmedia_add(&priv->media, IFM_10G_LR | IFM_ETHER, 0, NULL);
3685	ifmedia_add(&priv->media, IFM_10G_LR |
3686	    IFM_ETHER | IFM_FDX |
3687	    IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE, 0, NULL);
3688
3689	ifmedia_add(&priv->media, IFM_40G_ER4 | IFM_ETHER, 0, NULL);
3690	ifmedia_add(&priv->media, IFM_40G_ER4 |
3691	    IFM_ETHER | IFM_FDX |
3692	    IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE, 0, NULL);
3693
3694	ifmedia_add(&priv->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3695	ifmedia_add(&priv->media, IFM_ETHER | IFM_AUTO | IFM_FDX |
3696	    IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE, 0, NULL);
3697
3698	/* Set autoselect by default */
3699	ifmedia_set(&priv->media, IFM_ETHER | IFM_AUTO | IFM_FDX |
3700	    IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE);
3701	ether_ifattach(ifp, dev_addr);
3702
3703	/* Register for VLAN events */
3704	priv->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
3705	    mlx5e_vlan_rx_add_vid, priv, EVENTHANDLER_PRI_FIRST);
3706	priv->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
3707	    mlx5e_vlan_rx_kill_vid, priv, EVENTHANDLER_PRI_FIRST);
3708
3709	/* Link is down by default */
3710	if_link_state_change(ifp, LINK_STATE_DOWN);
3711
3712	mlx5e_enable_async_events(priv);
3713
3714	mlx5e_add_hw_stats(priv);
3715
3716	mlx5e_create_stats(&priv->stats.vport.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3717	    "vstats", mlx5e_vport_stats_desc, MLX5E_VPORT_STATS_NUM,
3718	    priv->stats.vport.arg);
3719
3720	mlx5e_create_stats(&priv->stats.pport.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3721	    "pstats", mlx5e_pport_stats_desc, MLX5E_PPORT_STATS_NUM,
3722	    priv->stats.pport.arg);
3723
3724	mlx5e_create_ethtool(priv);
3725
3726	mtx_lock(&priv->async_events_mtx);
3727	mlx5e_update_stats(priv);
3728	mtx_unlock(&priv->async_events_mtx);
3729
3730	return (priv);
3731
3732err_dealloc_transport_domain:
3733	mlx5_dealloc_transport_domain(mdev, priv->tdn);
3734
3735err_dealloc_pd:
3736	mlx5_core_dealloc_pd(mdev, priv->pdn);
3737
3738err_unmap_free_uar:
3739	mlx5_unmap_free_uar(mdev, &priv->cq_uar);
3740
3741err_free_wq:
3742	destroy_workqueue(priv->wq);
3743
3744err_free_sysctl:
3745	sysctl_ctx_free(&priv->sysctl_ctx);
3746	if (priv->sysctl_debug)
3747		sysctl_ctx_free(&priv->stats.port_stats_debug.ctx);
3748	if_free(ifp);
3749
3750err_free_priv:
3751	mlx5e_priv_mtx_destroy(priv);
3752	free(priv, M_MLX5EN);
3753	return (NULL);
3754}
3755
3756static void
3757mlx5e_destroy_ifp(struct mlx5_core_dev *mdev, void *vpriv)
3758{
3759	struct mlx5e_priv *priv = vpriv;
3760	struct ifnet *ifp = priv->ifp;
3761
3762	/* don't allow more IOCTLs */
3763	priv->gone = 1;
3764
3765	/* XXX wait a bit to allow IOCTL handlers to complete */
3766	pause("W", hz);
3767
3768	/* stop watchdog timer */
3769	callout_drain(&priv->watchdog);
3770
3771	if (priv->vlan_attach != NULL)
3772		EVENTHANDLER_DEREGISTER(vlan_config, priv->vlan_attach);
3773	if (priv->vlan_detach != NULL)
3774		EVENTHANDLER_DEREGISTER(vlan_unconfig, priv->vlan_detach);
3775
3776	/* make sure device gets closed */
3777	PRIV_LOCK(priv);
3778	mlx5e_close_locked(ifp);
3779	PRIV_UNLOCK(priv);
3780
3781	/* unregister device */
3782	ifmedia_removeall(&priv->media);
3783	ether_ifdetach(ifp);
3784	if_free(ifp);
3785
3786	/* destroy all remaining sysctl nodes */
3787	sysctl_ctx_free(&priv->stats.vport.ctx);
3788	sysctl_ctx_free(&priv->stats.pport.ctx);
3789	if (priv->sysctl_debug)
3790		sysctl_ctx_free(&priv->stats.port_stats_debug.ctx);
3791	sysctl_ctx_free(&priv->sysctl_ctx);
3792
3793	mlx5_core_destroy_mkey(priv->mdev, &priv->mr);
3794	mlx5_dealloc_transport_domain(priv->mdev, priv->tdn);
3795	mlx5_core_dealloc_pd(priv->mdev, priv->pdn);
3796	mlx5_unmap_free_uar(priv->mdev, &priv->cq_uar);
3797	mlx5e_disable_async_events(priv);
3798	destroy_workqueue(priv->wq);
3799	mlx5e_priv_mtx_destroy(priv);
3800	free(priv, M_MLX5EN);
3801}
3802
3803static void *
3804mlx5e_get_ifp(void *vpriv)
3805{
3806	struct mlx5e_priv *priv = vpriv;
3807
3808	return (priv->ifp);
3809}
3810
3811static struct mlx5_interface mlx5e_interface = {
3812	.add = mlx5e_create_ifp,
3813	.remove = mlx5e_destroy_ifp,
3814	.event = mlx5e_async_event,
3815	.protocol = MLX5_INTERFACE_PROTOCOL_ETH,
3816	.get_dev = mlx5e_get_ifp,
3817};
3818
3819void
3820mlx5e_init(void)
3821{
3822	mlx5_register_interface(&mlx5e_interface);
3823}
3824
3825void
3826mlx5e_cleanup(void)
3827{
3828	mlx5_unregister_interface(&mlx5e_interface);
3829}
3830
3831static void
3832mlx5e_show_version(void __unused *arg)
3833{
3834
3835	printf("%s", mlx5e_version);
3836}
3837SYSINIT(mlx5e_show_version, SI_SUB_DRIVERS, SI_ORDER_ANY, mlx5e_show_version, NULL);
3838
3839module_init_order(mlx5e_init, SI_ORDER_THIRD);
3840module_exit_order(mlx5e_cleanup, SI_ORDER_THIRD);
3841
3842#if (__FreeBSD_version >= 1100000)
3843MODULE_DEPEND(mlx5en, linuxkpi, 1, 1, 1);
3844#endif
3845MODULE_DEPEND(mlx5en, mlx5, 1, 1, 1);
3846MODULE_VERSION(mlx5en, 1);
3847