/*-
 * Copyright (c) 2015-2021 Mellanox Technologies. All rights reserved.
 * Copyright (c) 2022 NVIDIA corporation & affiliates.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "opt_kern_tls.h"
#include "opt_rss.h"
#include "opt_ratelimit.h"

#include <dev/mlx5/mlx5_en/en.h>
#include <machine/atomic.h>

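/*
 * Completion event moderation: a completion event (CQE) is requested
 * for only every "cev_factor"-th work request, which reduces the
 * completion processing overhead.
 */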
static inline bool
mlx5e_do_send_cqe_inline(struct mlx5e_sq *sq)
{
	sq->cev_counter++;
	/* interleave the CQEs */
	if (sq->cev_counter >= sq->cev_factor) {
		sq->cev_counter = 0;
		return (true);
	}
	return (false);
}

bool
mlx5e_do_send_cqe(struct mlx5e_sq *sq)
{

	return (mlx5e_do_send_cqe_inline(sq));
}

void
mlx5e_send_nop(struct mlx5e_sq *sq, u32 ds_cnt)
{
	u16 pi = sq->pc & sq->wq.sz_m1;
	struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi);

	memset(&wqe->ctrl, 0, sizeof(wqe->ctrl));

	wqe->ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_NOP);
	wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
	if (mlx5e_do_send_cqe_inline(sq))
		wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
	else
		wqe->ctrl.fm_ce_se = 0;

	/* Copy data for doorbell */
	memcpy(sq->doorbell.d32, &wqe->ctrl, sizeof(sq->doorbell.d32));

	sq->mbuf[pi].mbuf = NULL;
	sq->mbuf[pi].num_bytes = 0;
	sq->mbuf[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
	sq->pc += sq->mbuf[pi].num_wqebbs;
}

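/* Seed for the software flow hash used when the mbuf has no RSS hash. */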
static uint32_t mlx5e_hash_value;

static void
mlx5e_hash_init(void *arg)
{
	mlx5e_hash_value = m_ether_tcpip_hash_init();
}

/* Make the kernel call mlx5e_hash_init() after the random subsystem has finished initializing */
SYSINIT(mlx5e_hash_init, SI_SUB_RANDOM, SI_ORDER_ANY, &mlx5e_hash_init, NULL);

static struct mlx5e_sq *
mlx5e_select_queue_by_send_tag(if_t ifp, struct mbuf *mb)
{
	struct m_snd_tag *mb_tag;
	struct mlx5e_sq *sq;

	mb_tag = mb->m_pkthdr.snd_tag;

#ifdef KERN_TLS
top:
#endif
	/* get pointer to sendqueue */
	switch (mb_tag->sw->type) {
#ifdef RATELIMIT
	case IF_SND_TAG_TYPE_RATE_LIMIT:
		sq = container_of(mb_tag,
		    struct mlx5e_rl_channel, tag)->sq;
		break;
#ifdef KERN_TLS
	case IF_SND_TAG_TYPE_TLS_RATE_LIMIT:
		mb_tag = container_of(mb_tag, struct mlx5e_tls_tag, tag)->rl_tag;
		goto top;
#endif
#endif
	case IF_SND_TAG_TYPE_UNLIMITED:
		sq = &container_of(mb_tag,
		    struct mlx5e_channel, tag)->sq[0];
		KASSERT((mb_tag->refcount > 0),
		    ("mlx5e_select_queue: Channel refs are zero for unlimited tag"));
		break;
#ifdef KERN_TLS
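	/* TLS tags wrap an underlying send tag; resolve it and retry */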
	case IF_SND_TAG_TYPE_TLS:
		mb_tag = container_of(mb_tag, struct mlx5e_tls_tag, tag)->rl_tag;
		goto top;
#endif
	default:
		sq = NULL;
		break;
	}

	/* check if valid */
	if (sq != NULL && READ_ONCE(sq->running) != 0)
		return (sq);

	return (NULL);
}

static struct mlx5e_sq *
mlx5e_select_queue(if_t ifp, struct mbuf *mb)
{
	struct mlx5e_priv *priv = if_getsoftc(ifp);
	struct mlx5e_sq *sq;
	u32 ch;
	u32 tc;

	/* obtain VLAN information if present */
	if (mb->m_flags & M_VLANTAG) {
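		/* the upper three bits of the VLAN tag hold the priority (PCP) */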
		tc = (mb->m_pkthdr.ether_vtag >> 13);
		if (tc >= priv->num_tc)
			tc = priv->default_vlan_prio;
	} else {
		tc = priv->default_vlan_prio;
	}

	ch = priv->params.num_channels;

	/* check if flowid is set */
	if (M_HASHTYPE_GET(mb) != M_HASHTYPE_NONE) {
#ifdef RSS
		u32 temp;

		if (rss_hash2bucket(mb->m_pkthdr.flowid,
		    M_HASHTYPE_GET(mb), &temp) == 0)
			ch = temp % ch;
		else
#endif
			ch = (mb->m_pkthdr.flowid % 128) % ch;
	} else {
		ch = m_ether_tcpip_hash(MBUF_HASHFLAG_L3 |
		    MBUF_HASHFLAG_L4, mb, mlx5e_hash_value) % ch;
	}

	/* check if send queue is running */
	sq = &priv->channel[ch].sq[tc];
	if (likely(READ_ONCE(sq->running) != 0))
		return (sq);
	return (NULL);
}

static inline u16
mlx5e_get_l2_header_size(struct mlx5e_sq *sq, struct mbuf *mb)
{
	struct ether_vlan_header *eh;
	uint16_t eth_type;
	int min_inline;

	eh = mtod(mb, struct ether_vlan_header *);
	if (unlikely(mb->m_len < ETHER_HDR_LEN)) {
		goto max_inline;
	} else if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		if (unlikely(mb->m_len < (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN)))
			goto max_inline;
		eth_type = ntohs(eh->evl_proto);
		min_inline = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	} else {
		eth_type = ntohs(eh->evl_encap_proto);
		min_inline = ETHER_HDR_LEN;
	}

	switch (eth_type) {
	case ETHERTYPE_IP:
	case ETHERTYPE_IPV6:
		/*
		 * Make sure the TOS (IPv4) or traffic class (IPv6)
		 * field gets inlined. Else the SQ may stall.
		 */
		min_inline += 4;
		break;
	default:
		goto max_inline;
	}

	/*
	 * m_copydata() will be used on the remaining header which
	 * does not need to reside within the first m_len bytes of
	 * data:
	 */
	if (mb->m_pkthdr.len < min_inline)
		goto max_inline;
	return (min_inline);

max_inline:
	return (MIN(mb->m_pkthdr.len, sq->max_inline));
}

/*
 * This function parses IPv4 and IPv6 packets looking for TCP and UDP
 * headers.
 *
 * Upon return, the pointer that the "ppth" argument points to is set
 * to the location of the TCP header, or to NULL if no TCP header is
 * present.
 *
 * The return value indicates the number of bytes from the beginning
 * of the packet until the first byte after the TCP or UDP header. If
 * this function returns zero, the parsing failed.
 */
int
mlx5e_get_full_header_size(const struct mbuf *mb, const struct tcphdr **ppth)
{
	const struct ether_vlan_header *eh;
	const struct tcphdr *th;
	const struct ip *ip;
	int ip_hlen, tcp_hlen;
	const struct ip6_hdr *ip6;
	uint16_t eth_type;
	int eth_hdr_len;

	eh = mtod(mb, const struct ether_vlan_header *);
	if (unlikely(mb->m_len < ETHER_HDR_LEN))
		goto failure;
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		if (unlikely(mb->m_len < ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN))
			goto failure;
		eth_type = ntohs(eh->evl_proto);
		eth_hdr_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	} else {
		eth_type = ntohs(eh->evl_encap_proto);
		eth_hdr_len = ETHER_HDR_LEN;
	}

	switch (eth_type) {
	case ETHERTYPE_IP:
		ip = (const struct ip *)(mb->m_data + eth_hdr_len);
		if (unlikely(mb->m_len < eth_hdr_len + sizeof(*ip)))
			goto failure;
		switch (ip->ip_p) {
		case IPPROTO_TCP:
			ip_hlen = ip->ip_hl << 2;
			eth_hdr_len += ip_hlen;
			goto tcp_packet;
		case IPPROTO_UDP:
			ip_hlen = ip->ip_hl << 2;
			eth_hdr_len += ip_hlen + sizeof(struct udphdr);
			th = NULL;
			goto udp_packet;
		default:
			goto failure;
		}
		break;
	case ETHERTYPE_IPV6:
		ip6 = (const struct ip6_hdr *)(mb->m_data + eth_hdr_len);
		if (unlikely(mb->m_len < eth_hdr_len + sizeof(*ip6)))
			goto failure;
		switch (ip6->ip6_nxt) {
		case IPPROTO_TCP:
			eth_hdr_len += sizeof(*ip6);
			goto tcp_packet;
		case IPPROTO_UDP:
			eth_hdr_len += sizeof(*ip6) + sizeof(struct udphdr);
			th = NULL;
			goto udp_packet;
		default:
			goto failure;
		}
		break;
	default:
		goto failure;
	}
tcp_packet:
	if (unlikely(mb->m_len < eth_hdr_len + sizeof(*th))) {
		const struct mbuf *m_th = mb->m_next;
		if (unlikely(mb->m_len != eth_hdr_len ||
		    m_th == NULL || m_th->m_len < sizeof(*th)))
			goto failure;
		th = (const struct tcphdr *)(m_th->m_data);
	} else {
		th = (const struct tcphdr *)(mb->m_data + eth_hdr_len);
	}
	tcp_hlen = th->th_off << 2;
	eth_hdr_len += tcp_hlen;
udp_packet:
	/*
	 * m_copydata() will be used on the remaining header which
	 * does not need to reside within the first m_len bytes of
	 * data:
	 */
	if (unlikely(mb->m_pkthdr.len < eth_hdr_len))
		goto failure;
	if (ppth != NULL)
		*ppth = th;
	return (eth_hdr_len);
failure:
	if (ppth != NULL)
		*ppth = NULL;
	return (0);
}

/*
 * Locate a pointer inside an mbuf chain, stepping to the next mbuf
 * when the current one is fully consumed. Returns NULL upon failure.
 */
static inline void *
mlx5e_parse_mbuf_chain(const struct mbuf **mb, int *poffset, int eth_hdr_len,
    int min_len)
{
	if (unlikely(mb[0]->m_len == eth_hdr_len)) {
		poffset[0] = eth_hdr_len;
		if (unlikely((mb[0] = mb[0]->m_next) == NULL))
			return (NULL);
	}
	if (unlikely(mb[0]->m_len < eth_hdr_len - poffset[0] + min_len))
		return (NULL);
	return (mb[0]->m_data + eth_hdr_len - poffset[0]);
}

/*
 * This function parses IPv4 and IPv6 packets looking for UDP, VXLAN
 * and TCP headers.
 *
 * The return value indicates the number of bytes from the beginning
 * of the packet until the first byte after the TCP header. If this
 * function returns zero, the parsing failed.
 */
static int
mlx5e_get_vxlan_header_size(const struct mbuf *mb, struct mlx5e_tx_wqe *wqe,
    uint8_t cs_mask, uint8_t opcode)
{
	const struct ether_vlan_header *eh;
	struct ip *ip4;
	struct ip6_hdr *ip6;
	struct tcphdr *th;
	struct udphdr *udp;
	bool has_outer_vlan_tag;
	uint16_t eth_type;
	uint8_t ip_type;
	int pkt_hdr_len;
	int eth_hdr_len;
	int tcp_hlen;
	int ip_hlen;
	int offset;

	pkt_hdr_len = mb->m_pkthdr.len;
	has_outer_vlan_tag = (mb->m_flags & M_VLANTAG) != 0;
	offset = 0;

	eh = mtod(mb, const struct ether_vlan_header *);
	if (unlikely(mb->m_len < ETHER_HDR_LEN))
		return (0);

	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		if (unlikely(mb->m_len < ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN))
			return (0);
		eth_type = eh->evl_proto;
		eth_hdr_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	} else {
		eth_type = eh->evl_encap_proto;
		eth_hdr_len = ETHER_HDR_LEN;
	}

	switch (eth_type) {
	case htons(ETHERTYPE_IP):
		ip4 = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len,
		    sizeof(*ip4));
		if (unlikely(ip4 == NULL))
			return (0);
		ip_type = ip4->ip_p;
		if (unlikely(ip_type != IPPROTO_UDP))
			return (0);
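		/* SWP offsets are programmed in units of 2 bytes */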
		wqe->eth.swp_outer_l3_offset = eth_hdr_len / 2;
		wqe->eth.cs_flags = MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM;
		ip_hlen = ip4->ip_hl << 2;
		eth_hdr_len += ip_hlen;
		udp = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len,
		    sizeof(*udp));
		if (unlikely(udp == NULL))
			return (0);
		wqe->eth.swp_outer_l4_offset = eth_hdr_len / 2;
		wqe->eth.swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L4_TYPE;
		eth_hdr_len += sizeof(*udp);
		break;
	case htons(ETHERTYPE_IPV6):
		ip6 = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len,
		    sizeof(*ip6));
		if (unlikely(ip6 == NULL))
			return (0);
		ip_type = ip6->ip6_nxt;
		if (unlikely(ip_type != IPPROTO_UDP))
			return (0);
		wqe->eth.swp_outer_l3_offset = eth_hdr_len / 2;
		wqe->eth.cs_flags = MLX5_ETH_WQE_L4_CSUM;
		eth_hdr_len += sizeof(*ip6);
		udp = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len,
		    sizeof(*udp));
		if (unlikely(udp == NULL))
			return (0);
		wqe->eth.swp_outer_l4_offset = eth_hdr_len / 2;
		wqe->eth.swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L4_TYPE |
		    MLX5_ETH_WQE_SWP_OUTER_L3_TYPE;
		eth_hdr_len += sizeof(*udp);
		break;
	default:
		return (0);
	}

	/*
	 * If the hardware is not computing the inner IP checksum, then
	 * skip inlining the VXLAN header and the inner headers:
	 */
	if (unlikely((cs_mask & MLX5_ETH_WQE_L3_INNER_CSUM) == 0))
		goto done;
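
	/* inline the 8-byte VXLAN header following the outer UDP header */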
	if (unlikely(mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len,
	    8) == NULL))
		return (0);
	eth_hdr_len += 8;

	/* Check for ethernet header again. */
	eh = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len, ETHER_HDR_LEN);
	if (unlikely(eh == NULL))
		return (0);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		if (unlikely(mb->m_len < eth_hdr_len - offset + ETHER_HDR_LEN +
		    ETHER_VLAN_ENCAP_LEN))
			return (0);
		eth_type = eh->evl_proto;
		eth_hdr_len += ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	} else {
		eth_type = eh->evl_encap_proto;
		eth_hdr_len += ETHER_HDR_LEN;
	}

	/* Check for IP header again. */
	switch (eth_type) {
	case htons(ETHERTYPE_IP):
		ip4 = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len,
		    sizeof(*ip4));
		if (unlikely(ip4 == NULL))
			return (0);
		wqe->eth.swp_inner_l3_offset = eth_hdr_len / 2;
		wqe->eth.cs_flags |= MLX5_ETH_WQE_L3_INNER_CSUM;
		ip_type = ip4->ip_p;
		ip_hlen = ip4->ip_hl << 2;
		eth_hdr_len += ip_hlen;
		break;
	case htons(ETHERTYPE_IPV6):
		ip6 = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len,
		    sizeof(*ip6));
		if (unlikely(ip6 == NULL))
			return (0);
		wqe->eth.swp_inner_l3_offset = eth_hdr_len / 2;
		wqe->eth.swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_TYPE;
		ip_type = ip6->ip6_nxt;
		eth_hdr_len += sizeof(*ip6);
		break;
	default:
		return (0);
	}

	/*
	 * If the hardware is not computing inner UDP/TCP checksum,
	 * then skip inlining the inner UDP/TCP header:
	 */
	if (unlikely((cs_mask & MLX5_ETH_WQE_L4_INNER_CSUM) == 0))
		goto done;

	switch (ip_type) {
	case IPPROTO_UDP:
		udp = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len,
		    sizeof(*udp));
		if (unlikely(udp == NULL))
			return (0);
		wqe->eth.swp_inner_l4_offset = (eth_hdr_len / 2);
		wqe->eth.cs_flags |= MLX5_ETH_WQE_L4_INNER_CSUM;
		wqe->eth.swp_flags |= MLX5_ETH_WQE_SWP_INNER_L4_TYPE;
		eth_hdr_len += sizeof(*udp);
		break;
	case IPPROTO_TCP:
		th = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len,
		    sizeof(*th));
		if (unlikely(th == NULL))
			return (0);
		wqe->eth.swp_inner_l4_offset = eth_hdr_len / 2;
		wqe->eth.cs_flags |= MLX5_ETH_WQE_L4_INNER_CSUM;
		tcp_hlen = th->th_off << 2;
		eth_hdr_len += tcp_hlen;
		break;
	default:
		return (0);
	}
done:
	if (unlikely(pkt_hdr_len < eth_hdr_len))
		return (0);

	/* Account for software inserted VLAN tag, if any. */
	if (unlikely(has_outer_vlan_tag)) {
		wqe->eth.swp_outer_l3_offset += ETHER_VLAN_ENCAP_LEN / 2;
		wqe->eth.swp_outer_l4_offset += ETHER_VLAN_ENCAP_LEN / 2;
		wqe->eth.swp_inner_l3_offset += ETHER_VLAN_ENCAP_LEN / 2;
		wqe->eth.swp_inner_l4_offset += ETHER_VLAN_ENCAP_LEN / 2;
	}

	/*
	 * When inner checksums are set, outer L4 checksum flag must
	 * be disabled.
	 */
	if (wqe->eth.cs_flags & (MLX5_ETH_WQE_L3_INNER_CSUM |
	    MLX5_ETH_WQE_L4_INNER_CSUM))
		wqe->eth.cs_flags &= ~MLX5_ETH_WQE_L4_CSUM;

	return (eth_hdr_len);
}

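/*
 * A DUMP WQE consists of one control segment and one data segment and
 * fits into a single send WQE building block (WQEBB).
 */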
struct mlx5_wqe_dump_seg {
	struct mlx5_wqe_ctrl_seg ctrl;
	struct mlx5_wqe_data_seg data;
} __aligned(MLX5_SEND_WQE_BB);

CTASSERT(DIV_ROUND_UP(2, MLX5_SEND_WQEBB_NUM_DS) == 1);

int
mlx5e_sq_dump_xmit(struct mlx5e_sq *sq, struct mlx5e_xmit_args *parg, struct mbuf **mbp)
{
	bus_dma_segment_t segs[MLX5E_MAX_TX_MBUF_FRAGS];
	struct mlx5_wqe_dump_seg *wqe;
	struct mlx5_wqe_dump_seg *wqe_last;
	int nsegs;
	int xsegs;
	u32 off;
	u32 msb;
	int err;
	int x;
	struct mbuf *mb;
	const u32 ds_cnt = 2;
	u16 pi;
	const u8 opcode = MLX5_OPCODE_DUMP;

	/* get pointer to mbuf */
	mb = *mbp;

	/* get producer index */
	pi = sq->pc & sq->wq.sz_m1;

	sq->mbuf[pi].num_bytes = mb->m_pkthdr.len;
	sq->mbuf[pi].num_wqebbs = 0;

	/* check number of segments in mbuf */
	err = bus_dmamap_load_mbuf_sg(sq->dma_tag, sq->mbuf[pi].dma_map,
	    mb, segs, &nsegs, BUS_DMA_NOWAIT);
	if (err == EFBIG) {
		/* update statistics */
		sq->stats.defragged++;
		/* too many mbuf fragments */
		mb = m_defrag(*mbp, M_NOWAIT);
		if (mb == NULL) {
			mb = *mbp;
			goto tx_drop;
		}
		/* try again */
		err = bus_dmamap_load_mbuf_sg(sq->dma_tag, sq->mbuf[pi].dma_map,
		    mb, segs, &nsegs, BUS_DMA_NOWAIT);
	}

	if (err != 0)
		goto tx_drop;

	/* make sure all mbuf data, if any, is visible to the bus */
	bus_dmamap_sync(sq->dma_tag, sq->mbuf[pi].dma_map,
	    BUS_DMASYNC_PREWRITE);

	/* compute number of real DUMP segments */
	msb = sq->priv->params_ethtool.hw_mtu_msb;
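	/* each busdma segment is split into chunks of at most "msb" bytes */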
	for (x = xsegs = 0; x != nsegs; x++)
		xsegs += howmany((u32)segs[x].ds_len, msb);

	/* check if there are no segments */
	if (unlikely(xsegs == 0)) {
		bus_dmamap_unload(sq->dma_tag, sq->mbuf[pi].dma_map);
		m_freem(mb);
		*mbp = NULL;	/* safety clear */
		return (0);
	}

	/* return ENOBUFS if the queue is full */
	if (unlikely(!mlx5e_sq_has_room_for(sq, xsegs))) {
		sq->stats.enobuf++;
		bus_dmamap_unload(sq->dma_tag, sq->mbuf[pi].dma_map);
		m_freem(mb);
		*mbp = NULL;	/* safety clear */
		return (ENOBUFS);
	}

	wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi);
	wqe_last = mlx5_wq_cyc_get_wqe(&sq->wq, sq->wq.sz_m1);

	for (x = 0; x != nsegs; x++) {
		for (off = 0; off < segs[x].ds_len; off += msb) {
			u32 len = segs[x].ds_len - off;

			/* limit length */
			if (likely(len > msb))
				len = msb;

			memset(&wqe->ctrl, 0, sizeof(wqe->ctrl));

			/* fill control segment */
			wqe->ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode);
			wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
			wqe->ctrl.imm = cpu_to_be32(parg->tisn << 8);

			/* fill data segment */
			wqe->data.addr = cpu_to_be64((uint64_t)segs[x].ds_addr + off);
			wqe->data.lkey = sq->mkey_be;
			wqe->data.byte_count = cpu_to_be32(len);

			/* advance to next building block */
			if (unlikely(wqe == wqe_last))
				wqe = mlx5_wq_cyc_get_wqe(&sq->wq, 0);
			else
				wqe++;

			sq->mbuf[pi].num_wqebbs++;
			sq->pc++;
		}
	}

	wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi);
	wqe_last = mlx5_wq_cyc_get_wqe(&sq->wq, (sq->pc - 1) & sq->wq.sz_m1);

	/* put in place data fence */
	wqe->ctrl.fm_ce_se |= MLX5_FENCE_MODE_INITIATOR_SMALL;

	/* check if we should generate a completion event */
	if (mlx5e_do_send_cqe_inline(sq))
		wqe_last->ctrl.fm_ce_se |= MLX5_WQE_CTRL_CQ_UPDATE;

	/* copy data for doorbell */
	memcpy(sq->doorbell.d32, wqe_last, sizeof(sq->doorbell.d32));

	/* store pointer to mbuf */
	sq->mbuf[pi].mbuf = mb;
	sq->mbuf[pi].mst = m_snd_tag_ref(parg->mst);

	/* count all traffic going out */
	sq->stats.packets++;
	sq->stats.bytes += sq->mbuf[pi].num_bytes;

	*mbp = NULL;	/* safety clear */
	return (0);

tx_drop:
	sq->stats.dropped++;
	*mbp = NULL;
	m_freem(mb);
	return (err);
}

int
mlx5e_sq_xmit(struct mlx5e_sq *sq, struct mbuf **mbp)
{
	bus_dma_segment_t segs[MLX5E_MAX_TX_MBUF_FRAGS];
	struct mlx5e_xmit_args args = {};
	struct mlx5_wqe_data_seg *dseg;
	struct mlx5e_tx_wqe *wqe;
	if_t ifp;
	int nsegs;
	int err;
	int x;
	struct mbuf *mb;
	u16 ds_cnt;
	u16 pi;
	u8 opcode;

#ifdef KERN_TLS
top:
#endif
	/* Return ENOBUFS if the queue is full */
	if (unlikely(!mlx5e_sq_has_room_for(sq, 2 * MLX5_SEND_WQE_MAX_WQEBBS))) {
		sq->stats.enobuf++;
		return (ENOBUFS);
	}

	/* Align SQ edge with NOPs to avoid WQE wrap around */
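	/* "pi" is the number of WQEBBs until the end of the ring, minus one */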
	pi = ((~sq->pc) & sq->wq.sz_m1);
	if (pi < (MLX5_SEND_WQE_MAX_WQEBBS - 1)) {
		/* Send one multi NOP message instead of many */
		mlx5e_send_nop(sq, (pi + 1) * MLX5_SEND_WQEBB_NUM_DS);
		pi = ((~sq->pc) & sq->wq.sz_m1);
		if (pi < (MLX5_SEND_WQE_MAX_WQEBBS - 1)) {
			sq->stats.enobuf++;
			return (ENOMEM);
		}
	}

#ifdef KERN_TLS
	/* Special handling for TLS packets, if any */
	switch (mlx5e_sq_tls_xmit(sq, &args, mbp)) {
	case MLX5E_TLS_LOOP:
		goto top;
	case MLX5E_TLS_FAILURE:
		mb = *mbp;
		err = ENOMEM;
		goto tx_drop;
	case MLX5E_TLS_DEFERRED:
		return (0);
	case MLX5E_TLS_CONTINUE:
	default:
		break;
	}
#endif

	/* Setup local variables */
	pi = sq->pc & sq->wq.sz_m1;
	wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi);
	ifp = sq->ifp;

	memset(wqe, 0, sizeof(*wqe));

	/* get pointer to mbuf */
	mb = *mbp;

	/* Send a copy of the frame to the BPF listener, if any */
	if (ifp != NULL)
		ETHER_BPF_MTAP(ifp, mb);

	if (mb->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TSO)) {
		wqe->eth.cs_flags |= MLX5_ETH_WQE_L3_CSUM;
	}
	if (mb->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_UDP_IPV6 | CSUM_TCP_IPV6 | CSUM_TSO)) {
		wqe->eth.cs_flags |= MLX5_ETH_WQE_L4_CSUM;
	}
	if (wqe->eth.cs_flags == 0) {
		sq->stats.csum_offload_none++;
	}
	if (mb->m_pkthdr.csum_flags & CSUM_TSO) {
		u32 payload_len;
		u32 mss = mb->m_pkthdr.tso_segsz;
		u32 num_pkts;

		wqe->eth.mss = cpu_to_be16(mss);
		opcode = MLX5_OPCODE_LSO;
		if (args.ihs == 0)
			args.ihs = mlx5e_get_full_header_size(mb, NULL);
		if (unlikely(args.ihs == 0)) {
			err = EINVAL;
			goto tx_drop;
		}
		payload_len = mb->m_pkthdr.len - args.ihs;
		if (payload_len == 0)
			num_pkts = 1;
		else
			num_pkts = DIV_ROUND_UP(payload_len, mss);
		sq->mbuf[pi].num_bytes = payload_len + (num_pkts * args.ihs);

		sq->stats.tso_packets++;
		sq->stats.tso_bytes += payload_len;
	} else if (mb->m_pkthdr.csum_flags & CSUM_ENCAP_VXLAN) {
		/* check for inner TCP TSO first */
		if (mb->m_pkthdr.csum_flags & (CSUM_INNER_IP_TSO |
		    CSUM_INNER_IP6_TSO)) {
			u32 payload_len;
			u32 mss = mb->m_pkthdr.tso_segsz;
			u32 num_pkts;

			wqe->eth.mss = cpu_to_be16(mss);
			opcode = MLX5_OPCODE_LSO;

			if (likely(args.ihs == 0)) {
				args.ihs = mlx5e_get_vxlan_header_size(mb, wqe,
				       MLX5_ETH_WQE_L3_INNER_CSUM |
				       MLX5_ETH_WQE_L4_INNER_CSUM |
				       MLX5_ETH_WQE_L4_CSUM |
				       MLX5_ETH_WQE_L3_CSUM,
				       opcode);
				if (unlikely(args.ihs == 0)) {
					err = EINVAL;
					goto tx_drop;
				}
			}

			payload_len = mb->m_pkthdr.len - args.ihs;
			if (payload_len == 0)
				num_pkts = 1;
			else
				num_pkts = DIV_ROUND_UP(payload_len, mss);
			sq->mbuf[pi].num_bytes = payload_len +
			    num_pkts * args.ihs;

			sq->stats.tso_packets++;
			sq->stats.tso_bytes += payload_len;
		} else {
			opcode = MLX5_OPCODE_SEND;

			if (likely(args.ihs == 0)) {
				uint8_t cs_mask;

				if (mb->m_pkthdr.csum_flags &
				    (CSUM_INNER_IP_TCP | CSUM_INNER_IP_UDP |
				     CSUM_INNER_IP6_TCP | CSUM_INNER_IP6_UDP)) {
					cs_mask =
					    MLX5_ETH_WQE_L3_INNER_CSUM |
					    MLX5_ETH_WQE_L4_INNER_CSUM |
					    MLX5_ETH_WQE_L4_CSUM |
					    MLX5_ETH_WQE_L3_CSUM;
				} else if (mb->m_pkthdr.csum_flags & CSUM_INNER_IP) {
					cs_mask =
					    MLX5_ETH_WQE_L3_INNER_CSUM |
					    MLX5_ETH_WQE_L4_CSUM |
					    MLX5_ETH_WQE_L3_CSUM;
				} else {
					cs_mask =
					    MLX5_ETH_WQE_L4_CSUM |
					    MLX5_ETH_WQE_L3_CSUM;
				}
				args.ihs = mlx5e_get_vxlan_header_size(mb, wqe,
				    cs_mask, opcode);
				if (unlikely(args.ihs == 0)) {
					err = EINVAL;
					goto tx_drop;
				}
			}

			sq->mbuf[pi].num_bytes = max_t(unsigned int,
			    mb->m_pkthdr.len, ETHER_MIN_LEN - ETHER_CRC_LEN);
		}
	} else {
		opcode = MLX5_OPCODE_SEND;

		if (args.ihs == 0) {
			switch (sq->min_inline_mode) {
			case MLX5_INLINE_MODE_IP:
			case MLX5_INLINE_MODE_TCP_UDP:
				args.ihs = mlx5e_get_full_header_size(mb, NULL);
				if (unlikely(args.ihs == 0))
					args.ihs = mlx5e_get_l2_header_size(sq, mb);
				break;
			case MLX5_INLINE_MODE_L2:
				args.ihs = mlx5e_get_l2_header_size(sq, mb);
				break;
			case MLX5_INLINE_MODE_NONE:
				/* FALLTHROUGH */
			default:
				if ((mb->m_flags & M_VLANTAG) != 0 &&
				    (sq->min_insert_caps & MLX5E_INSERT_VLAN) != 0) {
					/* inlining VLAN data is not required */
					wqe->eth.vlan_cmd = htons(0x8000); /* bit 0 CVLAN */
					wqe->eth.vlan_hdr = htons(mb->m_pkthdr.ether_vtag);
					args.ihs = 0;
				} else if ((mb->m_flags & M_VLANTAG) == 0 &&
				    (sq->min_insert_caps & MLX5E_INSERT_NON_VLAN) != 0) {
					/* inlining non-VLAN data is not required */
					args.ihs = 0;
				} else {
					/* we are forced to inline the L2 header, if any */
					args.ihs = mlx5e_get_l2_header_size(sq, mb);
				}
				break;
			}
		}
		sq->mbuf[pi].num_bytes = max_t(unsigned int,
		    mb->m_pkthdr.len, ETHER_MIN_LEN - ETHER_CRC_LEN);
	}

	if (likely(args.ihs == 0)) {
		/* nothing to inline */
	} else if ((mb->m_flags & M_VLANTAG) != 0) {
		struct ether_vlan_header *eh = (struct ether_vlan_header *)
		    wqe->eth.inline_hdr_start;

		/* Range checks */
		if (unlikely(args.ihs > (sq->max_inline - ETHER_VLAN_ENCAP_LEN))) {
			if (mb->m_pkthdr.csum_flags & (CSUM_TSO | CSUM_ENCAP_VXLAN)) {
				err = EINVAL;
				goto tx_drop;
			}
			args.ihs = (sq->max_inline - ETHER_VLAN_ENCAP_LEN);
		} else if (unlikely(args.ihs < ETHER_HDR_LEN)) {
			err = EINVAL;
			goto tx_drop;
		}
		m_copydata(mb, 0, ETHER_HDR_LEN, (caddr_t)eh);
		m_adj(mb, ETHER_HDR_LEN);
		/* Insert 4-byte VLAN tag into data stream */
		eh->evl_proto = eh->evl_encap_proto;
		eh->evl_encap_proto = htons(ETHERTYPE_VLAN);
		eh->evl_tag = htons(mb->m_pkthdr.ether_vtag);
		/* Copy rest of header data, if any */
		m_copydata(mb, 0, args.ihs - ETHER_HDR_LEN, (caddr_t)(eh + 1));
		m_adj(mb, args.ihs - ETHER_HDR_LEN);
		/* Extend header by 4 bytes */
		args.ihs += ETHER_VLAN_ENCAP_LEN;
		wqe->eth.inline_hdr_sz = cpu_to_be16(args.ihs);
	} else {
		/* check if inline header size is too big */
		if (unlikely(args.ihs > sq->max_inline)) {
			if (unlikely(mb->m_pkthdr.csum_flags & (CSUM_TSO |
			    CSUM_ENCAP_VXLAN))) {
				err = EINVAL;
				goto tx_drop;
			}
			args.ihs = sq->max_inline;
		}
		m_copydata(mb, 0, args.ihs, wqe->eth.inline_hdr_start);
		m_adj(mb, args.ihs);
		wqe->eth.inline_hdr_sz = cpu_to_be16(args.ihs);
	}

	ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS;
	if (args.ihs > sizeof(wqe->eth.inline_hdr_start)) {
		ds_cnt += DIV_ROUND_UP(args.ihs - sizeof(wqe->eth.inline_hdr_start),
		    MLX5_SEND_WQE_DS);
	}
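	/* data segments follow the control, eth and inline header segments */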
	dseg = ((struct mlx5_wqe_data_seg *)&wqe->ctrl) + ds_cnt;

	err = bus_dmamap_load_mbuf_sg(sq->dma_tag, sq->mbuf[pi].dma_map,
	    mb, segs, &nsegs, BUS_DMA_NOWAIT);
	if (err == EFBIG) {
		/* Update statistics */
		sq->stats.defragged++;
		/* Too many mbuf fragments */
		mb = m_defrag(*mbp, M_NOWAIT);
		if (mb == NULL) {
			mb = *mbp;
			goto tx_drop;
		}
		/* Try again */
		err = bus_dmamap_load_mbuf_sg(sq->dma_tag, sq->mbuf[pi].dma_map,
		    mb, segs, &nsegs, BUS_DMA_NOWAIT);
	}
	/* Catch errors */
	if (err != 0)
		goto tx_drop;

	/* Make sure all mbuf data, if any, is visible to the bus */
	if (nsegs != 0) {
		bus_dmamap_sync(sq->dma_tag, sq->mbuf[pi].dma_map,
		    BUS_DMASYNC_PREWRITE);
	} else {
		/* All data was inlined, free the mbuf. */
		bus_dmamap_unload(sq->dma_tag, sq->mbuf[pi].dma_map);
		m_freem(mb);
		mb = NULL;
	}

	for (x = 0; x != nsegs; x++) {
		if (segs[x].ds_len == 0)
			continue;
		dseg->addr = cpu_to_be64((uint64_t)segs[x].ds_addr);
		dseg->lkey = sq->mkey_be;
		dseg->byte_count = cpu_to_be32((uint32_t)segs[x].ds_len);
		dseg++;
	}

	ds_cnt = (dseg - ((struct mlx5_wqe_data_seg *)&wqe->ctrl));

	wqe->ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode);
	wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
	wqe->ctrl.imm = cpu_to_be32(args.tisn << 8);

	if (mlx5e_do_send_cqe_inline(sq))
		wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
	else
		wqe->ctrl.fm_ce_se = 0;

	/* Copy data for doorbell */
	memcpy(sq->doorbell.d32, &wqe->ctrl, sizeof(sq->doorbell.d32));

	/* Store pointer to mbuf */
	sq->mbuf[pi].mbuf = mb;
	sq->mbuf[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
	if (unlikely(args.mst != NULL))
		sq->mbuf[pi].mst = m_snd_tag_ref(args.mst);
	else
		MPASS(sq->mbuf[pi].mst == NULL);

	sq->pc += sq->mbuf[pi].num_wqebbs;

	/* Count all traffic going out */
	sq->stats.packets++;
	sq->stats.bytes += sq->mbuf[pi].num_bytes;

	*mbp = NULL;	/* safety clear */
	return (0);

tx_drop:
	sq->stats.dropped++;
	*mbp = NULL;
	m_freem(mb);
	return (err);
}

static void
mlx5e_poll_tx_cq(struct mlx5e_sq *sq, int budget)
{
	u16 sqcc;

	/*
	 * sq->cc must be updated only after mlx5_cqwq_update_db_record(),
	 * otherwise a cq overrun may occur
	 */
	sqcc = sq->cc;

	while (budget > 0) {
		struct mlx5_cqe64 *cqe;
		struct m_snd_tag *mst;
		struct mbuf *mb;
		bool match;
		u16 sqcc_this;
		u16 delta;
		u16 x;
		u16 ci;

		cqe = mlx5e_get_cqe(&sq->cq);
		if (!cqe)
			break;

		mlx5_cqwq_pop(&sq->cq.wq);

		/* check if the completion event indicates an error */
		if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) {
			mlx5e_dump_err_cqe(&sq->cq, sq->sqn, (const void *)cqe);
			sq->stats.cqe_err++;
		}

		/* setup local variables */
		sqcc_this = be16toh(cqe->wqe_counter);
		match = false;

		/* update budget according to the event factor */
		budget -= sq->cev_factor;

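		/*
		 * A single CQE may complete up to "cev_factor" WQEs;
		 * free them all, stopping at the entry whose WQE
		 * counter range contains the CQE's "wqe_counter".
		 */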
		for (x = 0;; x++) {
			if (unlikely(match != false)) {
				break;
			} else if (unlikely(x == sq->cev_factor)) {
				/* WQE counter match not found */
				sq->stats.cqe_err++;
				break;
			}
			ci = sqcc & sq->wq.sz_m1;
			delta = sqcc_this - sqcc;
			match = (delta < sq->mbuf[ci].num_wqebbs);
			mb = sq->mbuf[ci].mbuf;
			sq->mbuf[ci].mbuf = NULL;
			mst = sq->mbuf[ci].mst;
			sq->mbuf[ci].mst = NULL;

			if (unlikely(mb == NULL)) {
				if (unlikely(sq->mbuf[ci].num_bytes == 0))
					sq->stats.nop++;
			} else {
				bus_dmamap_sync(sq->dma_tag, sq->mbuf[ci].dma_map,
				    BUS_DMASYNC_POSTWRITE);
				bus_dmamap_unload(sq->dma_tag, sq->mbuf[ci].dma_map);

				/* Free transmitted mbuf */
				m_freem(mb);
			}

			if (unlikely(mst != NULL))
				m_snd_tag_rele(mst);

			sqcc += sq->mbuf[ci].num_wqebbs;
		}
	}

	mlx5_cqwq_update_db_record(&sq->cq.wq);

	/* Ensure cq space is freed before enabling more cqes */
	atomic_thread_fence_rel();

	sq->cc = sqcc;
}

static int
mlx5e_xmit_locked(if_t ifp, struct mlx5e_sq *sq, struct mbuf *mb)
{
	int err = 0;

	if (unlikely((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0 ||
	    READ_ONCE(sq->running) == 0)) {
		m_freem(mb);
		return (ENETDOWN);
	}

	/* Do transmit */
	if (mlx5e_sq_xmit(sq, &mb) != 0) {
		/* NOTE: m_freem() is NULL safe */
		m_freem(mb);
		err = ENOBUFS;
	}

	/* Write the doorbell record, if any. */
	mlx5e_tx_notify_hw(sq, false);

	/*
	 * Check if we need to start the event timer which flushes the
	 * transmit ring on timeout:
	 */
	if (unlikely(sq->cev_next_state == MLX5E_CEV_STATE_INITIAL &&
	    sq->cev_factor != 1)) {
		/* start the timer */
		mlx5e_sq_cev_timeout(sq);
	} else {
		/* don't send NOPs yet */
		sq->cev_next_state = MLX5E_CEV_STATE_HOLD_NOPS;
	}
	return (err);
}

int
mlx5e_xmit(if_t ifp, struct mbuf *mb)
{
	struct mlx5e_sq *sq;
	int ret;

	if (mb->m_pkthdr.csum_flags & CSUM_SND_TAG) {
		MPASS(mb->m_pkthdr.snd_tag->ifp == ifp);
		sq = mlx5e_select_queue_by_send_tag(ifp, mb);
		if (unlikely(sq == NULL)) {
			goto select_queue;
		}
	} else {
select_queue:
		sq = mlx5e_select_queue(ifp, mb);
		if (unlikely(sq == NULL)) {
			/* Free mbuf */
			m_freem(mb);

			/* Invalid send queue */
			return (ENXIO);
		}
	}

	mtx_lock(&sq->lock);
	ret = mlx5e_xmit_locked(ifp, sq, mb);
	mtx_unlock(&sq->lock);

	return (ret);
}

void
mlx5e_tx_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe __unused)
{
	struct mlx5e_sq *sq = container_of(mcq, struct mlx5e_sq, cq.mcq);

	mtx_lock(&sq->comp_lock);
	mlx5e_poll_tx_cq(sq, MLX5E_BUDGET_MAX);
	mlx5e_cq_arm(&sq->cq, MLX5_GET_DOORBELL_LOCK(&sq->priv->doorbell_lock));
	mtx_unlock(&sq->comp_lock);
}