/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2018-2019 Chelsio Communications, Inc.
 * All rights reserved.
 * Written by: John Baldwin <jhb@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_kern_tls.h"

#include <sys/param.h>
#include <sys/ktr.h>
#include <sys/ktls.h>
#include <sys/sglist.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sockbuf.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp_var.h>
#include <opencrypto/cryptodev.h>
#include <opencrypto/xform.h>

#include "common/common.h"
#include "common/t4_regs.h"
#include "common/t4_regs_values.h"
#include "common/t4_tcb.h"
#include "t4_l2t.h"
#include "t4_clip.h"
#include "t4_mp_ring.h"
#include "crypto/t4_crypto.h"

#if defined(INET) || defined(INET6)

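/* A TLS record header is 5 bytes: 1-byte type, 2-byte version, 2-byte length. */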
#define TLS_HEADER_LENGTH		5

struct tls_scmd {
	__be32 seqno_numivs;
	__be32 ivgen_hdrlen;
};

struct tlspcb {
	struct m_snd_tag com;
	struct vi_info *vi;	/* virtual interface */
	struct adapter *sc;
	struct l2t_entry *l2te;	/* L2 table entry used by this connection */
	struct sge_txq *txq;
	int tid;		/* Connection identifier */

	int tx_key_addr;
	bool inline_key;
	bool using_timestamps;
	unsigned char enc_mode;

	struct tls_scmd scmd0;
	struct tls_scmd scmd0_short;

	unsigned int tx_key_info_size;

	uint32_t prev_seq;
	uint32_t prev_ack;
	uint32_t prev_tsecr;
	uint16_t prev_win;
	uint16_t prev_mss;

	/* Only used outside of setup and teardown when using inline keys. */
	struct tls_keyctx keyctx;

	/* Fields only used during setup and teardown. */
	struct inpcb *inp;	/* backpointer to host stack's PCB */
	struct sge_wrq *ctrlq;
	struct clip_entry *ce;	/* CLIP table entry used by this tid */

	bool open_pending;
};

static void t6_tls_tag_free(struct m_snd_tag *mst);
static int ktls_setup_keys(struct tlspcb *tlsp,
    const struct ktls_session *tls, struct sge_txq *txq);

static const struct if_snd_tag_sw t6_tls_tag_sw = {
	.snd_tag_free = t6_tls_tag_free,
	.type = IF_SND_TAG_TYPE_TLS
};

static inline struct tlspcb *
mst_to_tls(struct m_snd_tag *t)
{
	return (__containerof(t, struct tlspcb, com));
}

static struct tlspcb *
alloc_tlspcb(if_t ifp, struct vi_info *vi, int flags)
{
	struct port_info *pi = vi->pi;
	struct adapter *sc = pi->adapter;
	struct tlspcb *tlsp;

	tlsp = malloc(sizeof(*tlsp), M_CXGBE, M_ZERO | flags);
	if (tlsp == NULL)
		return (NULL);

	m_snd_tag_init(&tlsp->com, ifp, &t6_tls_tag_sw);
	tlsp->vi = vi;
	tlsp->sc = sc;
	tlsp->ctrlq = &sc->sge.ctrlq[pi->port_id];
	tlsp->tid = -1;
	tlsp->tx_key_addr = -1;

	return (tlsp);
}

static int
ktls_act_open_cpl_size(bool isipv6)
{

	if (isipv6)
		return (sizeof(struct cpl_t6_act_open_req6));
	else
		return (sizeof(struct cpl_t6_act_open_req));
}

static void
mk_ktls_act_open_req(struct adapter *sc, struct vi_info *vi, struct inpcb *inp,
    struct tlspcb *tlsp, int atid, void *dst)
{
	struct tcpcb *tp = intotcpcb(inp);
	struct cpl_t6_act_open_req *cpl6;
	struct cpl_act_open_req *cpl;
	uint64_t options;
	int qid_atid;

	cpl6 = dst;
	cpl = (struct cpl_act_open_req *)cpl6;
	INIT_TP_WR(cpl6, 0);
	qid_atid = V_TID_QID(sc->sge.fwq.abs_id) | V_TID_TID(atid) |
	    V_TID_COOKIE(CPL_COOKIE_KERN_TLS);
	OPCODE_TID(cpl) = htobe32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ,
	    qid_atid));
	inp_4tuple_get(inp, &cpl->local_ip, &cpl->local_port,
	    &cpl->peer_ip, &cpl->peer_port);

	options = F_TCAM_BYPASS | V_ULP_MODE(ULP_MODE_NONE);
	options |= V_SMAC_SEL(vi->smt_idx) | V_TX_CHAN(vi->pi->tx_chan);
	options |= F_NON_OFFLOAD;
	cpl->opt0 = htobe64(options);

	options = V_TX_QUEUE(TX_MODQ(vi->pi->tx_chan));
	if (tp->t_flags & TF_REQ_TSTMP)
		options |= F_TSTAMPS_EN;
	cpl->opt2 = htobe32(options);
}

static void
mk_ktls_act_open_req6(struct adapter *sc, struct vi_info *vi,
    struct inpcb *inp, struct tlspcb *tlsp, int atid, void *dst)
{
	struct tcpcb *tp = intotcpcb(inp);
	struct cpl_t6_act_open_req6 *cpl6;
	struct cpl_act_open_req6 *cpl;
	uint64_t options;
	int qid_atid;

	cpl6 = dst;
	cpl = (struct cpl_act_open_req6 *)cpl6;
	INIT_TP_WR(cpl6, 0);
	qid_atid = V_TID_QID(sc->sge.fwq.abs_id) | V_TID_TID(atid) |
	    V_TID_COOKIE(CPL_COOKIE_KERN_TLS);
	OPCODE_TID(cpl) = htobe32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ6,
	    qid_atid));
	cpl->local_port = inp->inp_lport;
	cpl->local_ip_hi = *(uint64_t *)&inp->in6p_laddr.s6_addr[0];
	cpl->local_ip_lo = *(uint64_t *)&inp->in6p_laddr.s6_addr[8];
	cpl->peer_port = inp->inp_fport;
	cpl->peer_ip_hi = *(uint64_t *)&inp->in6p_faddr.s6_addr[0];
	cpl->peer_ip_lo = *(uint64_t *)&inp->in6p_faddr.s6_addr[8];

	options = F_TCAM_BYPASS | V_ULP_MODE(ULP_MODE_NONE);
	options |= V_SMAC_SEL(vi->smt_idx) | V_TX_CHAN(vi->pi->tx_chan);
	options |= F_NON_OFFLOAD;
	cpl->opt0 = htobe64(options);

	options = V_TX_QUEUE(TX_MODQ(vi->pi->tx_chan));
	if (tp->t_flags & TF_REQ_TSTMP)
		options |= F_TSTAMPS_EN;
	cpl->opt2 = htobe32(options);
}

static int
send_ktls_act_open_req(struct adapter *sc, struct vi_info *vi,
    struct inpcb *inp, struct tlspcb *tlsp, int atid)
{
	struct wrqe *wr;
	bool isipv6;

	isipv6 = (inp->inp_vflag & INP_IPV6) != 0;
	if (isipv6) {
		tlsp->ce = t4_get_clip_entry(sc, &inp->in6p_laddr, true);
		if (tlsp->ce == NULL)
			return (ENOENT);
	}

	wr = alloc_wrqe(ktls_act_open_cpl_size(isipv6), tlsp->ctrlq);
	if (wr == NULL) {
		CTR2(KTR_CXGBE, "%s: atid %d failed to alloc WR", __func__,
		    atid);
		return (ENOMEM);
	}

	if (isipv6)
		mk_ktls_act_open_req6(sc, vi, inp, tlsp, atid, wrtod(wr));
	else
		mk_ktls_act_open_req(sc, vi, inp, tlsp, atid, wrtod(wr));

	tlsp->open_pending = true;
	t4_wrq_tx(sc, wr);
	return (0);
}

static int
ktls_act_open_rpl(struct sge_iq *iq, const struct rss_header *rss,
    struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	const struct cpl_act_open_rpl *cpl = (const void *)(rss + 1);
	u_int atid = G_TID_TID(G_AOPEN_ATID(be32toh(cpl->atid_status)));
	u_int status = G_AOPEN_STATUS(be32toh(cpl->atid_status));
	struct tlspcb *tlsp = lookup_atid(sc, atid);
	struct inpcb *inp = tlsp->inp;

	CTR3(KTR_CXGBE, "%s: atid %d status %d", __func__, atid, status);
	free_atid(sc, atid);
	if (status == 0)
		tlsp->tid = GET_TID(cpl);

	INP_WLOCK(inp);
	tlsp->open_pending = false;
	wakeup(tlsp);
	INP_WUNLOCK(inp);
	return (0);
}

/* SET_TCB_FIELD sent as a ULP command looks like this */
#define LEN__SET_TCB_FIELD_ULP (sizeof(struct ulp_txpkt) + \
    sizeof(struct ulptx_idata) + sizeof(struct cpl_set_tcb_field_core))

_Static_assert((LEN__SET_TCB_FIELD_ULP + sizeof(struct ulptx_idata)) % 16 == 0,
    "CPL_SET_TCB_FIELD ULP command not 16-byte aligned");
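
/*
 * Each ULP SET_TCB_FIELD command emitted by write_set_tcb_field_ulp()
 * below is laid out as:
 *
 *	ulp_txpkt (8) | ulptx_idata (8) | cpl_set_tcb_field_core (24) |
 *	ulptx_idata NOOP (8)
 *
 * With the standard T4/T6 structure sizes that is 48 bytes (three
 * 16-byte flits), matching roundup2(LEN__SET_TCB_FIELD_ULP, 16).
 */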

static void
write_set_tcb_field_ulp(struct tlspcb *tlsp, void *dst, struct sge_txq *txq,
    uint16_t word, uint64_t mask, uint64_t val)
{
	struct ulp_txpkt *txpkt;
	struct ulptx_idata *idata;
	struct cpl_set_tcb_field_core *cpl;

	/* ULP_TXPKT */
	txpkt = dst;
	txpkt->cmd_dest = htobe32(V_ULPTX_CMD(ULP_TX_PKT) |
	    V_ULP_TXPKT_DATAMODIFY(0) |
	    V_ULP_TXPKT_CHANNELID(tlsp->vi->pi->port_id) | V_ULP_TXPKT_DEST(0) |
	    V_ULP_TXPKT_FID(txq->eq.cntxt_id) | V_ULP_TXPKT_RO(1));
	txpkt->len = htobe32(howmany(LEN__SET_TCB_FIELD_ULP, 16));

	/* ULPTX_IDATA sub-command */
	idata = (struct ulptx_idata *)(txpkt + 1);
	idata->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM));
	idata->len = htobe32(sizeof(*cpl));

	/* CPL_SET_TCB_FIELD */
	cpl = (struct cpl_set_tcb_field_core *)(idata + 1);
	OPCODE_TID(cpl) = htobe32(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tlsp->tid));
	cpl->reply_ctrl = htobe16(F_NO_REPLY);
	cpl->word_cookie = htobe16(V_WORD(word));
	cpl->mask = htobe64(mask);
	cpl->val = htobe64(val);

	/* ULPTX_NOOP */
	idata = (struct ulptx_idata *)(cpl + 1);
	idata->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_NOOP));
	idata->len = htobe32(0);
}

static int
ktls_set_tcb_fields(struct tlspcb *tlsp, struct tcpcb *tp, struct sge_txq *txq)
{
	struct fw_ulptx_wr *wr;
	struct mbuf *m;
	char *dst;
	void *items[1];
	int error, len;

	len = sizeof(*wr) + 3 * roundup2(LEN__SET_TCB_FIELD_ULP, 16);
	if (tp->t_flags & TF_REQ_TSTMP)
		len += roundup2(LEN__SET_TCB_FIELD_ULP, 16);
	m = alloc_wr_mbuf(len, M_NOWAIT);
	if (m == NULL) {
		CTR2(KTR_CXGBE, "%s: tid %d failed to alloc WR mbuf", __func__,
		    tlsp->tid);
		return (ENOMEM);
	}
	m->m_pkthdr.snd_tag = m_snd_tag_ref(&tlsp->com);
	m->m_pkthdr.csum_flags |= CSUM_SND_TAG;

	/* FW_ULPTX_WR */
	wr = mtod(m, void *);
	wr->op_to_compl = htobe32(V_FW_WR_OP(FW_ULPTX_WR));
	wr->flowid_len16 = htobe32(F_FW_ULPTX_WR_DATA |
	    V_FW_WR_LEN16(len / 16));
	wr->cookie = 0;
	dst = (char *)(wr + 1);

	/* Clear TF_NON_OFFLOAD and set TF_CORE_BYPASS. */
	write_set_tcb_field_ulp(tlsp, dst, txq, W_TCB_T_FLAGS,
	    V_TCB_T_FLAGS(V_TF_CORE_BYPASS(1) | V_TF_NON_OFFLOAD(1)),
	    V_TCB_T_FLAGS(V_TF_CORE_BYPASS(1)));
	dst += roundup2(LEN__SET_TCB_FIELD_ULP, 16);

	/* Clear the SND_UNA_RAW, SND_NXT_RAW, and SND_MAX_RAW offsets. */
	write_set_tcb_field_ulp(tlsp, dst, txq, W_TCB_SND_UNA_RAW,
	    V_TCB_SND_NXT_RAW(M_TCB_SND_NXT_RAW) |
	    V_TCB_SND_UNA_RAW(M_TCB_SND_UNA_RAW),
	    V_TCB_SND_NXT_RAW(0) | V_TCB_SND_UNA_RAW(0));
	dst += roundup2(LEN__SET_TCB_FIELD_ULP, 16);

	write_set_tcb_field_ulp(tlsp, dst, txq, W_TCB_SND_MAX_RAW,
	    V_TCB_SND_MAX_RAW(M_TCB_SND_MAX_RAW), V_TCB_SND_MAX_RAW(0));
	dst += roundup2(LEN__SET_TCB_FIELD_ULP, 16);

	if (tp->t_flags & TF_REQ_TSTMP) {
		write_set_tcb_field_ulp(tlsp, dst, txq, W_TCB_TIMESTAMP_OFFSET,
		    V_TCB_TIMESTAMP_OFFSET(M_TCB_TIMESTAMP_OFFSET),
		    V_TCB_TIMESTAMP_OFFSET(tp->ts_offset >> 28));
		dst += roundup2(LEN__SET_TCB_FIELD_ULP, 16);
	}

	KASSERT(dst - (char *)wr == len, ("%s: length mismatch", __func__));

	items[0] = m;
	error = mp_ring_enqueue(txq->r, items, 1, 1);
	if (error)
		m_free(m);
	return (error);
}

int
t6_tls_tag_alloc(if_t ifp, union if_snd_tag_alloc_params *params,
    struct m_snd_tag **pt)
{
	const struct ktls_session *tls;
	struct tlspcb *tlsp;
	struct adapter *sc;
	struct vi_info *vi;
	struct inpcb *inp;
	struct tcpcb *tp;
	struct sge_txq *txq;
	int atid, error, explicit_iv_size, keyid, mac_first;

	tls = params->tls.tls;

	/* Only TLS 1.1 and TLS 1.2 are currently supported. */
	if (tls->params.tls_vmajor != TLS_MAJOR_VER_ONE ||
	    tls->params.tls_vminor < TLS_MINOR_VER_ONE ||
	    tls->params.tls_vminor > TLS_MINOR_VER_TWO)
		return (EPROTONOSUPPORT);

	/* Sanity check values in *tls. */
	switch (tls->params.cipher_algorithm) {
	case CRYPTO_AES_CBC:
		/* XXX: Explicitly ignore any provided IV. */
		switch (tls->params.cipher_key_len) {
		case 128 / 8:
		case 192 / 8:
		case 256 / 8:
			break;
		default:
			return (EINVAL);
		}
		switch (tls->params.auth_algorithm) {
		case CRYPTO_SHA1_HMAC:
		case CRYPTO_SHA2_256_HMAC:
		case CRYPTO_SHA2_384_HMAC:
			break;
		default:
			return (EPROTONOSUPPORT);
		}
		explicit_iv_size = AES_BLOCK_LEN;
		mac_first = 1;
		break;
	case CRYPTO_AES_NIST_GCM_16:
		if (tls->params.iv_len != SALT_SIZE)
			return (EINVAL);
		switch (tls->params.cipher_key_len) {
		case 128 / 8:
		case 192 / 8:
		case 256 / 8:
			break;
		default:
			return (EINVAL);
		}
		explicit_iv_size = 8;
		mac_first = 0;
		break;
	default:
		return (EPROTONOSUPPORT);
	}
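
	/*
	 * To summarize the checks above: AES-CBC with SHA1, SHA2-256,
	 * or SHA2-384 HMAC and AES-GCM, each with 128/192/256-bit
	 * keys, are the supported cipher suites.
	 */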

	vi = if_getsoftc(ifp);
	sc = vi->adapter;

	tlsp = alloc_tlspcb(ifp, vi, M_WAITOK);

	atid = alloc_atid(sc, tlsp);
	if (atid < 0) {
		error = ENOMEM;
		goto failed;
	}

	if (sc->tlst.inline_keys)
		keyid = -1;
	else
		keyid = t4_alloc_tls_keyid(sc);
	if (keyid < 0) {
		CTR2(KTR_CXGBE, "%s: atid %d using immediate key ctx", __func__,
		    atid);
		tlsp->inline_key = true;
	} else {
		tlsp->tx_key_addr = keyid;
		CTR3(KTR_CXGBE, "%s: atid %d allocated TX key addr %#x",
		    __func__, atid, tlsp->tx_key_addr);
	}

	inp = params->tls.inp;
	INP_RLOCK(inp);
	if (inp->inp_flags & INP_DROPPED) {
		INP_RUNLOCK(inp);
		error = ECONNRESET;
		goto failed;
	}
	tlsp->inp = inp;

	tp = intotcpcb(inp);
	if (tp->t_flags & TF_REQ_TSTMP) {
		tlsp->using_timestamps = true;
		if ((tp->ts_offset & 0xfffffff) != 0) {
			INP_RUNLOCK(inp);
			error = EINVAL;
			goto failed;
		}
	} else
		tlsp->using_timestamps = false;

	error = send_ktls_act_open_req(sc, vi, inp, tlsp, atid);
	if (error) {
		INP_RUNLOCK(inp);
		goto failed;
	}

	/* Wait for reply to active open. */
	CTR2(KTR_CXGBE, "%s: atid %d sent CPL_ACT_OPEN_REQ", __func__,
	    atid);
	while (tlsp->open_pending) {
		/*
		 * XXX: PCATCH?  We would then have to discard the PCB
		 * when the completion CPL arrived.
		 */
		error = rw_sleep(tlsp, &inp->inp_lock, 0, "t6tlsop", 0);
	}

	atid = -1;
	if (tlsp->tid < 0) {
		INP_RUNLOCK(inp);
		error = ENOMEM;
		goto failed;
	}

	if (inp->inp_flags & INP_DROPPED) {
		INP_RUNLOCK(inp);
		error = ECONNRESET;
		goto failed;
	}

	txq = &sc->sge.txq[vi->first_txq];
	if (inp->inp_flowtype != M_HASHTYPE_NONE)
		txq += ((inp->inp_flowid % (vi->ntxq - vi->rsrv_noflowq)) +
		    vi->rsrv_noflowq);
	tlsp->txq = txq;

	error = ktls_set_tcb_fields(tlsp, tp, txq);
	INP_RUNLOCK(inp);
	if (error)
		goto failed;

	error = ktls_setup_keys(tlsp, tls, txq);
	if (error)
		goto failed;

	tlsp->enc_mode = t4_tls_cipher_mode(tls);
	tlsp->tx_key_info_size = t4_tls_key_info_size(tls);

	/* The SCMD fields used when encrypting a full TLS record. */
	tlsp->scmd0.seqno_numivs = htobe32(V_SCMD_SEQ_NO_CTRL(3) |
	    V_SCMD_PROTO_VERSION(t4_tls_proto_ver(tls)) |
	    V_SCMD_ENC_DEC_CTRL(SCMD_ENCDECCTRL_ENCRYPT) |
	    V_SCMD_CIPH_AUTH_SEQ_CTRL((mac_first == 0)) |
	    V_SCMD_CIPH_MODE(tlsp->enc_mode) |
	    V_SCMD_AUTH_MODE(t4_tls_auth_mode(tls)) |
	    V_SCMD_HMAC_CTRL(t4_tls_hmac_ctrl(tls)) |
	    V_SCMD_IV_SIZE(explicit_iv_size / 2) | V_SCMD_NUM_IVS(1));

	tlsp->scmd0.ivgen_hdrlen = V_SCMD_IV_GEN_CTRL(0) |
	    V_SCMD_TLS_FRAG_ENABLE(0);
	if (tlsp->inline_key)
		tlsp->scmd0.ivgen_hdrlen |= V_SCMD_KEY_CTX_INLINE(1);
	tlsp->scmd0.ivgen_hdrlen = htobe32(tlsp->scmd0.ivgen_hdrlen);

	/*
	 * The SCMD fields used when encrypting a partial TLS record
	 * (no trailer and possibly a truncated payload).
	 */
	tlsp->scmd0_short.seqno_numivs = V_SCMD_SEQ_NO_CTRL(0) |
	    V_SCMD_PROTO_VERSION(SCMD_PROTO_VERSION_GENERIC) |
	    V_SCMD_ENC_DEC_CTRL(SCMD_ENCDECCTRL_ENCRYPT) |
	    V_SCMD_CIPH_AUTH_SEQ_CTRL((mac_first == 0)) |
	    V_SCMD_AUTH_MODE(SCMD_AUTH_MODE_NOP) |
	    V_SCMD_HMAC_CTRL(SCMD_HMAC_CTRL_NOP) |
	    V_SCMD_IV_SIZE(AES_BLOCK_LEN / 2) | V_SCMD_NUM_IVS(0);
	if (tlsp->enc_mode == SCMD_CIPH_MODE_AES_GCM)
		tlsp->scmd0_short.seqno_numivs |=
		    V_SCMD_CIPH_MODE(SCMD_CIPH_MODE_AES_CTR);
	else
		tlsp->scmd0_short.seqno_numivs |=
		    V_SCMD_CIPH_MODE(tlsp->enc_mode);
	tlsp->scmd0_short.seqno_numivs =
	    htobe32(tlsp->scmd0_short.seqno_numivs);

	tlsp->scmd0_short.ivgen_hdrlen = V_SCMD_IV_GEN_CTRL(0) |
	    V_SCMD_TLS_FRAG_ENABLE(0) |
	    V_SCMD_AADIVDROP(1);
	if (tlsp->inline_key)
		tlsp->scmd0_short.ivgen_hdrlen |= V_SCMD_KEY_CTX_INLINE(1);

	TXQ_LOCK(txq);
	if (tlsp->enc_mode == SCMD_CIPH_MODE_AES_GCM)
		txq->kern_tls_gcm++;
	else
		txq->kern_tls_cbc++;
	TXQ_UNLOCK(txq);
	*pt = &tlsp->com;
	return (0);

failed:
	if (atid >= 0)
		free_atid(sc, atid);
	m_snd_tag_rele(&tlsp->com);
	return (error);
}

static int
ktls_setup_keys(struct tlspcb *tlsp, const struct ktls_session *tls,
    struct sge_txq *txq)
{
	struct tls_key_req *kwr;
	struct tls_keyctx *kctx;
	void *items[1];
	struct mbuf *m;
	int error;

	/*
	 * Store the salt and keys in the key context.  For
	 * connections with an inline key, this key context is passed
	 * as immediate data in each work request.  For connections
	 * storing the key in DDR, a work request is used to store a
	 * copy of the key context in DDR.
	 */
	t4_tls_key_ctx(tls, KTLS_TX, &tlsp->keyctx);
	if (tlsp->inline_key)
		return (0);

	/* Populate key work request. */
	m = alloc_wr_mbuf(TLS_KEY_WR_SZ, M_NOWAIT);
	if (m == NULL) {
		CTR2(KTR_CXGBE, "%s: tid %d failed to alloc WR mbuf", __func__,
		    tlsp->tid);
		return (ENOMEM);
	}
	m->m_pkthdr.snd_tag = m_snd_tag_ref(&tlsp->com);
	m->m_pkthdr.csum_flags |= CSUM_SND_TAG;
	kwr = mtod(m, void *);
	memset(kwr, 0, TLS_KEY_WR_SZ);

	t4_write_tlskey_wr(tls, KTLS_TX, tlsp->tid, 0, tlsp->tx_key_addr, kwr);
	kctx = (struct tls_keyctx *)(kwr + 1);
	memcpy(kctx, &tlsp->keyctx, sizeof(*kctx));

	/*
	 * Place the key work request in the transmit queue.  It
	 * should be sent to the NIC before any TLS packets using this
	 * session.
	 */
	items[0] = m;
	error = mp_ring_enqueue(txq->r, items, 1, 1);
	if (error)
		m_free(m);
	else
		CTR2(KTR_CXGBE, "%s: tid %d sent key WR", __func__, tlsp->tid);
	return (error);
}

static u_int
ktls_base_wr_size(struct tlspcb *tlsp)
{
	u_int wr_len;

	wr_len = sizeof(struct fw_ulptx_wr);	// 16
	wr_len += sizeof(struct ulp_txpkt);	// 8
	wr_len += sizeof(struct ulptx_idata);	// 8
	wr_len += sizeof(struct cpl_tx_sec_pdu);// 32
	if (tlsp->inline_key)
		wr_len += tlsp->tx_key_info_size;
	else {
		wr_len += sizeof(struct ulptx_sc_memrd);// 8
		wr_len += sizeof(struct ulptx_idata);	// 8
	}
	wr_len += sizeof(struct cpl_tx_data);	// 16
	return (wr_len);
}
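
/*
 * For reference: with the structure sizes noted above,
 * ktls_base_wr_size() works out to 80 bytes plus the key context when
 * the key is inline, or 96 bytes when the key is read from card
 * memory.
 */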

/* How many bytes of TCP payload to send for a given TLS record. */
static u_int
ktls_tcp_payload_length(struct tlspcb *tlsp, struct mbuf *m_tls)
{
	struct tls_record_layer *hdr;
	u_int plen, mlen;

	M_ASSERTEXTPG(m_tls);
	hdr = (void *)m_tls->m_epg_hdr;
	plen = ntohs(hdr->tls_length);

	/*
	 * What range of the TLS record is the mbuf requesting to be
	 * sent?
	 */
	mlen = mtod(m_tls, vm_offset_t) + m_tls->m_len;

	/* Always send complete records. */
	if (mlen == TLS_HEADER_LENGTH + plen)
		return (mlen);

	/*
	 * If the host stack has asked to send part of the trailer,
	 * trim the length to avoid sending any of the trailer.  There
	 * is no way to send a partial trailer currently.
	 */
	if (mlen > TLS_HEADER_LENGTH + plen - m_tls->m_epg_trllen)
		mlen = TLS_HEADER_LENGTH + plen - m_tls->m_epg_trllen;

	/*
	 * For AES-CBC adjust the ciphertext length for the block
	 * size.
	 */
	if (tlsp->enc_mode == SCMD_CIPH_MODE_AES_CBC &&
	    mlen > TLS_HEADER_LENGTH) {
		mlen = TLS_HEADER_LENGTH + rounddown(mlen - TLS_HEADER_LENGTH,
		    AES_BLOCK_LEN);
	}
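
	/*
	 * For example, a request ending 100 bytes into the ciphertext
	 * of a CBC record is trimmed above to 5 + 96 bytes so that
	 * only whole 16-byte AES blocks are submitted.
	 */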

#ifdef VERBOSE_TRACES
	CTR4(KTR_CXGBE, "%s: tid %d short TLS record (%u vs %u)",
	    __func__, tlsp->tid, mlen, TLS_HEADER_LENGTH + plen);
#endif
	return (mlen);
}

/*
 * For a "short" TLS record, determine the offset into the TLS record
 * payload to send.  This offset does not include the TLS header, but
 * a non-zero offset implies that a header will not be sent.
 */
static u_int
ktls_payload_offset(struct tlspcb *tlsp, struct mbuf *m_tls)
{
	struct tls_record_layer *hdr;
	u_int offset, plen;
#ifdef INVARIANTS
	u_int mlen;
#endif

	M_ASSERTEXTPG(m_tls);
	hdr = (void *)m_tls->m_epg_hdr;
	plen = ntohs(hdr->tls_length);
#ifdef INVARIANTS
	mlen = mtod(m_tls, vm_offset_t) + m_tls->m_len;
	MPASS(mlen < TLS_HEADER_LENGTH + plen);
#endif
	if (mtod(m_tls, vm_offset_t) <= m_tls->m_epg_hdrlen)
		return (0);
	if (tlsp->enc_mode == SCMD_CIPH_MODE_AES_GCM) {
		/*
		 * Always send something.  This function is only called
		 * if we aren't sending the tag at all, but if the
		 * request starts in the tag then we are in an odd
		 * state where we would effectively send nothing.  Cap
		 * the offset at the last byte of the record payload
		 * to send the last cipher block.
		 */
		offset = min(mtod(m_tls, vm_offset_t) - m_tls->m_epg_hdrlen,
		    (plen - TLS_HEADER_LENGTH - m_tls->m_epg_trllen) - 1);
		return (rounddown(offset, AES_BLOCK_LEN));
	}
	return (0);
}

static u_int
ktls_sgl_size(u_int nsegs)
{
	u_int wr_len;

	/* First segment is part of ulptx_sgl. */
	nsegs--;

	wr_len = sizeof(struct ulptx_sgl);
	wr_len += 8 * ((3 * nsegs) / 2 + (nsegs & 1));
	return (wr_len);
}
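
/*
 * For reference: the 16-byte ulptx_sgl header covers the first
 * segment, and each later pair of segments takes three more flits (8
 * bytes of lengths plus 16 bytes of addresses), so e.g. a 3-segment
 * SGL is 16 + 24 = 40 bytes.
 */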

static int
ktls_wr_len(struct tlspcb *tlsp, struct mbuf *m, struct mbuf *m_tls,
    int *nsegsp)
{
	struct tls_record_layer *hdr;
	u_int imm_len, offset, plen, wr_len, tlen;

	M_ASSERTEXTPG(m_tls);

	/*
	 * Determine the size of the TLS record payload to send
	 * excluding header and trailer.
	 */
	tlen = ktls_tcp_payload_length(tlsp, m_tls);
	if (tlen <= m_tls->m_epg_hdrlen) {
		/*
		 * For requests that only want to send the TLS header,
		 * send a tunnelled packet as immediate data.
		 */
		wr_len = sizeof(struct fw_eth_tx_pkt_wr) +
		    sizeof(struct cpl_tx_pkt_core) +
		    roundup2(m->m_len + m_tls->m_len, 16);
		if (wr_len > SGE_MAX_WR_LEN) {
			CTR3(KTR_CXGBE,
			    "%s: tid %d TLS header-only packet too long (len %d)",
			    __func__, tlsp->tid, m->m_len + m_tls->m_len);
		}

		/* This should always be the last TLS record in a chain. */
		MPASS(m_tls->m_next == NULL);
		*nsegsp = 0;
		return (wr_len);
	}

	hdr = (void *)m_tls->m_epg_hdr;
	plen = TLS_HEADER_LENGTH + ntohs(hdr->tls_length) - m_tls->m_epg_trllen;
	if (tlen < plen) {
		plen = tlen;
		offset = ktls_payload_offset(tlsp, m_tls);
	} else
		offset = 0;

	/* Calculate the size of the work request. */
	wr_len = ktls_base_wr_size(tlsp);

	/*
	 * Full records and short records with an offset of 0 include
	 * the TLS header as immediate data.  Short records include a
	 * raw AES IV as immediate data.
	 */
	imm_len = 0;
	if (offset == 0)
		imm_len += m_tls->m_epg_hdrlen;
	if (plen == tlen)
		imm_len += AES_BLOCK_LEN;
	wr_len += roundup2(imm_len, 16);

	/* TLS record payload via DSGL. */
	*nsegsp = sglist_count_mbuf_epg(m_tls, m_tls->m_epg_hdrlen + offset,
	    plen - (m_tls->m_epg_hdrlen + offset));
	wr_len += ktls_sgl_size(*nsegsp);

	wr_len = roundup2(wr_len, 16);
	return (wr_len);
}

/*
 * See if we have any TCP options requiring a dedicated options-only
 * packet.
 */
static int
ktls_has_tcp_options(struct tcphdr *tcp)
{
	u_char *cp;
	int cnt, opt, optlen;

	cp = (u_char *)(tcp + 1);
	cnt = tcp->th_off * 4 - sizeof(struct tcphdr);
	for (; cnt > 0; cnt -= optlen, cp += optlen) {
		opt = cp[0];
		if (opt == TCPOPT_EOL)
			break;
		if (opt == TCPOPT_NOP)
			optlen = 1;
		else {
			if (cnt < 2)
				break;
			optlen = cp[1];
			if (optlen < 2 || optlen > cnt)
				break;
		}
		switch (opt) {
		case TCPOPT_NOP:
		case TCPOPT_TIMESTAMP:
			break;
		default:
			return (1);
		}
	}
	return (0);
}

/*
 * Find the TCP timestamp option and return a pointer to its TSval and
 * TSecr values, or NULL if the option is not present.
 */
static void *
ktls_find_tcp_timestamps(struct tcphdr *tcp)
{
	u_char *cp;
	int cnt, opt, optlen;

	cp = (u_char *)(tcp + 1);
	cnt = tcp->th_off * 4 - sizeof(struct tcphdr);
	for (; cnt > 0; cnt -= optlen, cp += optlen) {
		opt = cp[0];
		if (opt == TCPOPT_EOL)
			break;
		if (opt == TCPOPT_NOP)
			optlen = 1;
		else {
			if (cnt < 2)
				break;
			optlen = cp[1];
			if (optlen < 2 || optlen > cnt)
				break;
		}
		if (opt == TCPOPT_TIMESTAMP && optlen == TCPOLEN_TIMESTAMP)
			return (cp + 2);
	}
	return (NULL);
}

int
t6_ktls_parse_pkt(struct mbuf *m)
{
	struct tlspcb *tlsp;
	struct ether_header *eh;
	struct ip *ip;
	struct ip6_hdr *ip6;
	struct tcphdr *tcp;
	struct mbuf *m_tls;
	void *items[1];
	int nsegs;
	u_int wr_len, tot_len;

	/*
	 * Locate headers in initial mbuf.
	 *
	 * XXX: This assumes all of the headers are in the initial mbuf.
	 * Could perhaps use m_advance() like parse_pkt() if that turns
	 * out to not be true.
	 */
	M_ASSERTPKTHDR(m);
	MPASS(m->m_pkthdr.snd_tag != NULL);
	tlsp = mst_to_tls(m->m_pkthdr.snd_tag);

	if (m->m_len <= sizeof(*eh) + sizeof(*ip)) {
		CTR2(KTR_CXGBE, "%s: tid %d header mbuf too short", __func__,
		    tlsp->tid);
		return (EINVAL);
	}
	eh = mtod(m, struct ether_header *);
	if (ntohs(eh->ether_type) != ETHERTYPE_IP &&
	    ntohs(eh->ether_type) != ETHERTYPE_IPV6) {
		CTR2(KTR_CXGBE, "%s: tid %d mbuf not ETHERTYPE_IP{,V6}",
		    __func__, tlsp->tid);
		return (EINVAL);
	}
	m->m_pkthdr.l2hlen = sizeof(*eh);

	/* XXX: Reject unsupported IP options? */
	if (ntohs(eh->ether_type) == ETHERTYPE_IP) {
		ip = (struct ip *)(eh + 1);
		if (ip->ip_p != IPPROTO_TCP) {
			CTR2(KTR_CXGBE, "%s: tid %d mbuf not IPPROTO_TCP",
			    __func__, tlsp->tid);
			return (EINVAL);
		}
		m->m_pkthdr.l3hlen = ip->ip_hl * 4;
	} else {
		ip6 = (struct ip6_hdr *)(eh + 1);
		if (ip6->ip6_nxt != IPPROTO_TCP) {
			CTR3(KTR_CXGBE, "%s: tid %d mbuf not IPPROTO_TCP (%u)",
			    __func__, tlsp->tid, ip6->ip6_nxt);
			return (EINVAL);
		}
		m->m_pkthdr.l3hlen = sizeof(struct ip6_hdr);
	}
	if (m->m_len < m->m_pkthdr.l2hlen + m->m_pkthdr.l3hlen +
	    sizeof(*tcp)) {
		CTR2(KTR_CXGBE, "%s: tid %d header mbuf too short (2)",
		    __func__, tlsp->tid);
		return (EINVAL);
	}
	tcp = (struct tcphdr *)((char *)(eh + 1) + m->m_pkthdr.l3hlen);
	m->m_pkthdr.l4hlen = tcp->th_off * 4;

	/* Bail if there is TCP payload before the TLS record. */
	if (m->m_len != m->m_pkthdr.l2hlen + m->m_pkthdr.l3hlen +
	    m->m_pkthdr.l4hlen) {
		CTR6(KTR_CXGBE,
		    "%s: tid %d header mbuf bad length (%d + %d + %d != %d)",
		    __func__, tlsp->tid, m->m_pkthdr.l2hlen,
		    m->m_pkthdr.l3hlen, m->m_pkthdr.l4hlen, m->m_len);
		return (EINVAL);
	}

	/* Assume all headers are in 'm' for now. */
	MPASS(m->m_next != NULL);
	MPASS(m->m_next->m_flags & M_EXTPG);

	tot_len = 0;

	/*
	 * Each of the remaining mbufs in the chain should reference a
	 * TLS record.
	 */
	for (m_tls = m->m_next; m_tls != NULL; m_tls = m_tls->m_next) {
		MPASS(m_tls->m_flags & M_EXTPG);

		wr_len = ktls_wr_len(tlsp, m, m_tls, &nsegs);
#ifdef VERBOSE_TRACES
		CTR4(KTR_CXGBE, "%s: tid %d wr_len %d nsegs %d", __func__,
		    tlsp->tid, wr_len, nsegs);
#endif
		if (wr_len > SGE_MAX_WR_LEN || nsegs > TX_SGL_SEGS)
			return (EFBIG);
		tot_len += roundup2(wr_len, EQ_ESIZE);

		/*
		 * Store 'nsegs' for the first TLS record in the
		 * header mbuf's metadata.
		 */
		if (m_tls == m->m_next)
			set_mbuf_nsegs(m, nsegs);
	}

	MPASS(tot_len != 0);

	/*
	 * See if we have any TCP options or a FIN requiring a
	 * dedicated packet.
	 */
	if ((tcp->th_flags & TH_FIN) != 0 || ktls_has_tcp_options(tcp)) {
		wr_len = sizeof(struct fw_eth_tx_pkt_wr) +
		    sizeof(struct cpl_tx_pkt_core) + roundup2(m->m_len, 16);
		if (wr_len > SGE_MAX_WR_LEN) {
			CTR3(KTR_CXGBE,
			    "%s: tid %d options-only packet too long (len %d)",
			    __func__, tlsp->tid, m->m_len);
			return (EINVAL);
		}
		tot_len += roundup2(wr_len, EQ_ESIZE);
	}

	/* Include room for a TP work request to program an L2T entry. */
	tot_len += EQ_ESIZE;

	/*
	 * Include room for a ULPTX work request including up to 5
	 * CPL_SET_TCB_FIELD commands before the first TLS work
	 * request.
	 */
	wr_len = sizeof(struct fw_ulptx_wr) +
	    5 * roundup2(LEN__SET_TCB_FIELD_ULP, 16);
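
	/*
	 * The five possible updates are L2T_IX, TX_MAX, SND_UNA_RAW,
	 * RCV_NXT, and RCV_WND; T_RTSEQ_RECENT below is the sixth when
	 * timestamps are in use.
	 */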

	/*
	 * If timestamps are present, reserve 1 more command for
	 * setting the echoed timestamp.
	 */
	if (tlsp->using_timestamps)
		wr_len += roundup2(LEN__SET_TCB_FIELD_ULP, 16);

	tot_len += roundup2(wr_len, EQ_ESIZE);

	set_mbuf_len16(m, tot_len / 16);
#ifdef VERBOSE_TRACES
	CTR4(KTR_CXGBE, "%s: tid %d len16 %d nsegs %d", __func__,
	    tlsp->tid, mbuf_len16(m), mbuf_nsegs(m));
#endif
	items[0] = m;
	return (mp_ring_enqueue(tlsp->txq->r, items, 1, 256));
}

/*
 * If the SGL ends on an address that is not 16-byte aligned, this
 * function will add a zero-filled flit at the end.
 */
static void
write_gl_to_buf(struct sglist *gl, caddr_t to)
{
	struct sglist_seg *seg;
	__be64 *flitp;
	struct ulptx_sgl *usgl;
	int i, nflits, nsegs;

	KASSERT(((uintptr_t)to & 0xf) == 0,
	    ("%s: SGL must start at a 16 byte boundary: %p", __func__, to));

	nsegs = gl->sg_nseg;
	MPASS(nsegs > 0);

	nflits = (3 * (nsegs - 1)) / 2 + ((nsegs - 1) & 1) + 2;
	flitp = (__be64 *)to;
	seg = &gl->sg_segs[0];
	usgl = (void *)flitp;

	usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) |
	    V_ULPTX_NSGE(nsegs));
	usgl->len0 = htobe32(seg->ss_len);
	usgl->addr0 = htobe64(seg->ss_paddr);
	seg++;

	for (i = 0; i < nsegs - 1; i++, seg++) {
		usgl->sge[i / 2].len[i & 1] = htobe32(seg->ss_len);
		usgl->sge[i / 2].addr[i & 1] = htobe64(seg->ss_paddr);
	}
	if (i & 1)
		usgl->sge[i / 2].len[1] = htobe32(0);
	flitp += nflits;

	if (nflits & 1) {
		MPASS(((uintptr_t)flitp) & 0xf);
		*flitp++ = 0;
	}

	MPASS((((uintptr_t)flitp) & 0xf) == 0);
}

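/*
 * Copy 'len' bytes into the descriptor ring at '*to', wrapping back to
 * the start of the ring when the end is reached, and advance '*to'.
 */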
static inline void
copy_to_txd(struct sge_eq *eq, caddr_t from, caddr_t *to, int len)
{

	MPASS((uintptr_t)(*to) >= (uintptr_t)&eq->desc[0]);
	MPASS((uintptr_t)(*to) < (uintptr_t)&eq->desc[eq->sidx]);

	if (__predict_true((uintptr_t)(*to) + len <=
	    (uintptr_t)&eq->desc[eq->sidx])) {
		bcopy(from, *to, len);
		(*to) += len;
		if ((uintptr_t)(*to) == (uintptr_t)&eq->desc[eq->sidx])
			(*to) = (caddr_t)eq->desc;
	} else {
		int portion = (uintptr_t)&eq->desc[eq->sidx] - (uintptr_t)(*to);

		bcopy(from, *to, portion);
		from += portion;
		portion = len - portion;	/* remaining */
		bcopy(from, (void *)eq->desc, portion);
		(*to) = (caddr_t)eq->desc + portion;
	}
}

static int
ktls_write_tcp_options(struct sge_txq *txq, void *dst, struct mbuf *m,
    u_int available, u_int pidx)
{
	struct tx_sdesc *txsd;
	struct fw_eth_tx_pkt_wr *wr;
	struct cpl_tx_pkt_core *cpl;
	uint32_t ctrl;
	uint64_t ctrl1;
	int len16, ndesc, pktlen;
	struct ether_header *eh;
	struct ip *ip, newip;
	struct ip6_hdr *ip6, newip6;
	struct tcphdr *tcp, newtcp;
	caddr_t out;

	TXQ_LOCK_ASSERT_OWNED(txq);
	M_ASSERTPKTHDR(m);

	wr = dst;
	pktlen = m->m_len;
	ctrl = sizeof(struct cpl_tx_pkt_core) + pktlen;
	len16 = howmany(sizeof(struct fw_eth_tx_pkt_wr) + ctrl, 16);
	ndesc = tx_len16_to_desc(len16);
	MPASS(ndesc <= available);

	/* Firmware work request header */
	wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_PKT_WR) |
	    V_FW_ETH_TX_PKT_WR_IMMDLEN(ctrl));

	ctrl = V_FW_WR_LEN16(len16);
	wr->equiq_to_len16 = htobe32(ctrl);
	wr->r3 = 0;

	cpl = (void *)(wr + 1);

	/* CPL header */
	cpl->ctrl0 = txq->cpl_ctrl0;
	cpl->pack = 0;
	cpl->len = htobe16(pktlen);

	out = (void *)(cpl + 1);

	/* Copy over Ethernet header. */
	eh = mtod(m, struct ether_header *);
	copy_to_txd(&txq->eq, (caddr_t)eh, &out, m->m_pkthdr.l2hlen);

	/* Fixup length in IP header and copy out. */
	if (ntohs(eh->ether_type) == ETHERTYPE_IP) {
		ip = (void *)((char *)eh + m->m_pkthdr.l2hlen);
		newip = *ip;
		newip.ip_len = htons(pktlen - m->m_pkthdr.l2hlen);
		copy_to_txd(&txq->eq, (caddr_t)&newip, &out, sizeof(newip));
		if (m->m_pkthdr.l3hlen > sizeof(*ip))
			copy_to_txd(&txq->eq, (caddr_t)(ip + 1), &out,
			    m->m_pkthdr.l3hlen - sizeof(*ip));
		ctrl1 = V_TXPKT_CSUM_TYPE(TX_CSUM_TCPIP) |
		    V_T6_TXPKT_ETHHDR_LEN(m->m_pkthdr.l2hlen - ETHER_HDR_LEN) |
		    V_TXPKT_IPHDR_LEN(m->m_pkthdr.l3hlen);
	} else {
		ip6 = (void *)((char *)eh + m->m_pkthdr.l2hlen);
		newip6 = *ip6;
		newip6.ip6_plen = htons(pktlen - m->m_pkthdr.l2hlen -
		    sizeof(*ip6));
		copy_to_txd(&txq->eq, (caddr_t)&newip6, &out, sizeof(newip6));
		MPASS(m->m_pkthdr.l3hlen == sizeof(*ip6));
		ctrl1 = V_TXPKT_CSUM_TYPE(TX_CSUM_TCPIP6) |
		    V_T6_TXPKT_ETHHDR_LEN(m->m_pkthdr.l2hlen - ETHER_HDR_LEN) |
		    V_TXPKT_IPHDR_LEN(m->m_pkthdr.l3hlen);
	}
	cpl->ctrl1 = htobe64(ctrl1);
	txq->txcsum++;

	/* Clear PUSH and FIN in the TCP header if present. */
	tcp = (void *)((char *)eh + m->m_pkthdr.l2hlen + m->m_pkthdr.l3hlen);
	newtcp = *tcp;
	newtcp.th_flags &= ~(TH_PUSH | TH_FIN);
	copy_to_txd(&txq->eq, (caddr_t)&newtcp, &out, sizeof(newtcp));

	/* Copy rest of packet. */
	copy_to_txd(&txq->eq, (caddr_t)(tcp + 1), &out, pktlen -
	    (m->m_pkthdr.l2hlen + m->m_pkthdr.l3hlen + sizeof(*tcp)));
	txq->imm_wrs++;

	txq->txpkt_wrs++;

	txq->kern_tls_options++;

	txsd = &txq->sdesc[pidx];
	txsd->m = NULL;
	txsd->desc_used = ndesc;

	return (ndesc);
}

static int
ktls_write_tunnel_packet(struct sge_txq *txq, void *dst, struct mbuf *m,
    struct mbuf *m_tls, u_int available, tcp_seq tcp_seqno, u_int pidx)
{
	struct tx_sdesc *txsd;
	struct fw_eth_tx_pkt_wr *wr;
	struct cpl_tx_pkt_core *cpl;
	uint32_t ctrl;
	uint64_t ctrl1;
	int len16, ndesc, pktlen;
	struct ether_header *eh;
	struct ip *ip, newip;
	struct ip6_hdr *ip6, newip6;
	struct tcphdr *tcp, newtcp;
	caddr_t out;

	TXQ_LOCK_ASSERT_OWNED(txq);
	M_ASSERTPKTHDR(m);

	/* Locate the template TLS header. */
	M_ASSERTEXTPG(m_tls);

	/* This should always be the last TLS record in a chain. */
	MPASS(m_tls->m_next == NULL);

	wr = dst;
	pktlen = m->m_len + m_tls->m_len;
	ctrl = sizeof(struct cpl_tx_pkt_core) + pktlen;
	len16 = howmany(sizeof(struct fw_eth_tx_pkt_wr) + ctrl, 16);
	ndesc = tx_len16_to_desc(len16);
	MPASS(ndesc <= available);

	/* Firmware work request header */
	wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_PKT_WR) |
	    V_FW_ETH_TX_PKT_WR_IMMDLEN(ctrl));

	ctrl = V_FW_WR_LEN16(len16);
	wr->equiq_to_len16 = htobe32(ctrl);
	wr->r3 = 0;

	cpl = (void *)(wr + 1);

	/* CPL header */
	cpl->ctrl0 = txq->cpl_ctrl0;
	cpl->pack = 0;
	cpl->len = htobe16(pktlen);

	out = (void *)(cpl + 1);

	/* Copy over Ethernet header. */
	eh = mtod(m, struct ether_header *);
	copy_to_txd(&txq->eq, (caddr_t)eh, &out, m->m_pkthdr.l2hlen);

	/* Fixup length in IP header and copy out. */
	if (ntohs(eh->ether_type) == ETHERTYPE_IP) {
		ip = (void *)((char *)eh + m->m_pkthdr.l2hlen);
		newip = *ip;
		newip.ip_len = htons(pktlen - m->m_pkthdr.l2hlen);
		copy_to_txd(&txq->eq, (caddr_t)&newip, &out, sizeof(newip));
		if (m->m_pkthdr.l3hlen > sizeof(*ip))
			copy_to_txd(&txq->eq, (caddr_t)(ip + 1), &out,
			    m->m_pkthdr.l3hlen - sizeof(*ip));
		ctrl1 = V_TXPKT_CSUM_TYPE(TX_CSUM_TCPIP) |
		    V_T6_TXPKT_ETHHDR_LEN(m->m_pkthdr.l2hlen - ETHER_HDR_LEN) |
		    V_TXPKT_IPHDR_LEN(m->m_pkthdr.l3hlen);
	} else {
		ip6 = (void *)((char *)eh + m->m_pkthdr.l2hlen);
		newip6 = *ip6;
		newip6.ip6_plen = htons(pktlen - m->m_pkthdr.l2hlen -
		    sizeof(*ip6));
		copy_to_txd(&txq->eq, (caddr_t)&newip6, &out, sizeof(newip6));
		MPASS(m->m_pkthdr.l3hlen == sizeof(*ip6));
		ctrl1 = V_TXPKT_CSUM_TYPE(TX_CSUM_TCPIP6) |
		    V_T6_TXPKT_ETHHDR_LEN(m->m_pkthdr.l2hlen - ETHER_HDR_LEN) |
		    V_TXPKT_IPHDR_LEN(m->m_pkthdr.l3hlen);
	}
	cpl->ctrl1 = htobe64(ctrl1);
	txq->txcsum++;

	/* Set sequence number in TCP header. */
	tcp = (void *)((char *)eh + m->m_pkthdr.l2hlen + m->m_pkthdr.l3hlen);
	newtcp = *tcp;
	newtcp.th_seq = htonl(tcp_seqno + mtod(m_tls, vm_offset_t));
	copy_to_txd(&txq->eq, (caddr_t)&newtcp, &out, sizeof(newtcp));

	/* Copy rest of TCP header. */
	copy_to_txd(&txq->eq, (caddr_t)(tcp + 1), &out, m->m_len -
	    (m->m_pkthdr.l2hlen + m->m_pkthdr.l3hlen + sizeof(*tcp)));

	/* Copy the subset of the TLS header requested. */
	copy_to_txd(&txq->eq, (char *)m_tls->m_epg_hdr +
	    mtod(m_tls, vm_offset_t), &out, m_tls->m_len);
	txq->imm_wrs++;

	txq->txpkt_wrs++;

	txq->kern_tls_header++;

	txsd = &txq->sdesc[pidx];
	txsd->m = m;
	txsd->desc_used = ndesc;

	return (ndesc);
}

_Static_assert(sizeof(struct cpl_set_tcb_field) <= EQ_ESIZE,
    "CPL_SET_TCB_FIELD must be smaller than a single TX descriptor");
_Static_assert(W_TCB_SND_UNA_RAW == W_TCB_SND_NXT_RAW,
    "SND_NXT_RAW and SND_UNA_RAW are in different words");

static int
ktls_write_tls_wr(struct tlspcb *tlsp, struct sge_txq *txq, void *dst,
    struct mbuf *m, struct tcphdr *tcp, struct mbuf *m_tls, u_int available,
    tcp_seq tcp_seqno, uint32_t *tsopt, u_int pidx, bool set_l2t_idx)
{
	struct sge_eq *eq = &txq->eq;
	struct tx_sdesc *txsd;
	struct fw_ulptx_wr *wr;
	struct ulp_txpkt *txpkt;
	struct ulptx_sc_memrd *memrd;
	struct ulptx_idata *idata;
	struct cpl_tx_sec_pdu *sec_pdu;
	struct cpl_tx_data *tx_data;
	struct tls_record_layer *hdr;
	char *iv, *out;
	u_int aad_start, aad_stop;
	u_int auth_start, auth_stop, auth_insert;
	u_int cipher_start, cipher_stop, iv_offset;
	u_int imm_len, mss, ndesc, offset, plen, tlen, twr_len, wr_len;
	u_int fields, nsegs, tx_max_offset, tx_max;
	bool first_wr, last_wr, using_scratch;

	ndesc = 0;
	MPASS(tlsp->txq == txq);

	first_wr = (tlsp->prev_seq == 0 && tlsp->prev_ack == 0 &&
	    tlsp->prev_win == 0);

	/*
	 * Use the per-txq scratch pad if near the end of the ring to
	 * simplify handling of wrap-around.  This uses a simple but
	 * not quite perfect test of using the scratch buffer if we
	 * can't fit a maximal work request in without wrapping.
	 */
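	/*
	 * (With the usual 512-byte SGE_MAX_WR_LEN and 64-byte
	 * descriptors this means the scratch buffer is used within
	 * eight descriptors of the end of the ring.)
	 */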
	using_scratch = (eq->sidx - pidx < SGE_MAX_WR_LEN / EQ_ESIZE);

	/* Locate the TLS header. */
	M_ASSERTEXTPG(m_tls);
	hdr = (void *)m_tls->m_epg_hdr;
	plen = TLS_HEADER_LENGTH + ntohs(hdr->tls_length) - m_tls->m_epg_trllen;

	/* Determine how much of the TLS record to send. */
	tlen = ktls_tcp_payload_length(tlsp, m_tls);
	if (tlen <= m_tls->m_epg_hdrlen) {
		/*
		 * For requests that only want to send the TLS header,
		 * send a tunnelled packet as immediate data.
		 */
#ifdef VERBOSE_TRACES
		CTR3(KTR_CXGBE, "%s: tid %d header-only TLS record %u",
		    __func__, tlsp->tid, (u_int)m_tls->m_epg_seqno);
#endif
		return (ktls_write_tunnel_packet(txq, dst, m, m_tls, available,
		    tcp_seqno, pidx));
	}
	if (tlen < plen) {
		plen = tlen;
		offset = ktls_payload_offset(tlsp, m_tls);
#ifdef VERBOSE_TRACES
		CTR4(KTR_CXGBE, "%s: tid %d short TLS record %u with offset %u",
		    __func__, tlsp->tid, (u_int)m_tls->m_epg_seqno, offset);
#endif
		if (m_tls->m_next == NULL && (tcp->th_flags & TH_FIN) != 0) {
			txq->kern_tls_fin_short++;
#ifdef INVARIANTS
			panic("%s: FIN on short TLS record", __func__);
#endif
		}
	} else
		offset = 0;

	/*
	 * This is the last work request for a given TLS mbuf chain if
	 * it is the last mbuf in the chain and FIN is not set.  If
	 * FIN is set, then ktls_write_tcp_fin() will write out the
	 * last work request.
	 */
	last_wr = m_tls->m_next == NULL && (tcp->th_flags & TH_FIN) == 0;

	/*
	 * The host stack may ask us to not send part of the start of
	 * a TLS record.  (For example, the stack might have
	 * previously sent a "short" TLS record and might later send
	 * down an mbuf that requests to send the remainder of the TLS
	 * record.)  The crypto engine must process a TLS record from
	 * the beginning if computing a GCM tag or HMAC, so we always
	 * send the TLS record from the beginning as input to the
	 * crypto engine and via CPL_TX_DATA to TP.  However, TP will
	 * drop individual packets after they have been chopped up
	 * into MSS-sized chunks if the entire sequence range of those
	 * packets is less than SND_UNA.  SND_UNA is computed as
	 * TX_MAX - SND_UNA_RAW.  Thus, use the offset stored in
	 * m_data to set TX_MAX to the first byte in the TCP sequence
	 * space the host actually wants us to send and set
	 * SND_UNA_RAW to 0.
	 *
	 * If the host sends us back to back requests that span the
	 * trailer of a single TLS record (first request ends "in" the
	 * trailer and second request starts at the next byte but
	 * still "in" the trailer), the initial bytes of the trailer
	 * that the first request drops will not be retransmitted.  If
	 * the host uses the same requests when retransmitting the
	 * connection will hang.  To handle this, always transmit the
	 * full trailer for a request that begins "in" the trailer
	 * (the second request in the example above).  This should
	 * also help to avoid retransmits for the common case.
	 *
	 * A similar condition exists when using CBC for back to back
	 * requests that span a single AES block.  The first request
	 * will be truncated to end at the end of the previous AES
	 * block.  To handle this, always begin transmission at the
	 * start of the current AES block.
	 */
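	/*
	 * Example: if a previous request already sent the first part
	 * of a record as a short record and this request sends the
	 * remainder, TX_MAX is set to the first byte the host actually
	 * wants sent so that TP drops the MSS-sized chunks covering
	 * the already-sent prefix instead of transmitting it twice.
	 */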
	tx_max_offset = mtod(m_tls, vm_offset_t);
	if (tx_max_offset > TLS_HEADER_LENGTH + ntohs(hdr->tls_length) -
	    m_tls->m_epg_trllen) {
		/* Always send the full trailer. */
		tx_max_offset = TLS_HEADER_LENGTH + ntohs(hdr->tls_length) -
		    m_tls->m_epg_trllen;
	}
	if (tlsp->enc_mode == SCMD_CIPH_MODE_AES_CBC &&
	    tx_max_offset > TLS_HEADER_LENGTH) {
		/* Always send all of the first AES block. */
		tx_max_offset = TLS_HEADER_LENGTH +
		    rounddown(tx_max_offset - TLS_HEADER_LENGTH,
		    AES_BLOCK_LEN);
	}
	tx_max = tcp_seqno + tx_max_offset;

	/*
	 * Update TCB fields.  Reserve space for the FW_ULPTX_WR header
	 * but don't populate it until we know how many field updates
	 * are required.
	 */
	if (using_scratch)
		wr = (void *)txq->ss;
	else
		wr = dst;
	out = (void *)(wr + 1);
	fields = 0;
	if (set_l2t_idx) {
		KASSERT(m->m_next == m_tls,
		    ("trying to set L2T_IX for subsequent TLS WR"));
#ifdef VERBOSE_TRACES
		CTR3(KTR_CXGBE, "%s: tid %d set L2T_IX to %d", __func__,
		    tlsp->tid, tlsp->l2te->idx);
#endif
		write_set_tcb_field_ulp(tlsp, out, txq, W_TCB_L2T_IX,
		    V_TCB_L2T_IX(M_TCB_L2T_IX), V_TCB_L2T_IX(tlsp->l2te->idx));
		out += roundup2(LEN__SET_TCB_FIELD_ULP, 16);
		fields++;
	}
	if (tsopt != NULL && tlsp->prev_tsecr != ntohl(tsopt[1])) {
		KASSERT(m->m_next == m_tls,
		    ("trying to set T_RTSEQ_RECENT for subsequent TLS WR"));
#ifdef VERBOSE_TRACES
		CTR2(KTR_CXGBE, "%s: tid %d wrote updated T_RTSEQ_RECENT",
		    __func__, tlsp->tid);
#endif
		write_set_tcb_field_ulp(tlsp, out, txq, W_TCB_T_RTSEQ_RECENT,
		    V_TCB_T_RTSEQ_RECENT(M_TCB_T_RTSEQ_RECENT),
		    V_TCB_T_RTSEQ_RECENT(ntohl(tsopt[1])));
		out += roundup2(LEN__SET_TCB_FIELD_ULP, 16);
		fields++;

		tlsp->prev_tsecr = ntohl(tsopt[1]);
	}

	if (first_wr || tlsp->prev_seq != tx_max) {
		KASSERT(m->m_next == m_tls,
		    ("trying to set TX_MAX for subsequent TLS WR"));
#ifdef VERBOSE_TRACES
		CTR4(KTR_CXGBE,
		    "%s: tid %d setting TX_MAX to %u (tcp_seqno %u)",
		    __func__, tlsp->tid, tx_max, tcp_seqno);
#endif
		write_set_tcb_field_ulp(tlsp, out, txq, W_TCB_TX_MAX,
		    V_TCB_TX_MAX(M_TCB_TX_MAX), V_TCB_TX_MAX(tx_max));
		out += roundup2(LEN__SET_TCB_FIELD_ULP, 16);
		fields++;
	}

	/*
	 * If there is data to drop at the beginning of this TLS
	 * record or if this is a retransmit, reset SND_UNA_RAW to 0
	 * so that SND_UNA == TX_MAX.
	 */
	if (tlsp->prev_seq != tx_max || mtod(m_tls, vm_offset_t) != 0) {
		KASSERT(m->m_next == m_tls,
		    ("trying to clear SND_UNA_RAW for subsequent TLS WR"));
#ifdef VERBOSE_TRACES
		CTR2(KTR_CXGBE, "%s: tid %d clearing SND_UNA_RAW", __func__,
		    tlsp->tid);
#endif
		write_set_tcb_field_ulp(tlsp, out, txq, W_TCB_SND_UNA_RAW,
		    V_TCB_SND_UNA_RAW(M_TCB_SND_UNA_RAW),
		    V_TCB_SND_UNA_RAW(0));
		out += roundup2(LEN__SET_TCB_FIELD_ULP, 16);
		fields++;
	}

	/*
	 * Store the expected sequence number of the next byte after
	 * this record.
	 */
	tlsp->prev_seq = tcp_seqno + tlen;

	if (first_wr || tlsp->prev_ack != ntohl(tcp->th_ack)) {
		KASSERT(m->m_next == m_tls,
		    ("trying to set RCV_NXT for subsequent TLS WR"));
		write_set_tcb_field_ulp(tlsp, out, txq, W_TCB_RCV_NXT,
		    V_TCB_RCV_NXT(M_TCB_RCV_NXT),
		    V_TCB_RCV_NXT(ntohl(tcp->th_ack)));
		out += roundup2(LEN__SET_TCB_FIELD_ULP, 16);
		fields++;

		tlsp->prev_ack = ntohl(tcp->th_ack);
	}

	if (first_wr || tlsp->prev_win != ntohs(tcp->th_win)) {
		KASSERT(m->m_next == m_tls,
		    ("trying to set RCV_WND for subsequent TLS WR"));
		write_set_tcb_field_ulp(tlsp, out, txq, W_TCB_RCV_WND,
		    V_TCB_RCV_WND(M_TCB_RCV_WND),
		    V_TCB_RCV_WND(ntohs(tcp->th_win)));
		out += roundup2(LEN__SET_TCB_FIELD_ULP, 16);
		fields++;

		tlsp->prev_win = ntohs(tcp->th_win);
	}

	/* Use cached value for first record in chain. */
	if (m->m_next == m_tls)
		nsegs = mbuf_nsegs(m);
	else
		nsegs = sglist_count_mbuf_epg(m_tls, m_tls->m_epg_hdrlen +
		    offset, plen - (m_tls->m_epg_hdrlen + offset));

	/* Calculate the size of the TLS work request. */
	twr_len = ktls_base_wr_size(tlsp);

	imm_len = 0;
	if (offset == 0)
		imm_len += m_tls->m_epg_hdrlen;
	if (plen == tlen)
		imm_len += AES_BLOCK_LEN;
	twr_len += roundup2(imm_len, 16);
	twr_len += ktls_sgl_size(nsegs);

	/*
	 * If any field updates were required, determine if they can
	 * be included in the TLS work request.  If not, use the
	 * FW_ULPTX_WR work request header at 'wr' as a dedicated work
	 * request for the field updates and start a new work request
	 * for the TLS work request afterward.
	 */
	if (fields != 0) {
		wr_len = fields * roundup2(LEN__SET_TCB_FIELD_ULP, 16);
		if (twr_len + wr_len <= SGE_MAX_WR_LEN &&
		    tlsp->sc->tlst.combo_wrs) {
			wr_len += twr_len;
			txpkt = (void *)out;
		} else {
			wr_len += sizeof(*wr);
			wr->op_to_compl = htobe32(V_FW_WR_OP(FW_ULPTX_WR));
			wr->flowid_len16 = htobe32(F_FW_ULPTX_WR_DATA |
			    V_FW_WR_LEN16(wr_len / 16));
			wr->cookie = 0;

			/*
			 * If we were using scratch space, copy the
			 * field updates work request to the ring.
			 */
			if (using_scratch) {
				out = dst;
				copy_to_txd(eq, txq->ss, &out, wr_len);
			}

			ndesc = howmany(wr_len, EQ_ESIZE);
			MPASS(ndesc <= available);

			txq->raw_wrs++;
			txsd = &txq->sdesc[pidx];
			txsd->m = NULL;
			txsd->desc_used = ndesc;
			IDXINCR(pidx, ndesc, eq->sidx);
			dst = &eq->desc[pidx];

			/*
			 * Determine if we should use scratch space
			 * for the TLS work request based on the
			 * available space after advancing pidx for
			 * the field updates work request.
			 */
			wr_len = twr_len;
			using_scratch = (eq->sidx - pidx <
			    howmany(wr_len, EQ_ESIZE));
			if (using_scratch)
				wr = (void *)txq->ss;
			else
				wr = dst;
			txpkt = (void *)(wr + 1);
		}
	} else {
		wr_len = twr_len;
		txpkt = (void *)out;
	}

	wr_len = roundup2(wr_len, 16);
	MPASS(ndesc + howmany(wr_len, EQ_ESIZE) <= available);

	/* FW_ULPTX_WR */
	wr->op_to_compl = htobe32(V_FW_WR_OP(FW_ULPTX_WR));
	wr->flowid_len16 = htobe32(F_FW_ULPTX_WR_DATA |
	    V_FW_WR_LEN16(wr_len / 16));
	wr->cookie = 0;

	/* ULP_TXPKT */
	txpkt->cmd_dest = htobe32(V_ULPTX_CMD(ULP_TX_PKT) |
	    V_ULP_TXPKT_DATAMODIFY(0) |
	    V_ULP_TXPKT_CHANNELID(tlsp->vi->pi->port_id) | V_ULP_TXPKT_DEST(0) |
	    V_ULP_TXPKT_FID(txq->eq.cntxt_id) | V_ULP_TXPKT_RO(1));
	txpkt->len = htobe32(howmany(twr_len - sizeof(*wr), 16));

	/* ULPTX_IDATA sub-command */
	idata = (void *)(txpkt + 1);
	idata->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM) |
	    V_ULP_TX_SC_MORE(1));
	idata->len = sizeof(struct cpl_tx_sec_pdu);

	/*
	 * The key context, CPL_TX_DATA, and immediate data are part
	 * of this ULPTX_IDATA when using an inline key.  When reading
	 * the key from memory, the CPL_TX_DATA and immediate data are
	 * part of a separate ULPTX_IDATA.
	 */
	if (tlsp->inline_key)
		idata->len += tlsp->tx_key_info_size +
		    sizeof(struct cpl_tx_data) + imm_len;
	idata->len = htobe32(idata->len);

	/* CPL_TX_SEC_PDU */
	sec_pdu = (void *)(idata + 1);

	/*
	 * For short records, AAD is counted as header data in SCMD0;
	 * the IV comes next, followed by a cipher region covering the
	 * payload.
	 */
	if (plen == tlen) {
		aad_start = 0;
		aad_stop = 0;
		iv_offset = 1;
		auth_start = 0;
		auth_stop = 0;
		auth_insert = 0;
		cipher_start = AES_BLOCK_LEN + 1;
		cipher_stop = 0;

		sec_pdu->pldlen = htobe32(16 + plen -
		    (m_tls->m_epg_hdrlen + offset));
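
		/*
		 * The extra 16 bytes of pldlen account for the raw
		 * AES IV that is passed as immediate data in front of
		 * the payload SGL (cipher_start above points just
		 * past it).
		 */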
1671
1672		/* These two flits are actually a CPL_TLS_TX_SCMD_FMT. */
1673		sec_pdu->seqno_numivs = tlsp->scmd0_short.seqno_numivs;
1674		sec_pdu->ivgen_hdrlen = htobe32(
1675		    tlsp->scmd0_short.ivgen_hdrlen |
1676		    V_SCMD_HDR_LEN(offset == 0 ? m_tls->m_epg_hdrlen : 0));
1677
1678		txq->kern_tls_short++;
1679	} else {
1680		/*
1681		 * AAD is TLS header.  IV is after AAD.  The cipher region
1682		 * starts after the IV.  See comments in ccr_authenc() and
1683		 * ccr_gmac() in t4_crypto.c regarding cipher and auth
1684		 * start/stop values.
1685		 */
1686		aad_start = 1;
1687		aad_stop = TLS_HEADER_LENGTH;
1688		iv_offset = TLS_HEADER_LENGTH + 1;
1689		cipher_start = m_tls->m_epg_hdrlen + 1;
1690		if (tlsp->enc_mode == SCMD_CIPH_MODE_AES_GCM) {
1691			cipher_stop = 0;
1692			auth_start = cipher_start;
1693			auth_stop = 0;
1694			auth_insert = 0;
1695		} else {
1696			cipher_stop = 0;
1697			auth_start = cipher_start;
1698			auth_stop = 0;
1699			auth_insert = 0;
1700		}
1701
1702		sec_pdu->pldlen = htobe32(plen);
1703
1704		/* These two flits are actually a CPL_TLS_TX_SCMD_FMT. */
1705		sec_pdu->seqno_numivs = tlsp->scmd0.seqno_numivs;
1706		sec_pdu->ivgen_hdrlen = tlsp->scmd0.ivgen_hdrlen;
1707
1708		if (mtod(m_tls, vm_offset_t) == 0)
1709			txq->kern_tls_full++;
1710		else
1711			txq->kern_tls_partial++;
1712	}
1713	sec_pdu->op_ivinsrtofst = htobe32(
1714	    V_CPL_TX_SEC_PDU_OPCODE(CPL_TX_SEC_PDU) |
1715	    V_CPL_TX_SEC_PDU_CPLLEN(2) | V_CPL_TX_SEC_PDU_PLACEHOLDER(0) |
1716	    V_CPL_TX_SEC_PDU_IVINSRTOFST(iv_offset));
1717	sec_pdu->aadstart_cipherstop_hi = htobe32(
1718	    V_CPL_TX_SEC_PDU_AADSTART(aad_start) |
1719	    V_CPL_TX_SEC_PDU_AADSTOP(aad_stop) |
1720	    V_CPL_TX_SEC_PDU_CIPHERSTART(cipher_start) |
1721	    V_CPL_TX_SEC_PDU_CIPHERSTOP_HI(cipher_stop >> 4));
1722	sec_pdu->cipherstop_lo_authinsert = htobe32(
1723	    V_CPL_TX_SEC_PDU_CIPHERSTOP_LO(cipher_stop & 0xf) |
1724	    V_CPL_TX_SEC_PDU_AUTHSTART(auth_start) |
1725	    V_CPL_TX_SEC_PDU_AUTHSTOP(auth_stop) |
1726	    V_CPL_TX_SEC_PDU_AUTHINSERT(auth_insert));
1727
1728	sec_pdu->scmd1 = htobe64(m_tls->m_epg_seqno);
1729
1730	/* Key context */
1731	out = (void *)(sec_pdu + 1);
1732	if (tlsp->inline_key) {
1733		memcpy(out, &tlsp->keyctx, tlsp->tx_key_info_size);
1734		out += tlsp->tx_key_info_size;
1735	} else {
1736		/* ULPTX_SC_MEMRD to read key context. */
1737		memrd = (void *)out;
1738		memrd->cmd_to_len = htobe32(V_ULPTX_CMD(ULP_TX_SC_MEMRD) |
1739		    V_ULP_TX_SC_MORE(1) |
1740		    V_ULPTX_LEN16(tlsp->tx_key_info_size >> 4));
1741		memrd->addr = htobe32(tlsp->tx_key_addr >> 5);
1742
1743		/* ULPTX_IDATA for CPL_TX_DATA and TLS header. */
1744		idata = (void *)(memrd + 1);
1745		idata->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM) |
1746		    V_ULP_TX_SC_MORE(1));
1747		idata->len = htobe32(sizeof(struct cpl_tx_data) + imm_len);
1748
1749		out = (void *)(idata + 1);
1750	}
1751
1752	/* CPL_TX_DATA */
1753	tx_data = (void *)out;
1754	OPCODE_TID(tx_data) = htonl(MK_OPCODE_TID(CPL_TX_DATA, tlsp->tid));
1755	if (m->m_pkthdr.csum_flags & CSUM_TSO) {
1756		mss = m->m_pkthdr.tso_segsz;
1757		tlsp->prev_mss = mss;
1758	} else if (tlsp->prev_mss != 0)
1759		mss = tlsp->prev_mss;
1760	else
1761		mss = if_getmtu(tlsp->vi->ifp) -
1762		    (m->m_pkthdr.l3hlen + m->m_pkthdr.l4hlen);
	if (offset == 0) {
		tx_data->len = htobe32(V_TX_DATA_MSS(mss) | V_TX_LENGTH(tlen));
		tx_data->rsvd = htobe32(tcp_seqno);
	} else {
		tx_data->len = htobe32(V_TX_DATA_MSS(mss) |
		    V_TX_LENGTH(tlen - (m_tls->m_epg_hdrlen + offset)));
		tx_data->rsvd = htobe32(tcp_seqno + m_tls->m_epg_hdrlen + offset);
	}
	tx_data->flags = htobe32(F_TX_BYPASS);
	if (last_wr && (tcp->th_flags & TH_PUSH))
		tx_data->flags |= htobe32(F_TX_PUSH | F_TX_SHOVE);

	/* Populate the TLS header */
	out = (void *)(tx_data + 1);
	if (offset == 0) {
		memcpy(out, m_tls->m_epg_hdr, m_tls->m_epg_hdrlen);
		out += m_tls->m_epg_hdrlen;
	}

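	/*
	 * For GCM the inline IV is the 4-byte salt, the 8-byte
	 * explicit IV from the record header, and a 4-byte block
	 * counter.  Payload blocks are numbered starting at 2
	 * (counter 1 is consumed computing the tag), so full AES
	 * blocks skipped via offset advance the initial counter.
	 */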
	/* AES IV for a short record. */
	if (plen == tlen) {
		iv = out;
		if (tlsp->enc_mode == SCMD_CIPH_MODE_AES_GCM) {
			memcpy(iv, tlsp->keyctx.u.txhdr.txsalt, SALT_SIZE);
			memcpy(iv + 4, hdr + 1, 8);
			*(uint32_t *)(iv + 12) = htobe32(2 +
			    offset / AES_BLOCK_LEN);
		} else
			memcpy(iv, hdr + 1, AES_BLOCK_LEN);
		out += AES_BLOCK_LEN;
	}

	if (imm_len % 16 != 0) {
		/* Zero pad to an 8-byte boundary. */
		memset(out, 0, 8 - (imm_len % 8));
		out += 8 - (imm_len % 8);

		/*
		 * Insert a ULP_TX_SC_NOOP if needed so the SGL is
		 * 16-byte aligned.
		 */
		if (imm_len % 16 <= 8) {
			idata = (void *)out;
			idata->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_NOOP));
			idata->len = htobe32(0);
			out = (void *)(idata + 1);
		}
	}

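	/*
	 * txq->gl should always have room for the record payload
	 * here; a failed append indicates a driver bug, so INVARIANTS
	 * kernels panic to catch it.
	 */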
	/* SGL for record payload */
	sglist_reset(txq->gl);
	if (sglist_append_mbuf_epg(txq->gl, m_tls, m_tls->m_epg_hdrlen + offset,
	    plen - (m_tls->m_epg_hdrlen + offset)) != 0) {
#ifdef INVARIANTS
		panic("%s: failed to append sglist", __func__);
#endif
	}
	write_gl_to_buf(txq->gl, out);

	if (using_scratch) {
		out = dst;
		copy_to_txd(eq, txq->ss, &out, wr_len);
	}

	ndesc += howmany(wr_len, EQ_ESIZE);
	MPASS(ndesc <= available);

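	/*
	 * Payload before the mbuf offset was already sent; any of it
	 * that this WR places on the wire again (to start encryption
	 * at a usable boundary) is accounted as waste.
	 */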
	txq->kern_tls_records++;
	txq->kern_tls_octets += tlen - mtod(m_tls, vm_offset_t);
	if (mtod(m_tls, vm_offset_t) != 0) {
		if (offset == 0)
			txq->kern_tls_waste += mtod(m_tls, vm_offset_t);
		else
			txq->kern_tls_waste += mtod(m_tls, vm_offset_t) -
			    (m_tls->m_epg_hdrlen + offset);
	}

	txsd = &txq->sdesc[pidx];
	if (last_wr)
		txsd->m = m;
	else
		txsd->m = NULL;
	txsd->desc_used = howmany(wr_len, EQ_ESIZE);

	return (ndesc);
}

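/*
 * Send the FIN for an mbuf chain as a plain packet work request with
 * immediate data, using the TCP sequence number that follows the last
 * TLS record work request.
 */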
static int
ktls_write_tcp_fin(struct sge_txq *txq, void *dst, struct mbuf *m,
    u_int available, tcp_seq tcp_seqno, u_int pidx)
{
	struct tx_sdesc *txsd;
	struct fw_eth_tx_pkt_wr *wr;
	struct cpl_tx_pkt_core *cpl;
	uint32_t ctrl;
	uint64_t ctrl1;
	int len16, ndesc, pktlen;
	struct ether_header *eh;
	struct ip *ip, newip;
	struct ip6_hdr *ip6, newip6;
	struct tcphdr *tcp, newtcp;
	caddr_t out;

	TXQ_LOCK_ASSERT_OWNED(txq);
	M_ASSERTPKTHDR(m);

	wr = dst;
	pktlen = m->m_len;
	ctrl = sizeof(struct cpl_tx_pkt_core) + pktlen;
	len16 = howmany(sizeof(struct fw_eth_tx_pkt_wr) + ctrl, 16);
	ndesc = tx_len16_to_desc(len16);
	MPASS(ndesc <= available);

	/* Firmware work request header */
	wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_PKT_WR) |
	    V_FW_ETH_TX_PKT_WR_IMMDLEN(ctrl));

	ctrl = V_FW_WR_LEN16(len16);
	wr->equiq_to_len16 = htobe32(ctrl);
	wr->r3 = 0;

	cpl = (void *)(wr + 1);

	/* CPL header */
	cpl->ctrl0 = txq->cpl_ctrl0;
	cpl->pack = 0;
	cpl->len = htobe16(pktlen);

	out = (void *)(cpl + 1);

	/* Copy over Ethernet header. */
	eh = mtod(m, struct ether_header *);
	copy_to_txd(&txq->eq, (caddr_t)eh, &out, m->m_pkthdr.l2hlen);

	/* Fix up the length in the IP header and copy it out. */
	if (ntohs(eh->ether_type) == ETHERTYPE_IP) {
		ip = (void *)((char *)eh + m->m_pkthdr.l2hlen);
		newip = *ip;
		newip.ip_len = htons(pktlen - m->m_pkthdr.l2hlen);
		copy_to_txd(&txq->eq, (caddr_t)&newip, &out, sizeof(newip));
		if (m->m_pkthdr.l3hlen > sizeof(*ip))
			copy_to_txd(&txq->eq, (caddr_t)(ip + 1), &out,
			    m->m_pkthdr.l3hlen - sizeof(*ip));
		ctrl1 = V_TXPKT_CSUM_TYPE(TX_CSUM_TCPIP) |
		    V_T6_TXPKT_ETHHDR_LEN(m->m_pkthdr.l2hlen - ETHER_HDR_LEN) |
		    V_TXPKT_IPHDR_LEN(m->m_pkthdr.l3hlen);
	} else {
		ip6 = (void *)((char *)eh + m->m_pkthdr.l2hlen);
		newip6 = *ip6;
		/* Unlike ip_len, ip6_plen excludes the IPv6 header. */
		newip6.ip6_plen = htons(pktlen - m->m_pkthdr.l2hlen -
		    sizeof(*ip6));
		copy_to_txd(&txq->eq, (caddr_t)&newip6, &out, sizeof(newip6));
		MPASS(m->m_pkthdr.l3hlen == sizeof(*ip6));
		ctrl1 = V_TXPKT_CSUM_TYPE(TX_CSUM_TCPIP6) |
		    V_T6_TXPKT_ETHHDR_LEN(m->m_pkthdr.l2hlen - ETHER_HDR_LEN) |
		    V_TXPKT_IPHDR_LEN(m->m_pkthdr.l3hlen);
	}
	cpl->ctrl1 = htobe64(ctrl1);
	txq->txcsum++;

	/* Set sequence number in TCP header. */
	tcp = (void *)((char *)eh + m->m_pkthdr.l2hlen + m->m_pkthdr.l3hlen);
	newtcp = *tcp;
	newtcp.th_seq = htonl(tcp_seqno);
	copy_to_txd(&txq->eq, (caddr_t)&newtcp, &out, sizeof(newtcp));

	/* Copy rest of packet. */
	copy_to_txd(&txq->eq, (caddr_t)(tcp + 1), &out, m->m_len -
	    (m->m_pkthdr.l2hlen + m->m_pkthdr.l3hlen + sizeof(*tcp)));
	txq->imm_wrs++;

	txq->txpkt_wrs++;

	txq->kern_tls_fin++;

	txsd = &txq->sdesc[pidx];
	txsd->m = m;
	txsd->desc_used = ndesc;

	return (ndesc);
}

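/*
 * Write all work requests needed for an NIC TLS mbuf chain: a raw
 * packet for any TCP options, an L2T update if the destination
 * changed, one TLS work request per record, and a final packet for a
 * FIN.  Returns the total number of descriptors consumed.
 */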
int
t6_ktls_write_wr(struct sge_txq *txq, void *dst, struct mbuf *m,
    u_int available)
{
	struct sge_eq *eq = &txq->eq;
	struct tx_sdesc *txsd;
	struct tlspcb *tlsp;
	struct tcphdr *tcp;
	struct mbuf *m_tls;
	struct ether_header *eh;
	tcp_seq tcp_seqno;
	u_int ndesc, pidx, totdesc;
	uint16_t vlan_tag;
	bool has_fin, set_l2t_idx;
	void *tsopt;

	M_ASSERTPKTHDR(m);
	MPASS(m->m_pkthdr.snd_tag != NULL);
	tlsp = mst_to_tls(m->m_pkthdr.snd_tag);

	totdesc = 0;
	eh = mtod(m, struct ether_header *);
	tcp = (struct tcphdr *)((char *)eh + m->m_pkthdr.l2hlen +
	    m->m_pkthdr.l3hlen);
	pidx = eq->pidx;
	has_fin = (tcp->th_flags & TH_FIN) != 0;

	/*
	 * If this TLS record has a FIN, then we will send any
	 * requested options as part of the FIN packet.
	 */
	if (!has_fin && ktls_has_tcp_options(tcp)) {
		ndesc = ktls_write_tcp_options(txq, dst, m, available, pidx);
		totdesc += ndesc;
		IDXINCR(pidx, ndesc, eq->sidx);
		dst = &eq->desc[pidx];
#ifdef VERBOSE_TRACES
		CTR2(KTR_CXGBE, "%s: tid %d wrote TCP options packet", __func__,
		    tlsp->tid);
#endif
	}

	/*
	 * Allocate a new L2T entry if necessary.  This may write out
	 * a work request to the txq.
	 */
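	/* A VLAN ID of 0xfff marks the frame as untagged. */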
	if (m->m_flags & M_VLANTAG)
		vlan_tag = m->m_pkthdr.ether_vtag;
	else
		vlan_tag = 0xfff;
	set_l2t_idx = false;
	if (tlsp->l2te == NULL || tlsp->l2te->vlan != vlan_tag ||
	    memcmp(tlsp->l2te->dmac, eh->ether_dhost, ETHER_ADDR_LEN) != 0) {
		set_l2t_idx = true;
		if (tlsp->l2te)
			t4_l2t_release(tlsp->l2te);
		tlsp->l2te = t4_l2t_alloc_tls(tlsp->sc, txq, dst, &ndesc,
		    vlan_tag, tlsp->vi->pi->lport, eh->ether_dhost);
		if (tlsp->l2te == NULL)
			CXGBE_UNIMPLEMENTED("failed to allocate TLS L2TE");
		if (ndesc != 0) {
			MPASS(ndesc <= available - totdesc);

			txq->raw_wrs++;
			txsd = &txq->sdesc[pidx];
			txsd->m = NULL;
			txsd->desc_used = ndesc;
			totdesc += ndesc;
			IDXINCR(pidx, ndesc, eq->sidx);
			dst = &eq->desc[pidx];
		}
	}

	/*
	 * Iterate over each TLS record constructing a work request
	 * for that record.
	 */
	for (m_tls = m->m_next; m_tls != NULL; m_tls = m_tls->m_next) {
		MPASS(m_tls->m_flags & M_EXTPG);

		/*
		 * Determine the initial TCP sequence number for this
		 * record.
		 */
		tsopt = NULL;
		if (m_tls == m->m_next) {
			tcp_seqno = ntohl(tcp->th_seq) -
			    mtod(m_tls, vm_offset_t);
			if (tlsp->using_timestamps)
				tsopt = ktls_find_tcp_timestamps(tcp);
		} else {
			MPASS(mtod(m_tls, vm_offset_t) == 0);
			tcp_seqno = tlsp->prev_seq;
		}

		ndesc = ktls_write_tls_wr(tlsp, txq, dst, m, tcp, m_tls,
		    available - totdesc, tcp_seqno, tsopt, pidx, set_l2t_idx);
		totdesc += ndesc;
		IDXINCR(pidx, ndesc, eq->sidx);
		dst = &eq->desc[pidx];

		/* Only need to set the L2T index once. */
		set_l2t_idx = false;
	}

	if (has_fin) {
		/*
		 * If the TCP header for this chain has FIN set, then
		 * explicitly send a packet with FIN set.  That packet
		 * will also have PUSH set if requested.  This assumes
		 * we sent at least one TLS record work request and
		 * uses the TCP sequence number after that request as
		 * the sequence number for the FIN packet.
		 */
		ndesc = ktls_write_tcp_fin(txq, dst, m, available,
		    tlsp->prev_seq, pidx);
		totdesc += ndesc;
	}

	MPASS(totdesc <= available);
	return (totdesc);
}

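/*
 * Release the hardware resources held by a send tag: its L2T entry,
 * TID, CLIP entry, and TX key memory.
 */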
static void
t6_tls_tag_free(struct m_snd_tag *mst)
{
	struct adapter *sc;
	struct tlspcb *tlsp;

	tlsp = mst_to_tls(mst);
	sc = tlsp->sc;

	CTR2(KTR_CXGBE, "%s: tid %d", __func__, tlsp->tid);

	if (tlsp->l2te)
		t4_l2t_release(tlsp->l2te);
	if (tlsp->tid >= 0)
		release_tid(sc, tlsp->tid, tlsp->ctrlq);
	if (tlsp->ce)
		t4_release_clip_entry(sc, tlsp->ce);
	if (tlsp->tx_key_addr >= 0)
		t4_free_tls_keyid(sc, tlsp->tx_key_addr);

	zfree(tlsp, M_CXGBE);
}

void
t6_ktls_modload(void)
{

	t4_register_shared_cpl_handler(CPL_ACT_OPEN_RPL, ktls_act_open_rpl,
	    CPL_COOKIE_KERN_TLS);
}

void
t6_ktls_modunload(void)
{

	t4_register_shared_cpl_handler(CPL_ACT_OPEN_RPL, NULL,
	    CPL_COOKIE_KERN_TLS);
}

#else
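
/*
 * Stubs used when the kernel is built without INET and INET6; NIC
 * TLS is unreachable in that configuration.
 */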

int
t6_tls_tag_alloc(if_t ifp, union if_snd_tag_alloc_params *params,
    struct m_snd_tag **pt)
{
	return (ENXIO);
}

int
t6_ktls_parse_pkt(struct mbuf *m)
{
	return (EINVAL);
}

int
t6_ktls_write_wr(struct sge_txq *txq, void *dst, struct mbuf *m,
    u_int available)
{
	panic("can't happen");
}

void
t6_ktls_modload(void)
{
}

void
t6_ktls_modunload(void)
{
}

#endif
