1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * Copyright (c) 2018 Chelsio Communications, Inc.
4 *
5 * Written by: Atul Gupta (atul.gupta@chelsio.com)
6 */
7
8#include <linux/module.h>
9#include <linux/list.h>
10#include <linux/workqueue.h>
11#include <linux/skbuff.h>
12#include <linux/timer.h>
13#include <linux/notifier.h>
14#include <linux/inetdevice.h>
15#include <linux/ip.h>
16#include <linux/tcp.h>
17#include <linux/sched/signal.h>
18#include <net/tcp.h>
19#include <net/busy_poll.h>
20#include <crypto/aes.h>
21
22#include "chtls.h"
23#include "chtls_cm.h"
24
25static bool is_tls_tx(struct chtls_sock *csk)
26{
27	return csk->tlshws.txkey >= 0;
28}
29
30static bool is_tls_rx(struct chtls_sock *csk)
31{
32	return csk->tlshws.rxkey >= 0;
33}
34
35static int data_sgl_len(const struct sk_buff *skb)
36{
37	unsigned int cnt;
38
39	cnt = skb_shinfo(skb)->nr_frags;
40	return sgl_len(cnt) * 8;
41}
42
43static int nos_ivs(struct sock *sk, unsigned int size)
44{
45	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
46
47	return DIV_ROUND_UP(size, csk->tlshws.mfs);
48}
49
50static int set_ivs_imm(struct sock *sk, const struct sk_buff *skb)
51{
52	int ivs_size = nos_ivs(sk, skb->len) * CIPHER_BLOCK_SIZE;
53	int hlen = TLS_WR_CPL_LEN + data_sgl_len(skb);
54
55	if ((hlen + KEY_ON_MEM_SZ + ivs_size) <
56	    MAX_IMM_OFLD_TX_DATA_WR_LEN) {
57		ULP_SKB_CB(skb)->ulp.tls.iv = 1;
58		return 1;
59	}
60	ULP_SKB_CB(skb)->ulp.tls.iv = 0;
61	return 0;
62}
63
64static int max_ivs_size(struct sock *sk, int size)
65{
66	return nos_ivs(sk, size) * CIPHER_BLOCK_SIZE;
67}
68
69static int ivs_size(struct sock *sk, const struct sk_buff *skb)
70{
71	return set_ivs_imm(sk, skb) ? (nos_ivs(sk, skb->len) *
72		 CIPHER_BLOCK_SIZE) : 0;
73}
74
75static int flowc_wr_credits(int nparams, int *flowclenp)
76{
77	int flowclen16, flowclen;
78
79	flowclen = offsetof(struct fw_flowc_wr, mnemval[nparams]);
80	flowclen16 = DIV_ROUND_UP(flowclen, 16);
81	flowclen = flowclen16 * 16;
82
83	if (flowclenp)
84		*flowclenp = flowclen;
85
86	return flowclen16;
87}
88
89static struct sk_buff *create_flowc_wr_skb(struct sock *sk,
90					   struct fw_flowc_wr *flowc,
91					   int flowclen)
92{
93	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
94	struct sk_buff *skb;
95
96	skb = alloc_skb(flowclen, GFP_ATOMIC);
97	if (!skb)
98		return NULL;
99
100	__skb_put_data(skb, flowc, flowclen);
101	skb_set_queue_mapping(skb, (csk->txq_idx << 1) | CPL_PRIORITY_DATA);
102
103	return skb;
104}
105
106static int send_flowc_wr(struct sock *sk, struct fw_flowc_wr *flowc,
107			 int flowclen)
108{
109	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
110	struct tcp_sock *tp = tcp_sk(sk);
111	struct sk_buff *skb;
112	int flowclen16;
113	int ret;
114
115	flowclen16 = flowclen / 16;
116
117	if (csk_flag(sk, CSK_TX_DATA_SENT)) {
118		skb = create_flowc_wr_skb(sk, flowc, flowclen);
119		if (!skb)
120			return -ENOMEM;
121
122		skb_entail(sk, skb,
123			   ULPCB_FLAG_NO_HDR | ULPCB_FLAG_NO_APPEND);
124		return 0;
125	}
126
127	ret = cxgb4_immdata_send(csk->egress_dev,
128				 csk->txq_idx,
129				 flowc, flowclen);
130	if (!ret)
131		return flowclen16;
132	skb = create_flowc_wr_skb(sk, flowc, flowclen);
133	if (!skb)
134		return -ENOMEM;
135	send_or_defer(sk, tp, skb, 0);
136	return flowclen16;
137}
138
139static u8 tcp_state_to_flowc_state(u8 state)
140{
141	switch (state) {
142	case TCP_ESTABLISHED:
143		return FW_FLOWC_MNEM_TCPSTATE_ESTABLISHED;
144	case TCP_CLOSE_WAIT:
145		return FW_FLOWC_MNEM_TCPSTATE_CLOSEWAIT;
146	case TCP_FIN_WAIT1:
147		return FW_FLOWC_MNEM_TCPSTATE_FINWAIT1;
148	case TCP_CLOSING:
149		return FW_FLOWC_MNEM_TCPSTATE_CLOSING;
150	case TCP_LAST_ACK:
151		return FW_FLOWC_MNEM_TCPSTATE_LASTACK;
152	case TCP_FIN_WAIT2:
153		return FW_FLOWC_MNEM_TCPSTATE_FINWAIT2;
154	}
155
156	return FW_FLOWC_MNEM_TCPSTATE_ESTABLISHED;
157}
158
159int send_tx_flowc_wr(struct sock *sk, int compl,
160		     u32 snd_nxt, u32 rcv_nxt)
161{
162	struct flowc_packed {
163		struct fw_flowc_wr fc;
164		struct fw_flowc_mnemval mnemval[FW_FLOWC_MNEM_MAX];
165	} __packed sflowc;
166	int nparams, paramidx, flowclen16, flowclen;
167	struct fw_flowc_wr *flowc;
168	struct chtls_sock *csk;
169	struct tcp_sock *tp;
170
171	csk = rcu_dereference_sk_user_data(sk);
172	tp = tcp_sk(sk);
173	memset(&sflowc, 0, sizeof(sflowc));
174	flowc = &sflowc.fc;
175
176#define FLOWC_PARAM(__m, __v) \
177	do { \
178		flowc->mnemval[paramidx].mnemonic = FW_FLOWC_MNEM_##__m; \
179		flowc->mnemval[paramidx].val = cpu_to_be32(__v); \
180		paramidx++; \
181	} while (0)
182
183	paramidx = 0;
184
185	FLOWC_PARAM(PFNVFN, FW_PFVF_CMD_PFN_V(csk->cdev->lldi->pf));
186	FLOWC_PARAM(CH, csk->tx_chan);
187	FLOWC_PARAM(PORT, csk->tx_chan);
188	FLOWC_PARAM(IQID, csk->rss_qid);
189	FLOWC_PARAM(SNDNXT, tp->snd_nxt);
190	FLOWC_PARAM(RCVNXT, tp->rcv_nxt);
191	FLOWC_PARAM(SNDBUF, csk->sndbuf);
192	FLOWC_PARAM(MSS, tp->mss_cache);
193	FLOWC_PARAM(TCPSTATE, tcp_state_to_flowc_state(sk->sk_state));
194
195	if (SND_WSCALE(tp))
196		FLOWC_PARAM(RCV_SCALE, SND_WSCALE(tp));
197
198	if (csk->ulp_mode == ULP_MODE_TLS)
199		FLOWC_PARAM(ULD_MODE, ULP_MODE_TLS);
200
201	if (csk->tlshws.fcplenmax)
202		FLOWC_PARAM(TXDATAPLEN_MAX, csk->tlshws.fcplenmax);
203
204	nparams = paramidx;
205#undef FLOWC_PARAM
206
207	flowclen16 = flowc_wr_credits(nparams, &flowclen);
208	flowc->op_to_nparams =
209		cpu_to_be32(FW_WR_OP_V(FW_FLOWC_WR) |
210			    FW_WR_COMPL_V(compl) |
211			    FW_FLOWC_WR_NPARAMS_V(nparams));
212	flowc->flowid_len16 = cpu_to_be32(FW_WR_LEN16_V(flowclen16) |
213					  FW_WR_FLOWID_V(csk->tid));
214
215	return send_flowc_wr(sk, flowc, flowclen);
216}
217
218/* Copy IVs to WR */
219static int tls_copy_ivs(struct sock *sk, struct sk_buff *skb)
220
221{
222	struct chtls_sock *csk;
223	unsigned char *iv_loc;
224	struct chtls_hws *hws;
225	unsigned char *ivs;
226	u16 number_of_ivs;
227	struct page *page;
228	int err = 0;
229
230	csk = rcu_dereference_sk_user_data(sk);
231	hws = &csk->tlshws;
232	number_of_ivs = nos_ivs(sk, skb->len);
233
234	if (number_of_ivs > MAX_IVS_PAGE) {
235		pr_warn("MAX IVs in PAGE exceeded %d\n", number_of_ivs);
236		return -ENOMEM;
237	}
238
239	/* generate the  IVs */
240	ivs = kmalloc_array(CIPHER_BLOCK_SIZE, number_of_ivs, GFP_ATOMIC);
241	if (!ivs)
242		return -ENOMEM;
243	get_random_bytes(ivs, number_of_ivs * CIPHER_BLOCK_SIZE);
244
245	if (skb_ulp_tls_iv_imm(skb)) {
246		/* send the IVs as immediate data in the WR */
247		iv_loc = (unsigned char *)__skb_push(skb, number_of_ivs *
248						CIPHER_BLOCK_SIZE);
249		if (iv_loc)
250			memcpy(iv_loc, ivs, number_of_ivs * CIPHER_BLOCK_SIZE);
251
252		hws->ivsize = number_of_ivs * CIPHER_BLOCK_SIZE;
253	} else {
254		/* Send the IVs as sgls */
255		/* Already accounted IV DSGL for credits */
256		skb_shinfo(skb)->nr_frags--;
257		page = alloc_pages(sk->sk_allocation | __GFP_COMP, 0);
258		if (!page) {
259			pr_info("%s : Page allocation for IVs failed\n",
260				__func__);
261			err = -ENOMEM;
262			goto out;
263		}
264		memcpy(page_address(page), ivs, number_of_ivs *
265		       CIPHER_BLOCK_SIZE);
266		skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags, page, 0,
267				   number_of_ivs * CIPHER_BLOCK_SIZE);
268		hws->ivsize = 0;
269	}
270out:
271	kfree(ivs);
272	return err;
273}
274
275/* Copy Key to WR */
276static void tls_copy_tx_key(struct sock *sk, struct sk_buff *skb)
277{
278	struct ulptx_sc_memrd *sc_memrd;
279	struct chtls_sock *csk;
280	struct chtls_dev *cdev;
281	struct ulptx_idata *sc;
282	struct chtls_hws *hws;
283	u32 immdlen;
284	int kaddr;
285
286	csk = rcu_dereference_sk_user_data(sk);
287	hws = &csk->tlshws;
288	cdev = csk->cdev;
289
290	immdlen = sizeof(*sc) + sizeof(*sc_memrd);
291	kaddr = keyid_to_addr(cdev->kmap.start, hws->txkey);
292	sc = (struct ulptx_idata *)__skb_push(skb, immdlen);
293	if (sc) {
294		sc->cmd_more = htonl(ULPTX_CMD_V(ULP_TX_SC_NOOP));
295		sc->len = htonl(0);
296		sc_memrd = (struct ulptx_sc_memrd *)(sc + 1);
297		sc_memrd->cmd_to_len =
298				htonl(ULPTX_CMD_V(ULP_TX_SC_MEMRD) |
299				ULP_TX_SC_MORE_V(1) |
300				ULPTX_LEN16_V(hws->keylen >> 4));
301		sc_memrd->addr = htonl(kaddr);
302	}
303}
304
305static u64 tlstx_incr_seqnum(struct chtls_hws *hws)
306{
307	return hws->tx_seq_no++;
308}
309
310static bool is_sg_request(const struct sk_buff *skb)
311{
312	return skb->peeked ||
313		(skb->len > MAX_IMM_ULPTX_WR_LEN);
314}
315
316/*
317 * Returns true if an sk_buff carries urgent data.
318 */
319static bool skb_urgent(struct sk_buff *skb)
320{
321	return ULP_SKB_CB(skb)->flags & ULPCB_FLAG_URG;
322}
323
324/* TLS content type for CPL SFO */
325static unsigned char tls_content_type(unsigned char content_type)
326{
327	switch (content_type) {
328	case TLS_HDR_TYPE_CCS:
329		return CPL_TX_TLS_SFO_TYPE_CCS;
330	case TLS_HDR_TYPE_ALERT:
331		return CPL_TX_TLS_SFO_TYPE_ALERT;
332	case TLS_HDR_TYPE_HANDSHAKE:
333		return CPL_TX_TLS_SFO_TYPE_HANDSHAKE;
334	case TLS_HDR_TYPE_HEARTBEAT:
335		return CPL_TX_TLS_SFO_TYPE_HEARTBEAT;
336	}
337	return CPL_TX_TLS_SFO_TYPE_DATA;
338}
339
340static void tls_tx_data_wr(struct sock *sk, struct sk_buff *skb,
341			   int dlen, int tls_immd, u32 credits,
342			   int expn, int pdus)
343{
344	struct fw_tlstx_data_wr *req_wr;
345	struct cpl_tx_tls_sfo *req_cpl;
346	unsigned int wr_ulp_mode_force;
347	struct tls_scmd *updated_scmd;
348	unsigned char data_type;
349	struct chtls_sock *csk;
350	struct net_device *dev;
351	struct chtls_hws *hws;
352	struct tls_scmd *scmd;
353	struct adapter *adap;
354	unsigned char *req;
355	int immd_len;
356	int iv_imm;
357	int len;
358
359	csk = rcu_dereference_sk_user_data(sk);
360	iv_imm = skb_ulp_tls_iv_imm(skb);
361	dev = csk->egress_dev;
362	adap = netdev2adap(dev);
363	hws = &csk->tlshws;
364	scmd = &hws->scmd;
365	len = dlen + expn;
366
367	dlen = (dlen < hws->mfs) ? dlen : hws->mfs;
368	atomic_inc(&adap->chcr_stats.tls_pdu_tx);
369
370	updated_scmd = scmd;
371	updated_scmd->seqno_numivs &= 0xffffff80;
372	updated_scmd->seqno_numivs |= SCMD_NUM_IVS_V(pdus);
373	hws->scmd = *updated_scmd;
374
375	req = (unsigned char *)__skb_push(skb, sizeof(struct cpl_tx_tls_sfo));
376	req_cpl = (struct cpl_tx_tls_sfo *)req;
377	req = (unsigned char *)__skb_push(skb, (sizeof(struct
378				fw_tlstx_data_wr)));
379
380	req_wr = (struct fw_tlstx_data_wr *)req;
381	immd_len = (tls_immd ? dlen : 0);
382	req_wr->op_to_immdlen =
383		htonl(FW_WR_OP_V(FW_TLSTX_DATA_WR) |
384		FW_TLSTX_DATA_WR_COMPL_V(1) |
385		FW_TLSTX_DATA_WR_IMMDLEN_V(immd_len));
386	req_wr->flowid_len16 = htonl(FW_TLSTX_DATA_WR_FLOWID_V(csk->tid) |
387				     FW_TLSTX_DATA_WR_LEN16_V(credits));
388	wr_ulp_mode_force = TX_ULP_MODE_V(ULP_MODE_TLS);
389
390	if (is_sg_request(skb))
391		wr_ulp_mode_force |= FW_OFLD_TX_DATA_WR_ALIGNPLD_F |
392			((tcp_sk(sk)->nonagle & TCP_NAGLE_OFF) ? 0 :
393			FW_OFLD_TX_DATA_WR_SHOVE_F);
394
395	req_wr->lsodisable_to_flags =
396			htonl(TX_ULP_MODE_V(ULP_MODE_TLS) |
397			      TX_URG_V(skb_urgent(skb)) |
398			      T6_TX_FORCE_F | wr_ulp_mode_force |
399			      TX_SHOVE_V((!csk_flag(sk, CSK_TX_MORE_DATA)) &&
400					 skb_queue_empty(&csk->txq)));
401
402	req_wr->ctxloc_to_exp =
403			htonl(FW_TLSTX_DATA_WR_NUMIVS_V(pdus) |
404			      FW_TLSTX_DATA_WR_EXP_V(expn) |
405			      FW_TLSTX_DATA_WR_CTXLOC_V(CHTLS_KEY_CONTEXT_DDR) |
406			      FW_TLSTX_DATA_WR_IVDSGL_V(!iv_imm) |
407			      FW_TLSTX_DATA_WR_KEYSIZE_V(hws->keylen >> 4));
408
409	/* Fill in the length */
410	req_wr->plen = htonl(len);
411	req_wr->mfs = htons(hws->mfs);
412	req_wr->adjustedplen_pkd =
413		htons(FW_TLSTX_DATA_WR_ADJUSTEDPLEN_V(hws->adjustlen));
414	req_wr->expinplenmax_pkd =
415		htons(FW_TLSTX_DATA_WR_EXPINPLENMAX_V(hws->expansion));
416	req_wr->pdusinplenmax_pkd =
417		FW_TLSTX_DATA_WR_PDUSINPLENMAX_V(hws->pdus);
418	req_wr->r10 = 0;
419
420	data_type = tls_content_type(ULP_SKB_CB(skb)->ulp.tls.type);
421	req_cpl->op_to_seg_len = htonl(CPL_TX_TLS_SFO_OPCODE_V(CPL_TX_TLS_SFO) |
422				       CPL_TX_TLS_SFO_DATA_TYPE_V(data_type) |
423				       CPL_TX_TLS_SFO_CPL_LEN_V(2) |
424				       CPL_TX_TLS_SFO_SEG_LEN_V(dlen));
425	req_cpl->pld_len = htonl(len - expn);
426
427	req_cpl->type_protover = htonl(CPL_TX_TLS_SFO_TYPE_V
428		((data_type == CPL_TX_TLS_SFO_TYPE_HEARTBEAT) ?
429		TLS_HDR_TYPE_HEARTBEAT : 0) |
430		CPL_TX_TLS_SFO_PROTOVER_V(0));
431
432	/* create the s-command */
433	req_cpl->r1_lo = 0;
434	req_cpl->seqno_numivs  = cpu_to_be32(hws->scmd.seqno_numivs);
435	req_cpl->ivgen_hdrlen = cpu_to_be32(hws->scmd.ivgen_hdrlen);
436	req_cpl->scmd1 = cpu_to_be64(tlstx_incr_seqnum(hws));
437}
438
439/*
440 * Calculate the TLS data expansion size
441 */
442static int chtls_expansion_size(struct sock *sk, int data_len,
443				int fullpdu,
444				unsigned short *pducnt)
445{
446	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
447	struct chtls_hws *hws = &csk->tlshws;
448	struct tls_scmd *scmd = &hws->scmd;
449	int fragsize = hws->mfs;
450	int expnsize = 0;
451	int fragleft;
452	int fragcnt;
453	int expppdu;
454
455	if (SCMD_CIPH_MODE_G(scmd->seqno_numivs) ==
456	    SCMD_CIPH_MODE_AES_GCM) {
457		expppdu = GCM_TAG_SIZE + AEAD_EXPLICIT_DATA_SIZE +
458			  TLS_HEADER_LENGTH;
459
460		if (fullpdu) {
461			*pducnt = data_len / (expppdu + fragsize);
462			if (*pducnt > 32)
463				*pducnt = 32;
464			else if (!*pducnt)
465				*pducnt = 1;
466			expnsize = (*pducnt) * expppdu;
467			return expnsize;
468		}
469		fragcnt = (data_len / fragsize);
470		expnsize =  fragcnt * expppdu;
471		fragleft = data_len % fragsize;
472		if (fragleft > 0)
473			expnsize += expppdu;
474	}
475	return expnsize;
476}
477
478/* WR with IV, KEY and CPL SFO added */
479static void make_tlstx_data_wr(struct sock *sk, struct sk_buff *skb,
480			       int tls_tx_imm, int tls_len, u32 credits)
481{
482	unsigned short pdus_per_ulp = 0;
483	struct chtls_sock *csk;
484	struct chtls_hws *hws;
485	int expn_sz;
486	int pdus;
487
488	csk = rcu_dereference_sk_user_data(sk);
489	hws = &csk->tlshws;
490	pdus = DIV_ROUND_UP(tls_len, hws->mfs);
491	expn_sz = chtls_expansion_size(sk, tls_len, 0, NULL);
492	if (!hws->compute) {
493		hws->expansion = chtls_expansion_size(sk,
494						      hws->fcplenmax,
495						      1, &pdus_per_ulp);
496		hws->pdus = pdus_per_ulp;
497		hws->adjustlen = hws->pdus *
498			((hws->expansion / hws->pdus) + hws->mfs);
499		hws->compute = 1;
500	}
501	if (tls_copy_ivs(sk, skb))
502		return;
503	tls_copy_tx_key(sk, skb);
504	tls_tx_data_wr(sk, skb, tls_len, tls_tx_imm, credits, expn_sz, pdus);
505	hws->tx_seq_no += (pdus - 1);
506}
507
508static void make_tx_data_wr(struct sock *sk, struct sk_buff *skb,
509			    unsigned int immdlen, int len,
510			    u32 credits, u32 compl)
511{
512	struct fw_ofld_tx_data_wr *req;
513	unsigned int wr_ulp_mode_force;
514	struct chtls_sock *csk;
515	unsigned int opcode;
516
517	csk = rcu_dereference_sk_user_data(sk);
518	opcode = FW_OFLD_TX_DATA_WR;
519
520	req = (struct fw_ofld_tx_data_wr *)__skb_push(skb, sizeof(*req));
521	req->op_to_immdlen = htonl(WR_OP_V(opcode) |
522				FW_WR_COMPL_V(compl) |
523				FW_WR_IMMDLEN_V(immdlen));
524	req->flowid_len16 = htonl(FW_WR_FLOWID_V(csk->tid) |
525				FW_WR_LEN16_V(credits));
526
527	wr_ulp_mode_force = TX_ULP_MODE_V(csk->ulp_mode);
528	if (is_sg_request(skb))
529		wr_ulp_mode_force |= FW_OFLD_TX_DATA_WR_ALIGNPLD_F |
530			((tcp_sk(sk)->nonagle & TCP_NAGLE_OFF) ? 0 :
531				FW_OFLD_TX_DATA_WR_SHOVE_F);
532
533	req->tunnel_to_proxy = htonl(wr_ulp_mode_force |
534			TX_URG_V(skb_urgent(skb)) |
535			TX_SHOVE_V((!csk_flag(sk, CSK_TX_MORE_DATA)) &&
536				   skb_queue_empty(&csk->txq)));
537	req->plen = htonl(len);
538}
539
540static int chtls_wr_size(struct chtls_sock *csk, const struct sk_buff *skb,
541			 bool size)
542{
543	int wr_size;
544
545	wr_size = TLS_WR_CPL_LEN;
546	wr_size += KEY_ON_MEM_SZ;
547	wr_size += ivs_size(csk->sk, skb);
548
549	if (size)
550		return wr_size;
551
552	/* frags counted for IV dsgl */
553	if (!skb_ulp_tls_iv_imm(skb))
554		skb_shinfo(skb)->nr_frags++;
555
556	return wr_size;
557}
558
559static bool is_ofld_imm(struct chtls_sock *csk, const struct sk_buff *skb)
560{
561	int length = skb->len;
562
563	if (skb->peeked || skb->len > MAX_IMM_ULPTX_WR_LEN)
564		return false;
565
566	if (likely(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NEED_HDR)) {
567		/* Check TLS header len for Immediate */
568		if (csk->ulp_mode == ULP_MODE_TLS &&
569		    skb_ulp_tls_inline(skb))
570			length += chtls_wr_size(csk, skb, true);
571		else
572			length += sizeof(struct fw_ofld_tx_data_wr);
573
574		return length <= MAX_IMM_OFLD_TX_DATA_WR_LEN;
575	}
576	return true;
577}
578
579static unsigned int calc_tx_flits(const struct sk_buff *skb,
580				  unsigned int immdlen)
581{
582	unsigned int flits, cnt;
583
584	flits = immdlen / 8;   /* headers */
585	cnt = skb_shinfo(skb)->nr_frags;
586	if (skb_tail_pointer(skb) != skb_transport_header(skb))
587		cnt++;
588	return flits + sgl_len(cnt);
589}
590
/*
 * ARP failure handler installed on outgoing skbs: simply frees the skb.
 * @handle is not used.
 */
static void arp_failure_discard(void *handle, struct sk_buff *skb)
{
	kfree_skb(skb);
}
595
596int chtls_push_frames(struct chtls_sock *csk, int comp)
597{
598	struct chtls_hws *hws = &csk->tlshws;
599	struct tcp_sock *tp;
600	struct sk_buff *skb;
601	int total_size = 0;
602	struct sock *sk;
603	int wr_size;
604
605	wr_size = sizeof(struct fw_ofld_tx_data_wr);
606	sk = csk->sk;
607	tp = tcp_sk(sk);
608
609	if (unlikely(sk_in_state(sk, TCPF_SYN_SENT | TCPF_CLOSE)))
610		return 0;
611
612	if (unlikely(csk_flag(sk, CSK_ABORT_SHUTDOWN)))
613		return 0;
614
615	while (csk->wr_credits && (skb = skb_peek(&csk->txq)) &&
616	       (!(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_HOLD) ||
617		skb_queue_len(&csk->txq) > 1)) {
618		unsigned int credit_len = skb->len;
619		unsigned int credits_needed;
620		unsigned int completion = 0;
621		int tls_len = skb->len;/* TLS data len before IV/key */
622		unsigned int immdlen;
623		int len = skb->len;    /* length [ulp bytes] inserted by hw */
624		int flowclen16 = 0;
625		int tls_tx_imm = 0;
626
627		immdlen = skb->len;
628		if (!is_ofld_imm(csk, skb)) {
629			immdlen = skb_transport_offset(skb);
630			if (skb_ulp_tls_inline(skb))
631				wr_size = chtls_wr_size(csk, skb, false);
632			credit_len = 8 * calc_tx_flits(skb, immdlen);
633		} else {
634			if (skb_ulp_tls_inline(skb)) {
635				wr_size = chtls_wr_size(csk, skb, false);
636				tls_tx_imm = 1;
637			}
638		}
639		if (likely(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NEED_HDR))
640			credit_len += wr_size;
641		credits_needed = DIV_ROUND_UP(credit_len, 16);
642		if (!csk_flag_nochk(csk, CSK_TX_DATA_SENT)) {
643			flowclen16 = send_tx_flowc_wr(sk, 1, tp->snd_nxt,
644						      tp->rcv_nxt);
645			if (flowclen16 <= 0)
646				break;
647			csk->wr_credits -= flowclen16;
648			csk->wr_unacked += flowclen16;
649			csk->wr_nondata += flowclen16;
650			csk_set_flag(csk, CSK_TX_DATA_SENT);
651		}
652
653		if (csk->wr_credits < credits_needed) {
654			if (skb_ulp_tls_inline(skb) &&
655			    !skb_ulp_tls_iv_imm(skb))
656				skb_shinfo(skb)->nr_frags--;
657			break;
658		}
659
660		__skb_unlink(skb, &csk->txq);
661		skb_set_queue_mapping(skb, (csk->txq_idx << 1) |
662				      CPL_PRIORITY_DATA);
663		if (hws->ofld)
664			hws->txqid = (skb->queue_mapping >> 1);
665		skb->csum = (__force __wsum)(credits_needed + csk->wr_nondata);
666		csk->wr_credits -= credits_needed;
667		csk->wr_unacked += credits_needed;
668		csk->wr_nondata = 0;
669		enqueue_wr(csk, skb);
670
671		if (likely(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NEED_HDR)) {
672			if ((comp && csk->wr_unacked == credits_needed) ||
673			    (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_COMPL) ||
674			    csk->wr_unacked >= csk->wr_max_credits / 2) {
675				completion = 1;
676				csk->wr_unacked = 0;
677			}
678			if (skb_ulp_tls_inline(skb))
679				make_tlstx_data_wr(sk, skb, tls_tx_imm,
680						   tls_len, credits_needed);
681			else
682				make_tx_data_wr(sk, skb, immdlen, len,
683						credits_needed, completion);
684			tp->snd_nxt += len;
685			tp->lsndtime = tcp_jiffies32;
686			if (completion)
687				ULP_SKB_CB(skb)->flags &= ~ULPCB_FLAG_NEED_HDR;
688		} else {
689			struct cpl_close_con_req *req = cplhdr(skb);
690			unsigned int cmd  = CPL_OPCODE_G(ntohl
691					     (OPCODE_TID(req)));
692
693			if (cmd == CPL_CLOSE_CON_REQ)
694				csk_set_flag(csk,
695					     CSK_CLOSE_CON_REQUESTED);
696
697			if ((ULP_SKB_CB(skb)->flags & ULPCB_FLAG_COMPL) &&
698			    (csk->wr_unacked >= csk->wr_max_credits / 2)) {
699				req->wr.wr_hi |= htonl(FW_WR_COMPL_F);
700				csk->wr_unacked = 0;
701			}
702		}
703		total_size += skb->truesize;
704		if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_BARRIER)
705			csk_set_flag(csk, CSK_TX_WAIT_IDLE);
706		t4_set_arp_err_handler(skb, NULL, arp_failure_discard);
707		cxgb4_l2t_send(csk->egress_dev, skb, csk->l2t_entry);
708	}
709	sk->sk_wmem_queued -= total_size;
710	return total_size;
711}
712
713static void mark_urg(struct tcp_sock *tp, int flags,
714		     struct sk_buff *skb)
715{
716	if (unlikely(flags & MSG_OOB)) {
717		tp->snd_up = tp->write_seq;
718		ULP_SKB_CB(skb)->flags = ULPCB_FLAG_URG |
719					 ULPCB_FLAG_BARRIER |
720					 ULPCB_FLAG_NO_APPEND |
721					 ULPCB_FLAG_NEED_HDR;
722	}
723}
724
725/*
726 * Returns true if a connection should send more data to TCP engine
727 */
728static bool should_push(struct sock *sk)
729{
730	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
731	struct chtls_dev *cdev = csk->cdev;
732	struct tcp_sock *tp = tcp_sk(sk);
733
734	/*
735	 * If we've released our offload resources there's nothing to do ...
736	 */
737	if (!cdev)
738		return false;
739
740	/*
741	 * If there aren't any work requests in flight, or there isn't enough
742	 * data in flight, or Nagle is off then send the current TX_DATA
743	 * otherwise hold it and wait to accumulate more data.
744	 */
745	return csk->wr_credits == csk->wr_max_credits ||
746		(tp->nonagle & TCP_NAGLE_OFF);
747}
748
749/*
750 * Returns true if a TCP socket is corked.
751 */
752static bool corked(const struct tcp_sock *tp, int flags)
753{
754	return (flags & MSG_MORE) || (tp->nonagle & TCP_NAGLE_CORK);
755}
756
757/*
758 * Returns true if a send should try to push new data.
759 */
760static bool send_should_push(struct sock *sk, int flags)
761{
762	return should_push(sk) && !corked(tcp_sk(sk), flags);
763}
764
765void chtls_tcp_push(struct sock *sk, int flags)
766{
767	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
768	int qlen = skb_queue_len(&csk->txq);
769
770	if (likely(qlen)) {
771		struct sk_buff *skb = skb_peek_tail(&csk->txq);
772		struct tcp_sock *tp = tcp_sk(sk);
773
774		mark_urg(tp, flags, skb);
775
776		if (!(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) &&
777		    corked(tp, flags)) {
778			ULP_SKB_CB(skb)->flags |= ULPCB_FLAG_HOLD;
779			return;
780		}
781
782		ULP_SKB_CB(skb)->flags &= ~ULPCB_FLAG_HOLD;
783		if (qlen == 1 &&
784		    ((ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) ||
785		     should_push(sk)))
786			chtls_push_frames(csk, 1);
787	}
788}
789
/*
 * Pick the linear-area size for a new send sk_buff.
 *
 * @io_len: amount of data the caller wants to send
 * @flags:  sendmsg flags, used to decide whether the data is pushed at once
 * @len:    header length handed to SKB_MAX_HEAD()
 *
 * Returns
 *   0               when the data would not fit in the main body anyway,
 *                   so that only the header lives there and the payload
 *                   goes into page fragments;
 *   SKB_MAX_HEAD()  when payload is going to be accumulated;
 *   @io_len         when the skb will be sent immediately.
 *
 * Note: MSS < PAGE_SIZE is not compensated for; it does not arise in
 * normal cases and merely wastes some memory when it does.
 */
static int select_size(struct sock *sk, int io_len, int flags, int len)
{
	const int pgbreak = SKB_MAX_HEAD(len);

	if (io_len > pgbreak)
		return 0;

	return send_should_push(sk, flags) ? io_len : pgbreak;
}
818
819void skb_entail(struct sock *sk, struct sk_buff *skb, int flags)
820{
821	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
822	struct tcp_sock *tp = tcp_sk(sk);
823
824	ULP_SKB_CB(skb)->seq = tp->write_seq;
825	ULP_SKB_CB(skb)->flags = flags;
826	__skb_queue_tail(&csk->txq, skb);
827	sk->sk_wmem_queued += skb->truesize;
828
829	if (TCP_PAGE(sk) && TCP_OFF(sk)) {
830		put_page(TCP_PAGE(sk));
831		TCP_PAGE(sk) = NULL;
832		TCP_OFF(sk) = 0;
833	}
834}
835
836static struct sk_buff *get_tx_skb(struct sock *sk, int size)
837{
838	struct sk_buff *skb;
839
840	skb = alloc_skb(size + TX_HEADER_LEN, sk->sk_allocation);
841	if (likely(skb)) {
842		skb_reserve(skb, TX_HEADER_LEN);
843		skb_entail(sk, skb, ULPCB_FLAG_NEED_HDR);
844		skb_reset_transport_header(skb);
845	}
846	return skb;
847}
848
849static struct sk_buff *get_record_skb(struct sock *sk, int size, bool zcopy)
850{
851	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
852	struct sk_buff *skb;
853
854	skb = alloc_skb(((zcopy ? 0 : size) + TX_TLSHDR_LEN +
855			KEY_ON_MEM_SZ + max_ivs_size(sk, size)),
856			sk->sk_allocation);
857	if (likely(skb)) {
858		skb_reserve(skb, (TX_TLSHDR_LEN +
859			    KEY_ON_MEM_SZ + max_ivs_size(sk, size)));
860		skb_entail(sk, skb, ULPCB_FLAG_NEED_HDR);
861		skb_reset_transport_header(skb);
862		ULP_SKB_CB(skb)->ulp.tls.ofld = 1;
863		ULP_SKB_CB(skb)->ulp.tls.type = csk->tlshws.type;
864	}
865	return skb;
866}
867
868static void tx_skb_finalize(struct sk_buff *skb)
869{
870	struct ulp_skb_cb *cb = ULP_SKB_CB(skb);
871
872	if (!(cb->flags & ULPCB_FLAG_NO_HDR))
873		cb->flags = ULPCB_FLAG_NEED_HDR;
874	cb->flags |= ULPCB_FLAG_NO_APPEND;
875}
876
877static void push_frames_if_head(struct sock *sk)
878{
879	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
880
881	if (skb_queue_len(&csk->txq) == 1)
882		chtls_push_frames(csk, 1);
883}
884
885static int chtls_skb_copy_to_page_nocache(struct sock *sk,
886					  struct iov_iter *from,
887					  struct sk_buff *skb,
888					  struct page *page,
889					  int off, int copy)
890{
891	int err;
892
893	err = skb_do_copy_data_nocache(sk, skb, from, page_address(page) +
894				       off, copy, skb->len);
895	if (err)
896		return err;
897
898	skb->len             += copy;
899	skb->data_len        += copy;
900	skb->truesize        += copy;
901	sk->sk_wmem_queued   += copy;
902	return 0;
903}
904
905static bool csk_mem_free(struct chtls_dev *cdev, struct sock *sk)
906{
907	return (cdev->max_host_sndbuf - sk->sk_wmem_queued > 0);
908}
909
910static int csk_wait_memory(struct chtls_dev *cdev,
911			   struct sock *sk, long *timeo_p)
912{
913	DEFINE_WAIT_FUNC(wait, woken_wake_function);
914	int ret, err = 0;
915	long current_timeo;
916	long vm_wait = 0;
917	bool noblock;
918
919	current_timeo = *timeo_p;
920	noblock = (*timeo_p ? false : true);
921	if (csk_mem_free(cdev, sk)) {
922		current_timeo = get_random_u32_below(HZ / 5) + 2;
923		vm_wait = get_random_u32_below(HZ / 5) + 2;
924	}
925
926	add_wait_queue(sk_sleep(sk), &wait);
927	while (1) {
928		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
929
930		if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
931			goto do_error;
932		if (!*timeo_p) {
933			if (noblock)
934				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
935			goto do_nonblock;
936		}
937		if (signal_pending(current))
938			goto do_interrupted;
939		sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
940		if (csk_mem_free(cdev, sk) && !vm_wait)
941			break;
942
943		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
944		sk->sk_write_pending++;
945		ret = sk_wait_event(sk, &current_timeo, sk->sk_err ||
946				    (sk->sk_shutdown & SEND_SHUTDOWN) ||
947				    (csk_mem_free(cdev, sk) && !vm_wait),
948				    &wait);
949		sk->sk_write_pending--;
950		if (ret < 0)
951			goto do_error;
952
953		if (vm_wait) {
954			vm_wait -= current_timeo;
955			current_timeo = *timeo_p;
956			if (current_timeo != MAX_SCHEDULE_TIMEOUT) {
957				current_timeo -= vm_wait;
958				if (current_timeo < 0)
959					current_timeo = 0;
960			}
961			vm_wait = 0;
962		}
963		*timeo_p = current_timeo;
964	}
965do_rm_wq:
966	remove_wait_queue(sk_sleep(sk), &wait);
967	return err;
968do_error:
969	err = -EPIPE;
970	goto do_rm_wq;
971do_nonblock:
972	err = -EAGAIN;
973	goto do_rm_wq;
974do_interrupted:
975	err = sock_intr_errno(*timeo_p);
976	goto do_rm_wq;
977}
978
979static int chtls_proccess_cmsg(struct sock *sk, struct msghdr *msg,
980			       unsigned char *record_type)
981{
982	struct cmsghdr *cmsg;
983	int rc = -EINVAL;
984
985	for_each_cmsghdr(cmsg, msg) {
986		if (!CMSG_OK(msg, cmsg))
987			return -EINVAL;
988		if (cmsg->cmsg_level != SOL_TLS)
989			continue;
990
991		switch (cmsg->cmsg_type) {
992		case TLS_SET_RECORD_TYPE:
993			if (cmsg->cmsg_len < CMSG_LEN(sizeof(*record_type)))
994				return -EINVAL;
995
996			if (msg->msg_flags & MSG_MORE)
997				return -EINVAL;
998
999			*record_type = *(unsigned char *)CMSG_DATA(cmsg);
1000			rc = 0;
1001			break;
1002		default:
1003			return -EINVAL;
1004		}
1005	}
1006
1007	return rc;
1008}
1009
1010int chtls_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
1011{
1012	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
1013	struct chtls_dev *cdev = csk->cdev;
1014	struct tcp_sock *tp = tcp_sk(sk);
1015	struct sk_buff *skb;
1016	int mss, flags, err;
1017	int recordsz = 0;
1018	int copied = 0;
1019	long timeo;
1020
1021	lock_sock(sk);
1022	flags = msg->msg_flags;
1023	timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
1024
1025	if (!sk_in_state(sk, TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) {
1026		err = sk_stream_wait_connect(sk, &timeo);
1027		if (err)
1028			goto out_err;
1029	}
1030
1031	sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
1032	err = -EPIPE;
1033	if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
1034		goto out_err;
1035
1036	mss = csk->mss;
1037	csk_set_flag(csk, CSK_TX_MORE_DATA);
1038
1039	while (msg_data_left(msg)) {
1040		int copy = 0;
1041
1042		skb = skb_peek_tail(&csk->txq);
1043		if (skb) {
1044			copy = mss - skb->len;
1045			skb->ip_summed = CHECKSUM_UNNECESSARY;
1046		}
1047		if (!csk_mem_free(cdev, sk))
1048			goto wait_for_sndbuf;
1049
1050		if (is_tls_tx(csk) && !csk->tlshws.txleft) {
1051			unsigned char record_type = TLS_RECORD_TYPE_DATA;
1052
1053			if (unlikely(msg->msg_controllen)) {
1054				err = chtls_proccess_cmsg(sk, msg,
1055							  &record_type);
1056				if (err)
1057					goto out_err;
1058
1059				/* Avoid appending tls handshake, alert to tls data */
1060				if (skb)
1061					tx_skb_finalize(skb);
1062			}
1063
1064			recordsz = size;
1065			csk->tlshws.txleft = recordsz;
1066			csk->tlshws.type = record_type;
1067		}
1068
1069		if (!skb || (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) ||
1070		    copy <= 0) {
1071new_buf:
1072			if (skb) {
1073				tx_skb_finalize(skb);
1074				push_frames_if_head(sk);
1075			}
1076
1077			if (is_tls_tx(csk)) {
1078				skb = get_record_skb(sk,
1079						     select_size(sk,
1080								 recordsz,
1081								 flags,
1082								 TX_TLSHDR_LEN),
1083								 false);
1084			} else {
1085				skb = get_tx_skb(sk,
1086						 select_size(sk, size, flags,
1087							     TX_HEADER_LEN));
1088			}
1089			if (unlikely(!skb))
1090				goto wait_for_memory;
1091
1092			skb->ip_summed = CHECKSUM_UNNECESSARY;
1093			copy = mss;
1094		}
1095		if (copy > size)
1096			copy = size;
1097
1098		if (msg->msg_flags & MSG_SPLICE_PAGES) {
1099			err = skb_splice_from_iter(skb, &msg->msg_iter, copy,
1100						   sk->sk_allocation);
1101			if (err < 0) {
1102				if (err == -EMSGSIZE)
1103					goto new_buf;
1104				goto do_fault;
1105			}
1106			copy = err;
1107			sk_wmem_queued_add(sk, copy);
1108		} else if (skb_tailroom(skb) > 0) {
1109			copy = min(copy, skb_tailroom(skb));
1110			if (is_tls_tx(csk))
1111				copy = min_t(int, copy, csk->tlshws.txleft);
1112			err = skb_add_data_nocache(sk, skb,
1113						   &msg->msg_iter, copy);
1114			if (err)
1115				goto do_fault;
1116		} else {
1117			int i = skb_shinfo(skb)->nr_frags;
1118			struct page *page = TCP_PAGE(sk);
1119			int pg_size = PAGE_SIZE;
1120			int off = TCP_OFF(sk);
1121			bool merge;
1122
1123			if (page)
1124				pg_size = page_size(page);
1125			if (off < pg_size &&
1126			    skb_can_coalesce(skb, i, page, off)) {
1127				merge = true;
1128				goto copy;
1129			}
1130			merge = false;
1131			if (i == (is_tls_tx(csk) ? (MAX_SKB_FRAGS - 1) :
1132			    MAX_SKB_FRAGS))
1133				goto new_buf;
1134
1135			if (page && off == pg_size) {
1136				put_page(page);
1137				TCP_PAGE(sk) = page = NULL;
1138				pg_size = PAGE_SIZE;
1139			}
1140
1141			if (!page) {
1142				gfp_t gfp = sk->sk_allocation;
1143				int order = cdev->send_page_order;
1144
1145				if (order) {
1146					page = alloc_pages(gfp | __GFP_COMP |
1147							   __GFP_NOWARN |
1148							   __GFP_NORETRY,
1149							   order);
1150					if (page)
1151						pg_size <<= order;
1152				}
1153				if (!page) {
1154					page = alloc_page(gfp);
1155					pg_size = PAGE_SIZE;
1156				}
1157				if (!page)
1158					goto wait_for_memory;
1159				off = 0;
1160			}
1161copy:
1162			if (copy > pg_size - off)
1163				copy = pg_size - off;
1164			if (is_tls_tx(csk))
1165				copy = min_t(int, copy, csk->tlshws.txleft);
1166
1167			err = chtls_skb_copy_to_page_nocache(sk, &msg->msg_iter,
1168							     skb, page,
1169							     off, copy);
1170			if (unlikely(err)) {
1171				if (!TCP_PAGE(sk)) {
1172					TCP_PAGE(sk) = page;
1173					TCP_OFF(sk) = 0;
1174				}
1175				goto do_fault;
1176			}
1177			/* Update the skb. */
1178			if (merge) {
1179				skb_frag_size_add(
1180						&skb_shinfo(skb)->frags[i - 1],
1181						copy);
1182			} else {
1183				skb_fill_page_desc(skb, i, page, off, copy);
1184				if (off + copy < pg_size) {
1185					/* space left keep page */
1186					get_page(page);
1187					TCP_PAGE(sk) = page;
1188				} else {
1189					TCP_PAGE(sk) = NULL;
1190				}
1191			}
1192			TCP_OFF(sk) = off + copy;
1193		}
1194		if (unlikely(skb->len == mss))
1195			tx_skb_finalize(skb);
1196		tp->write_seq += copy;
1197		copied += copy;
1198		size -= copy;
1199
1200		if (is_tls_tx(csk))
1201			csk->tlshws.txleft -= copy;
1202
1203		if (corked(tp, flags) &&
1204		    (sk_stream_wspace(sk) < sk_stream_min_wspace(sk)))
1205			ULP_SKB_CB(skb)->flags |= ULPCB_FLAG_NO_APPEND;
1206
1207		if (size == 0)
1208			goto out;
1209
1210		if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND)
1211			push_frames_if_head(sk);
1212		continue;
1213wait_for_sndbuf:
1214		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1215wait_for_memory:
1216		err = csk_wait_memory(cdev, sk, &timeo);
1217		if (err)
1218			goto do_error;
1219	}
1220out:
1221	csk_reset_flag(csk, CSK_TX_MORE_DATA);
1222	if (copied)
1223		chtls_tcp_push(sk, flags);
1224done:
1225	release_sock(sk);
1226	return copied;
1227do_fault:
1228	if (!skb->len) {
1229		__skb_unlink(skb, &csk->txq);
1230		sk->sk_wmem_queued -= skb->truesize;
1231		__kfree_skb(skb);
1232	}
1233do_error:
1234	if (copied)
1235		goto out;
1236out_err:
1237	if (csk_conn_inline(csk))
1238		csk_reset_flag(csk, CSK_TX_MORE_DATA);
1239	copied = sk_stream_error(sk, flags, err);
1240	goto done;
1241}
1242
1243void chtls_splice_eof(struct socket *sock)
1244{
1245	struct sock *sk = sock->sk;
1246
1247	lock_sock(sk);
1248	chtls_tcp_push(sk, 0);
1249	release_sock(sk);
1250}
1251
1252static void chtls_select_window(struct sock *sk)
1253{
1254	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
1255	struct tcp_sock *tp = tcp_sk(sk);
1256	unsigned int wnd = tp->rcv_wnd;
1257
1258	wnd = max_t(unsigned int, wnd, tcp_full_space(sk));
1259	wnd = max_t(unsigned int, MIN_RCV_WND, wnd);
1260
1261	if (wnd > MAX_RCV_WND)
1262		wnd = MAX_RCV_WND;
1263
1264/*
1265 * Check if we need to grow the receive window in response to an increase in
1266 * the socket's receive buffer size.  Some applications increase the buffer
1267 * size dynamically and rely on the window to grow accordingly.
1268 */
1269
1270	if (wnd > tp->rcv_wnd) {
1271		tp->rcv_wup -= wnd - tp->rcv_wnd;
1272		tp->rcv_wnd = wnd;
1273		/* Mark the receive window as updated */
1274		csk_reset_flag(csk, CSK_UPDATE_RCV_WND);
1275	}
1276}
1277
1278/*
1279 * Send RX credits through an RX_DATA_ACK CPL message.  We are permitted
1280 * to return without sending the message in case we cannot allocate
1281 * an sk_buff.  Returns the number of credits sent.
1282 */
1283static u32 send_rx_credits(struct chtls_sock *csk, u32 credits)
1284{
1285	struct cpl_rx_data_ack *req;
1286	struct sk_buff *skb;
1287
1288	skb = alloc_skb(sizeof(*req), GFP_ATOMIC);
1289	if (!skb)
1290		return 0;
1291	__skb_put(skb, sizeof(*req));
1292	req = (struct cpl_rx_data_ack *)skb->head;
1293
1294	set_wr_txq(skb, CPL_PRIORITY_ACK, csk->port_id);
1295	INIT_TP_WR(req, csk->tid);
1296	OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_RX_DATA_ACK,
1297						    csk->tid));
1298	req->credit_dack = cpu_to_be32(RX_CREDITS_V(credits) |
1299				       RX_FORCE_ACK_F);
1300	cxgb4_ofld_send(csk->cdev->ports[csk->port_id], skb);
1301	return credits;
1302}
1303
/*
 * Mask of TCPF_* state bits (ESTABLISHED, FIN_WAIT1, FIN_WAIT2) tested by
 * chtls_cleanup_rbuf() before it returns RX credits.
 */
#define CREDIT_RETURN_STATE (TCPF_ESTABLISHED | \
			     TCPF_FIN_WAIT1 | \
			     TCPF_FIN_WAIT2)
1307
1308/*
1309 * Called after some received data has been read.  It returns RX credits
1310 * to the HW for the amount of data processed.
1311 */
1312static void chtls_cleanup_rbuf(struct sock *sk, int copied)
1313{
1314	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
1315	struct tcp_sock *tp;
1316	int must_send;
1317	u32 credits;
1318	u32 thres;
1319
1320	thres = 15 * 1024;
1321
1322	if (!sk_in_state(sk, CREDIT_RETURN_STATE))
1323		return;
1324
1325	chtls_select_window(sk);
1326	tp = tcp_sk(sk);
1327	credits = tp->copied_seq - tp->rcv_wup;
1328	if (unlikely(!credits))
1329		return;
1330
1331/*
1332 * For coalescing to work effectively ensure the receive window has
1333 * at least 16KB left.
1334 */
1335	must_send = credits + 16384 >= tp->rcv_wnd;
1336
1337	if (must_send || credits >= thres)
1338		tp->rcv_wup += send_rx_credits(csk, credits);
1339}
1340
1341static int chtls_pt_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
1342			    int flags, int *addr_len)
1343{
1344	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
1345	struct chtls_hws *hws = &csk->tlshws;
1346	struct net_device *dev = csk->egress_dev;
1347	struct adapter *adap = netdev2adap(dev);
1348	struct tcp_sock *tp = tcp_sk(sk);
1349	unsigned long avail;
1350	int buffers_freed;
1351	int copied = 0;
1352	int target;
1353	long timeo;
1354	int ret;
1355
1356	buffers_freed = 0;
1357
1358	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
1359	target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
1360
1361	if (unlikely(csk_flag(sk, CSK_UPDATE_RCV_WND)))
1362		chtls_cleanup_rbuf(sk, copied);
1363
1364	do {
1365		struct sk_buff *skb;
1366		u32 offset = 0;
1367
1368		if (unlikely(tp->urg_data &&
1369			     tp->urg_seq == tp->copied_seq)) {
1370			if (copied)
1371				break;
1372			if (signal_pending(current)) {
1373				copied = timeo ? sock_intr_errno(timeo) :
1374					-EAGAIN;
1375				break;
1376			}
1377		}
1378		skb = skb_peek(&sk->sk_receive_queue);
1379		if (skb)
1380			goto found_ok_skb;
1381		if (csk->wr_credits &&
1382		    skb_queue_len(&csk->txq) &&
1383		    chtls_push_frames(csk, csk->wr_credits ==
1384				      csk->wr_max_credits))
1385			sk->sk_write_space(sk);
1386
1387		if (copied >= target && !READ_ONCE(sk->sk_backlog.tail))
1388			break;
1389
1390		if (copied) {
1391			if (sk->sk_err || sk->sk_state == TCP_CLOSE ||
1392			    (sk->sk_shutdown & RCV_SHUTDOWN) ||
1393			    signal_pending(current))
1394				break;
1395
1396			if (!timeo)
1397				break;
1398		} else {
1399			if (sock_flag(sk, SOCK_DONE))
1400				break;
1401			if (sk->sk_err) {
1402				copied = sock_error(sk);
1403				break;
1404			}
1405			if (sk->sk_shutdown & RCV_SHUTDOWN)
1406				break;
1407			if (sk->sk_state == TCP_CLOSE) {
1408				copied = -ENOTCONN;
1409				break;
1410			}
1411			if (!timeo) {
1412				copied = -EAGAIN;
1413				break;
1414			}
1415			if (signal_pending(current)) {
1416				copied = sock_intr_errno(timeo);
1417				break;
1418			}
1419		}
1420		if (READ_ONCE(sk->sk_backlog.tail)) {
1421			release_sock(sk);
1422			lock_sock(sk);
1423			chtls_cleanup_rbuf(sk, copied);
1424			continue;
1425		}
1426
1427		if (copied >= target)
1428			break;
1429		chtls_cleanup_rbuf(sk, copied);
1430		ret = sk_wait_data(sk, &timeo, NULL);
1431		if (ret < 0) {
1432			copied = copied ? : ret;
1433			goto unlock;
1434		}
1435		continue;
1436found_ok_skb:
1437		if (!skb->len) {
1438			skb_dst_set(skb, NULL);
1439			__skb_unlink(skb, &sk->sk_receive_queue);
1440			kfree_skb(skb);
1441
1442			if (!copied && !timeo) {
1443				copied = -EAGAIN;
1444				break;
1445			}
1446
1447			if (copied < target) {
1448				release_sock(sk);
1449				lock_sock(sk);
1450				continue;
1451			}
1452			break;
1453		}
1454		offset = hws->copied_seq;
1455		avail = skb->len - offset;
1456		if (len < avail)
1457			avail = len;
1458
1459		if (unlikely(tp->urg_data)) {
1460			u32 urg_offset = tp->urg_seq - tp->copied_seq;
1461
1462			if (urg_offset < avail) {
1463				if (urg_offset) {
1464					avail = urg_offset;
1465				} else if (!sock_flag(sk, SOCK_URGINLINE)) {
1466					/* First byte is urgent, skip */
1467					tp->copied_seq++;
1468					offset++;
1469					avail--;
1470					if (!avail)
1471						goto skip_copy;
1472				}
1473			}
1474		}
1475		/* Set record type if not already done. For a non-data record,
1476		 * do not proceed if record type could not be copied.
1477		 */
1478		if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_TLS_HDR) {
1479			struct tls_hdr *thdr = (struct tls_hdr *)skb->data;
1480			int cerr = 0;
1481
1482			cerr = put_cmsg(msg, SOL_TLS, TLS_GET_RECORD_TYPE,
1483					sizeof(thdr->type), &thdr->type);
1484
1485			if (cerr && thdr->type != TLS_RECORD_TYPE_DATA) {
1486				copied = -EIO;
1487				break;
1488			}
1489			/*  don't send tls header, skip copy */
1490			goto skip_copy;
1491		}
1492
1493		if (skb_copy_datagram_msg(skb, offset, msg, avail)) {
1494			if (!copied) {
1495				copied = -EFAULT;
1496				break;
1497			}
1498		}
1499
1500		copied += avail;
1501		len -= avail;
1502		hws->copied_seq += avail;
1503skip_copy:
1504		if (tp->urg_data && after(tp->copied_seq, tp->urg_seq))
1505			tp->urg_data = 0;
1506
1507		if ((avail + offset) >= skb->len) {
1508			struct sk_buff *next_skb;
1509			if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_TLS_HDR) {
1510				tp->copied_seq += skb->len;
1511				hws->rcvpld = skb->hdr_len;
1512			} else {
1513				atomic_inc(&adap->chcr_stats.tls_pdu_rx);
1514				tp->copied_seq += hws->rcvpld;
1515			}
1516			chtls_free_skb(sk, skb);
1517			buffers_freed++;
1518			hws->copied_seq = 0;
1519			next_skb = skb_peek(&sk->sk_receive_queue);
1520			if (copied >= target && !next_skb)
1521				break;
1522			if (ULP_SKB_CB(next_skb)->flags & ULPCB_FLAG_TLS_HDR)
1523				break;
1524		}
1525	} while (len > 0);
1526
1527	if (buffers_freed)
1528		chtls_cleanup_rbuf(sk, copied);
1529
1530unlock:
1531	release_sock(sk);
1532	return copied;
1533}
1534
1535/*
1536 * Peek at data in a socket's receive buffer.
1537 */
1538static int peekmsg(struct sock *sk, struct msghdr *msg,
1539		   size_t len, int flags)
1540{
1541	struct tcp_sock *tp = tcp_sk(sk);
1542	u32 peek_seq, offset;
1543	struct sk_buff *skb;
1544	int copied = 0;
1545	size_t avail;          /* amount of available data in current skb */
1546	long timeo;
1547	int ret;
1548
1549	lock_sock(sk);
1550	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
1551	peek_seq = tp->copied_seq;
1552
1553	do {
1554		if (unlikely(tp->urg_data && tp->urg_seq == peek_seq)) {
1555			if (copied)
1556				break;
1557			if (signal_pending(current)) {
1558				copied = timeo ? sock_intr_errno(timeo) :
1559				-EAGAIN;
1560				break;
1561			}
1562		}
1563
1564		skb_queue_walk(&sk->sk_receive_queue, skb) {
1565			offset = peek_seq - ULP_SKB_CB(skb)->seq;
1566			if (offset < skb->len)
1567				goto found_ok_skb;
1568		}
1569
1570		/* empty receive queue */
1571		if (copied)
1572			break;
1573		if (sock_flag(sk, SOCK_DONE))
1574			break;
1575		if (sk->sk_err) {
1576			copied = sock_error(sk);
1577			break;
1578		}
1579		if (sk->sk_shutdown & RCV_SHUTDOWN)
1580			break;
1581		if (sk->sk_state == TCP_CLOSE) {
1582			copied = -ENOTCONN;
1583			break;
1584		}
1585		if (!timeo) {
1586			copied = -EAGAIN;
1587			break;
1588		}
1589		if (signal_pending(current)) {
1590			copied = sock_intr_errno(timeo);
1591			break;
1592		}
1593
1594		if (READ_ONCE(sk->sk_backlog.tail)) {
1595			/* Do not sleep, just process backlog. */
1596			release_sock(sk);
1597			lock_sock(sk);
1598		} else {
1599			ret = sk_wait_data(sk, &timeo, NULL);
1600			if (ret < 0) {
1601				/* here 'copied' is 0 due to previous checks */
1602				copied = ret;
1603				break;
1604			}
1605		}
1606
1607		if (unlikely(peek_seq != tp->copied_seq)) {
1608			if (net_ratelimit())
1609				pr_info("TCP(%s:%d), race in MSG_PEEK.\n",
1610					current->comm, current->pid);
1611			peek_seq = tp->copied_seq;
1612		}
1613		continue;
1614
1615found_ok_skb:
1616		avail = skb->len - offset;
1617		if (len < avail)
1618			avail = len;
1619		/*
1620		 * Do we have urgent data here?  We need to skip over the
1621		 * urgent byte.
1622		 */
1623		if (unlikely(tp->urg_data)) {
1624			u32 urg_offset = tp->urg_seq - peek_seq;
1625
1626			if (urg_offset < avail) {
1627				/*
1628				 * The amount of data we are preparing to copy
1629				 * contains urgent data.
1630				 */
1631				if (!urg_offset) { /* First byte is urgent */
1632					if (!sock_flag(sk, SOCK_URGINLINE)) {
1633						peek_seq++;
1634						offset++;
1635						avail--;
1636					}
1637					if (!avail)
1638						continue;
1639				} else {
1640					/* stop short of the urgent data */
1641					avail = urg_offset;
1642				}
1643			}
1644		}
1645
1646		/*
1647		 * If MSG_TRUNC is specified the data is discarded.
1648		 */
1649		if (likely(!(flags & MSG_TRUNC)))
1650			if (skb_copy_datagram_msg(skb, offset, msg, len)) {
1651				if (!copied) {
1652					copied = -EFAULT;
1653					break;
1654				}
1655			}
1656		peek_seq += avail;
1657		copied += avail;
1658		len -= avail;
1659	} while (len > 0);
1660
1661	release_sock(sk);
1662	return copied;
1663}
1664
1665int chtls_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
1666		  int flags, int *addr_len)
1667{
1668	struct tcp_sock *tp = tcp_sk(sk);
1669	struct chtls_sock *csk;
1670	unsigned long avail;    /* amount of available data in current skb */
1671	int buffers_freed;
1672	int copied = 0;
1673	long timeo;
1674	int target;             /* Read at least this many bytes */
1675	int ret;
1676
1677	buffers_freed = 0;
1678
1679	if (unlikely(flags & MSG_OOB))
1680		return tcp_prot.recvmsg(sk, msg, len, flags, addr_len);
1681
1682	if (unlikely(flags & MSG_PEEK))
1683		return peekmsg(sk, msg, len, flags);
1684
1685	if (sk_can_busy_loop(sk) &&
1686	    skb_queue_empty_lockless(&sk->sk_receive_queue) &&
1687	    sk->sk_state == TCP_ESTABLISHED)
1688		sk_busy_loop(sk, flags & MSG_DONTWAIT);
1689
1690	lock_sock(sk);
1691	csk = rcu_dereference_sk_user_data(sk);
1692
1693	if (is_tls_rx(csk))
1694		return chtls_pt_recvmsg(sk, msg, len, flags, addr_len);
1695
1696	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
1697	target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
1698
1699	if (unlikely(csk_flag(sk, CSK_UPDATE_RCV_WND)))
1700		chtls_cleanup_rbuf(sk, copied);
1701
1702	do {
1703		struct sk_buff *skb;
1704		u32 offset;
1705
1706		if (unlikely(tp->urg_data && tp->urg_seq == tp->copied_seq)) {
1707			if (copied)
1708				break;
1709			if (signal_pending(current)) {
1710				copied = timeo ? sock_intr_errno(timeo) :
1711					-EAGAIN;
1712				break;
1713			}
1714		}
1715
1716		skb = skb_peek(&sk->sk_receive_queue);
1717		if (skb)
1718			goto found_ok_skb;
1719
1720		if (csk->wr_credits &&
1721		    skb_queue_len(&csk->txq) &&
1722		    chtls_push_frames(csk, csk->wr_credits ==
1723				      csk->wr_max_credits))
1724			sk->sk_write_space(sk);
1725
1726		if (copied >= target && !READ_ONCE(sk->sk_backlog.tail))
1727			break;
1728
1729		if (copied) {
1730			if (sk->sk_err || sk->sk_state == TCP_CLOSE ||
1731			    (sk->sk_shutdown & RCV_SHUTDOWN) ||
1732			    signal_pending(current))
1733				break;
1734		} else {
1735			if (sock_flag(sk, SOCK_DONE))
1736				break;
1737			if (sk->sk_err) {
1738				copied = sock_error(sk);
1739				break;
1740			}
1741			if (sk->sk_shutdown & RCV_SHUTDOWN)
1742				break;
1743			if (sk->sk_state == TCP_CLOSE) {
1744				copied = -ENOTCONN;
1745				break;
1746			}
1747			if (!timeo) {
1748				copied = -EAGAIN;
1749				break;
1750			}
1751			if (signal_pending(current)) {
1752				copied = sock_intr_errno(timeo);
1753				break;
1754			}
1755		}
1756
1757		if (READ_ONCE(sk->sk_backlog.tail)) {
1758			release_sock(sk);
1759			lock_sock(sk);
1760			chtls_cleanup_rbuf(sk, copied);
1761			continue;
1762		}
1763
1764		if (copied >= target)
1765			break;
1766		chtls_cleanup_rbuf(sk, copied);
1767		ret = sk_wait_data(sk, &timeo, NULL);
1768		if (ret < 0) {
1769			copied = copied ? : ret;
1770			goto unlock;
1771		}
1772		continue;
1773
1774found_ok_skb:
1775		if (!skb->len) {
1776			chtls_kfree_skb(sk, skb);
1777			if (!copied && !timeo) {
1778				copied = -EAGAIN;
1779				break;
1780			}
1781
1782			if (copied < target)
1783				continue;
1784
1785			break;
1786		}
1787
1788		offset = tp->copied_seq - ULP_SKB_CB(skb)->seq;
1789		avail = skb->len - offset;
1790		if (len < avail)
1791			avail = len;
1792
1793		if (unlikely(tp->urg_data)) {
1794			u32 urg_offset = tp->urg_seq - tp->copied_seq;
1795
1796			if (urg_offset < avail) {
1797				if (urg_offset) {
1798					avail = urg_offset;
1799				} else if (!sock_flag(sk, SOCK_URGINLINE)) {
1800					tp->copied_seq++;
1801					offset++;
1802					avail--;
1803					if (!avail)
1804						goto skip_copy;
1805				}
1806			}
1807		}
1808
1809		if (likely(!(flags & MSG_TRUNC))) {
1810			if (skb_copy_datagram_msg(skb, offset,
1811						  msg, avail)) {
1812				if (!copied) {
1813					copied = -EFAULT;
1814					break;
1815				}
1816			}
1817		}
1818
1819		tp->copied_seq += avail;
1820		copied += avail;
1821		len -= avail;
1822
1823skip_copy:
1824		if (tp->urg_data && after(tp->copied_seq, tp->urg_seq))
1825			tp->urg_data = 0;
1826
1827		if (avail + offset >= skb->len) {
1828			chtls_free_skb(sk, skb);
1829			buffers_freed++;
1830
1831			if  (copied >= target &&
1832			     !skb_peek(&sk->sk_receive_queue))
1833				break;
1834		}
1835	} while (len > 0);
1836
1837	if (buffers_freed)
1838		chtls_cleanup_rbuf(sk, copied);
1839
1840unlock:
1841	release_sock(sk);
1842	return copied;
1843}
1844