/**************************************************************************

Copyright (c) 2007, Chelsio Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Neither the name of the Chelsio Corporation nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.c 283291 2015-05-22 17:05:21Z jkim $");

#include "opt_inet.h"

#ifdef TCP_OFFLOAD
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/pciio.h>
#include <sys/conf.h>
#include <machine/bus.h>
#include <machine/resource.h>
#include <sys/bus_dma.h>
#include <sys/rman.h>
#include <sys/ioccom.h>
#include <sys/mbuf.h>
#include <sys/rwlock.h>
#include <sys/linker.h>
#include <sys/firmware.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sockio.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/queue.h>
#include <sys/taskqueue.h>
#include <sys/proc.h>
#include <sys/uio.h>

#include <net/route.h>
#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp.h>
#include <netinet/tcpip.h>

#include <rdma/ib_verbs.h>
#include <linux/idr.h>
#include <ulp/iw_cxgb/iw_cxgb_ib_intfc.h>

#include <cxgb_include.h>
#include <ulp/tom/cxgb_tom.h>
#include <ulp/tom/cxgb_toepcb.h>

#include <ulp/iw_cxgb/iw_cxgb_wr.h>
#include <ulp/iw_cxgb/iw_cxgb_hal.h>
#include <ulp/iw_cxgb/iw_cxgb_provider.h>
#include <ulp/iw_cxgb/iw_cxgb_cm.h>
#include <ulp/iw_cxgb/iw_cxgb.h>

#ifdef KTR
static char *states[] = {
	"idle",
	"listen",
	"connecting",
	"mpa_wait_req",
	"mpa_req_sent",
	"mpa_req_rcvd",
	"mpa_rep_sent",
	"fpdu_mode",
	"aborting",
	"closing",
	"moribund",
	"dead",
	NULL,
};
#endif

SYSCTL_NODE(_hw, OID_AUTO, iw_cxgb, CTLFLAG_RD, 0, "iw_cxgb driver parameters");

static int ep_timeout_secs = 60;
SYSCTL_INT(_hw_iw_cxgb, OID_AUTO, ep_timeout_secs, CTLFLAG_RWTUN, &ep_timeout_secs, 0,
    "CM Endpoint operation timeout in seconds (default=60)");

static int mpa_rev = 1;
SYSCTL_INT(_hw_iw_cxgb, OID_AUTO, mpa_rev, CTLFLAG_RWTUN, &mpa_rev, 0,
    "MPA Revision, 0 supports amso1100, 1 is spec compliant. (default=1)");

static int markers_enabled = 0;
SYSCTL_INT(_hw_iw_cxgb, OID_AUTO, markers_enabled, CTLFLAG_RWTUN, &markers_enabled, 0,
    "Enable MPA MARKERS (default(0)=disabled)");

static int crc_enabled = 1;
SYSCTL_INT(_hw_iw_cxgb, OID_AUTO, crc_enabled, CTLFLAG_RWTUN, &crc_enabled, 0,
    "Enable MPA CRC (default(1)=enabled)");

static int rcv_win = 256 * 1024;
SYSCTL_INT(_hw_iw_cxgb, OID_AUTO, rcv_win, CTLFLAG_RWTUN, &rcv_win, 0,
    "TCP receive window in bytes (default=256KB)");

static int snd_win = 32 * 1024;
SYSCTL_INT(_hw_iw_cxgb, OID_AUTO, snd_win, CTLFLAG_RWTUN, &snd_win, 0,
    "TCP send window in bytes (default=32KB)");

static unsigned int nocong = 0;
SYSCTL_UINT(_hw_iw_cxgb, OID_AUTO, nocong, CTLFLAG_RWTUN, &nocong, 0,
    "Turn off congestion control (default=0)");

static unsigned int cong_flavor = 1;
SYSCTL_UINT(_hw_iw_cxgb, OID_AUTO, cong_flavor, CTLFLAG_RWTUN, &cong_flavor, 0,
    "TCP Congestion control flavor (default=1)");

static void ep_timeout(void *arg);
static void connect_reply_upcall(struct iwch_ep *ep, int status);
static int iwch_so_upcall(struct socket *so, void *arg, int waitflag);

/*
 * Cruft to offload socket upcalls onto thread.
 */
static struct mtx req_lock;
static TAILQ_HEAD(iwch_ep_list, iwch_ep_common) req_list;
static struct task iw_cxgb_task;
static struct taskqueue *iw_cxgb_taskq;
static void process_req(void *ctx, int pending);

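/*
 * Arm the endpoint timer.  The first activation takes an extra reference
 * on the endpoint so it cannot be freed while the timer is outstanding;
 * if the timer is already pending it is stopped and rescheduled.
 */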
static void
start_ep_timer(struct iwch_ep *ep)
{
	CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep);
	if (callout_pending(&ep->timer)) {
		CTR2(KTR_IW_CXGB, "%s stopped / restarted timer ep %p", __FUNCTION__, ep);
		callout_deactivate(&ep->timer);
		callout_drain(&ep->timer);
	} else {
		/*
		 * XXX this looks racy
		 */
		get_ep(&ep->com);
		callout_init(&ep->timer, 1);
	}
	callout_reset(&ep->timer, ep_timeout_secs * hz, ep_timeout, ep);
}

static void
stop_ep_timer(struct iwch_ep *ep)
{
	CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep);
	if (!callout_pending(&ep->timer)) {
		CTR3(KTR_IW_CXGB, "%s timer stopped when it's not running!  ep %p state %u\n",
		    __func__, ep, ep->com.state);
		return;
	}
	callout_drain(&ep->timer);
	put_ep(&ep->com);
}

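/*
 * Snapshot the TCP state the RDMA side needs (hardware tid, send/receive
 * sequence numbers, and effective MSS) from the offloaded connection's
 * tcpcb/toepcb.  Fails if the connection is not offloaded.
 */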
static int
set_tcpinfo(struct iwch_ep *ep)
{
	struct socket *so = ep->com.so;
	struct inpcb *inp = sotoinpcb(so);
	struct tcpcb *tp;
	struct toepcb *toep;
	int rc = 0;

	INP_WLOCK(inp);
	tp = intotcpcb(inp);

	if ((tp->t_flags & TF_TOE) == 0) {
		rc = EINVAL;
		printf("%s: connection NOT OFFLOADED!\n", __func__);
		goto done;
	}
	toep = tp->t_toe;

	ep->hwtid = toep->tp_tid;
	ep->snd_seq = tp->snd_nxt;
	ep->rcv_seq = tp->rcv_nxt;
	ep->emss = tp->t_maxseg;
	if (ep->emss < 128)
		ep->emss = 128;
done:
	INP_WUNLOCK(inp);
	return (rc);
}

static enum iwch_ep_state
state_read(struct iwch_ep_common *epc)
{
	enum iwch_ep_state state;

	mtx_lock(&epc->lock);
	state = epc->state;
	mtx_unlock(&epc->lock);
	return state;
}

static void
__state_set(struct iwch_ep_common *epc, enum iwch_ep_state new)
{
	epc->state = new;
}

static void
state_set(struct iwch_ep_common *epc, enum iwch_ep_state new)
{

	mtx_lock(&epc->lock);
	CTR3(KTR_IW_CXGB, "%s - %s -> %s", __FUNCTION__, states[epc->state], states[new]);
	__state_set(epc, new);
	mtx_unlock(&epc->lock);
	return;
}

static void *
alloc_ep(int size, int flags)
{
	struct iwch_ep_common *epc;

	epc = malloc(size, M_DEVBUF, flags);
	if (epc) {
		memset(epc, 0, size);
		refcount_init(&epc->refcount, 1);
		mtx_init(&epc->lock, "iwch_epc lock", NULL, MTX_DEF|MTX_DUPOK);
		cv_init(&epc->waitq, "iwch_epc cv");
	}
	CTR2(KTR_IW_CXGB, "%s alloc ep %p", __FUNCTION__, epc);
	return epc;
}

void __free_ep(struct iwch_ep_common *epc)
{
	CTR3(KTR_IW_CXGB, "%s ep %p state %s", __FUNCTION__, epc, states[state_read(epc)]);
	KASSERT(!epc->so, ("%s warning ep->so %p \n", __FUNCTION__, epc->so));
	KASSERT(!epc->entry.tqe_prev, ("%s epc %p still on req list!\n", __FUNCTION__, epc));
	free(epc, M_DEVBUF);
}

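/*
 * Resolve a route to the peer.  Note that only the destination address is
 * used; the local address, ports, and TOS arguments are currently unused.
 */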
static struct rtentry *
find_route(__be32 local_ip, __be32 peer_ip, __be16 local_port,
    __be16 peer_port, u8 tos)
{
	struct route iproute;
	struct sockaddr_in *dst = (struct sockaddr_in *)&iproute.ro_dst;

	bzero(&iproute, sizeof iproute);
	dst->sin_family = AF_INET;
	dst->sin_len = sizeof *dst;
	dst->sin_addr.s_addr = peer_ip;

	rtalloc(&iproute);
	return iproute.ro_rt;
}

static void
close_socket(struct iwch_ep_common *epc, int close)
{
	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, epc, epc->so, states[epc->state]);
	SOCK_LOCK(epc->so);
	soupcall_clear(epc->so, SO_RCV);
	SOCK_UNLOCK(epc->so);
	if (close)
		soclose(epc->so);
	else
		soshutdown(epc->so, SHUT_RDWR);
	epc->so = NULL;
}

static void
shutdown_socket(struct iwch_ep_common *epc)
{
	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, epc, epc->so, states[epc->state]);
	soshutdown(epc->so, SHUT_WR);
}

static void
abort_socket(struct iwch_ep *ep)
{
	struct sockopt sopt;
	int err;
	struct linger l;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
	l.l_onoff = 1;
	l.l_linger = 0;

	/* linger_time of 0 forces RST to be sent */
	sopt.sopt_dir = SOPT_SET;
	sopt.sopt_level = SOL_SOCKET;
	sopt.sopt_name = SO_LINGER;
	sopt.sopt_val = (caddr_t)&l;
	sopt.sopt_valsize = sizeof l;
	sopt.sopt_td = NULL;
	err = sosetopt(ep->com.so, &sopt);
	if (err)
		printf("%s can't set linger to 0, no RST! err %d\n", __FUNCTION__, err);
}

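/*
 * Build and send the MPA start request, including any private data the
 * ULP supplied, then arm the endpoint timer and move to MPA_REQ_SENT.
 */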
static void
send_mpa_req(struct iwch_ep *ep)
{
	int mpalen;
	struct mpa_message *mpa;
	struct mbuf *m;
	int err;

	CTR3(KTR_IW_CXGB, "%s ep %p pd_len %d", __FUNCTION__, ep, ep->plen);

	mpalen = sizeof(*mpa) + ep->plen;
	/* The message may exceed MHLEN, so let m_get2() size the mbuf. */
	m = m_get2(mpalen, M_NOWAIT, MT_DATA, M_PKTHDR);
	if (m == NULL) {
		connect_reply_upcall(ep, -ENOMEM);
		return;
	}
	mpa = mtod(m, struct mpa_message *);
	m->m_len = mpalen;
	m->m_pkthdr.len = mpalen;
	memset(mpa, 0, sizeof(*mpa));
	memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key));
	mpa->flags = (crc_enabled ? MPA_CRC : 0) |
		     (markers_enabled ? MPA_MARKERS : 0);
	mpa->private_data_size = htons(ep->plen);
	mpa->revision = mpa_rev;
	if (ep->plen)
		memcpy(mpa->private_data, ep->mpa_pkt + sizeof(*mpa), ep->plen);

	err = sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT, ep->com.thread);
	if (err) {
		m_freem(m);
		connect_reply_upcall(ep, -ENOMEM);
		return;
	}

	start_ep_timer(ep);
	state_set(&ep->com, MPA_REQ_SENT);
	return;
}

static int
send_mpa_reject(struct iwch_ep *ep, const void *pdata, u8 plen)
{
	int mpalen;
	struct mpa_message *mpa;
	struct mbuf *m;
	int err;

	CTR3(KTR_IW_CXGB, "%s ep %p plen %d", __FUNCTION__, ep, plen);

	mpalen = sizeof(*mpa) + plen;

	m = m_get2(mpalen, M_NOWAIT, MT_DATA, M_PKTHDR);
	if (m == NULL) {
		printf("%s - cannot alloc mbuf!\n", __FUNCTION__);
		return (-ENOMEM);
	}
	mpa = mtod(m, struct mpa_message *);
	m->m_len = mpalen;
	m->m_pkthdr.len = mpalen;
	memset(mpa, 0, sizeof(*mpa));
	memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
	mpa->flags = MPA_REJECT;
	mpa->revision = mpa_rev;
	mpa->private_data_size = htons(plen);
	if (plen)
		memcpy(mpa->private_data, pdata, plen);
	err = sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT, ep->com.thread);
	PANIC_IF(err);
	return 0;
}

static int
send_mpa_reply(struct iwch_ep *ep, const void *pdata, u8 plen)
{
	int mpalen;
	struct mpa_message *mpa;
	struct mbuf *m;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p plen %d", __FUNCTION__, ep, ep->com.so, plen);

	mpalen = sizeof(*mpa) + plen;

	m = m_get2(mpalen, M_NOWAIT, MT_DATA, M_PKTHDR);
	if (m == NULL) {
		printf("%s - cannot alloc mbuf!\n", __FUNCTION__);
		return (-ENOMEM);
	}
	mpa = mtod(m, struct mpa_message *);
	m->m_len = mpalen;
	m->m_pkthdr.len = mpalen;
	memset(mpa, 0, sizeof(*mpa));
	memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
	mpa->flags = (ep->mpa_attr.crc_enabled ? MPA_CRC : 0) |
		     (markers_enabled ? MPA_MARKERS : 0);
	mpa->revision = mpa_rev;
	mpa->private_data_size = htons(plen);
	if (plen)
		memcpy(mpa->private_data, pdata, plen);

	state_set(&ep->com, MPA_REP_SENT);
	return sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT,
		ep->com.thread);
}

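/*
 * Upcall helpers.  Each one builds an iw_cm_event and hands it to the
 * ULP's event handler; the ones that end the connection's life also drop
 * the cm_id reference.
 */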
static void
close_complete_upcall(struct iwch_ep *ep)
{
	struct iw_cm_event event;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
	memset(&event, 0, sizeof(event));
	event.event = IW_CM_EVENT_CLOSE;
	if (ep->com.cm_id) {
		CTR3(KTR_IW_CXGB, "close complete delivered ep %p cm_id %p tid %d",
		     ep, ep->com.cm_id, ep->hwtid);
		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
		ep->com.cm_id->rem_ref(ep->com.cm_id);
		ep->com.cm_id = NULL;
		ep->com.qp = NULL;
	}
}

static void
abort_connection(struct iwch_ep *ep)
{
	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
	state_set(&ep->com, ABORTING);
	abort_socket(ep);
	close_socket(&ep->com, 0);
	close_complete_upcall(ep);
	state_set(&ep->com, DEAD);
	put_ep(&ep->com);
}

static void
peer_close_upcall(struct iwch_ep *ep)
{
	struct iw_cm_event event;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
	memset(&event, 0, sizeof(event));
	event.event = IW_CM_EVENT_DISCONNECT;
	if (ep->com.cm_id) {
		CTR3(KTR_IW_CXGB, "peer close delivered ep %p cm_id %p tid %d",
		     ep, ep->com.cm_id, ep->hwtid);
		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
	}
}

static void
peer_abort_upcall(struct iwch_ep *ep)
{
	struct iw_cm_event event;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
	memset(&event, 0, sizeof(event));
	event.event = IW_CM_EVENT_CLOSE;
	event.status = ECONNRESET;
	if (ep->com.cm_id) {
		CTR3(KTR_IW_CXGB, "abort delivered ep %p cm_id %p tid %d", ep,
		     ep->com.cm_id, ep->hwtid);
		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
		ep->com.cm_id->rem_ref(ep->com.cm_id);
		ep->com.cm_id = NULL;
		ep->com.qp = NULL;
	}
}

static void
connect_reply_upcall(struct iwch_ep *ep, int status)
{
	struct iw_cm_event event;

	CTR5(KTR_IW_CXGB, "%s ep %p so %p state %s status %d", __FUNCTION__, ep, ep->com.so, states[ep->com.state], status);
	memset(&event, 0, sizeof(event));
	event.event = IW_CM_EVENT_CONNECT_REPLY;
	event.status = status;
	event.local_addr = ep->com.local_addr;
	event.remote_addr = ep->com.remote_addr;

	if ((status == 0) || (status == ECONNREFUSED)) {
		event.private_data_len = ep->plen;
		event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
	}
	if (ep->com.cm_id) {
		CTR4(KTR_IW_CXGB, "%s ep %p tid %d status %d", __FUNCTION__, ep,
		     ep->hwtid, status);
		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
	}
	if (status < 0) {
		ep->com.cm_id->rem_ref(ep->com.cm_id);
		ep->com.cm_id = NULL;
		ep->com.qp = NULL;
	}
}

static void
connect_request_upcall(struct iwch_ep *ep)
{
	struct iw_cm_event event;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
	memset(&event, 0, sizeof(event));
	event.event = IW_CM_EVENT_CONNECT_REQUEST;
	event.local_addr = ep->com.local_addr;
	event.remote_addr = ep->com.remote_addr;
	event.private_data_len = ep->plen;
	event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
	event.provider_data = ep;
	event.so = ep->com.so;
	if (state_read(&ep->parent_ep->com) != DEAD) {
		get_ep(&ep->com);
		ep->parent_ep->com.cm_id->event_handler(
						ep->parent_ep->com.cm_id,
						&event);
	}
	put_ep(&ep->parent_ep->com);
}

static void
established_upcall(struct iwch_ep *ep)
{
	struct iw_cm_event event;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
	memset(&event, 0, sizeof(event));
	event.event = IW_CM_EVENT_ESTABLISHED;
	if (ep->com.cm_id) {
		CTR3(KTR_IW_CXGB, "%s ep %p tid %d", __FUNCTION__, ep, ep->hwtid);
		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
	}
}

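/*
 * Read the peer's MPA start reply from the socket, accumulating it in
 * ep->mpa_pkt across calls if it arrives in pieces.  Once the complete,
 * validated message (header plus private data) is present, move the QP
 * to RTS and deliver the connect reply upcall.
 */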
static void
process_mpa_reply(struct iwch_ep *ep)
{
	struct mpa_message *mpa;
	u16 plen;
	struct iwch_qp_attributes attrs;
	enum iwch_qp_attr_mask mask;
	int err;
	struct mbuf *top, *m;
	int flags = MSG_DONTWAIT;
	struct uio uio;
	int len;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);

	/*
	 * Stop mpa timer.  If it expired, then the state has
	 * changed and we bail since ep_timeout already aborted
	 * the connection.
	 */
	stop_ep_timer(ep);
	if (state_read(&ep->com) != MPA_REQ_SENT)
		return;

	uio.uio_resid = len = 1000000;
	uio.uio_td = ep->com.thread;
	err = soreceive(ep->com.so, NULL, &uio, &top, NULL, &flags);
	if (err) {
		if (err == EWOULDBLOCK) {
			start_ep_timer(ep);
			return;
		}
		err = -err;
		goto err;
	}

	if (ep->com.so->so_rcv.sb_mb) {
		printf("%s data after soreceive called! so %p sb_mb %p top %p\n",
			__FUNCTION__, ep->com.so, ep->com.so->so_rcv.sb_mb, top);
	}

	m = top;
	do {
		/*
		 * If we get more than the supported amount of private data
		 * then we must fail this connection.
		 */
		if (ep->mpa_pkt_len + m->m_len > sizeof(ep->mpa_pkt)) {
			err = (-EINVAL);
			goto err;
		}

		/*
		 * copy the new data into our accumulation buffer.
		 */
		m_copydata(m, 0, m->m_len, &(ep->mpa_pkt[ep->mpa_pkt_len]));
		ep->mpa_pkt_len += m->m_len;
		if (!m->m_next)
			m = m->m_nextpkt;
		else
			m = m->m_next;
	} while (m);

	m_freem(top);

	/*
	 * if we don't even have the mpa message, then bail.
	 */
	if (ep->mpa_pkt_len < sizeof(*mpa))
		return;
	mpa = (struct mpa_message *)ep->mpa_pkt;

	/* Validate MPA header. */
	if (mpa->revision != mpa_rev) {
		CTR2(KTR_IW_CXGB, "%s bad mpa rev %d", __FUNCTION__, mpa->revision);
		err = EPROTO;
		goto err;
	}
	if (memcmp(mpa->key, MPA_KEY_REP, sizeof(mpa->key))) {
		CTR2(KTR_IW_CXGB, "%s bad mpa key |%16s|", __FUNCTION__, mpa->key);
		err = EPROTO;
		goto err;
	}

	plen = ntohs(mpa->private_data_size);

	/*
	 * Fail if there's too much private data.
	 */
	if (plen > MPA_MAX_PRIVATE_DATA) {
		CTR2(KTR_IW_CXGB, "%s plen too big %d", __FUNCTION__, plen);
		err = EPROTO;
		goto err;
	}

	/*
	 * If plen does not account for pkt size
	 */
	if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
		CTR2(KTR_IW_CXGB, "%s pkt too big %d", __FUNCTION__, ep->mpa_pkt_len);
		err = EPROTO;
		goto err;
	}

	ep->plen = (u8) plen;

	/*
	 * If we don't have all the pdata yet, then bail.
	 * We'll continue processing when more data arrives.
	 */
	if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
		return;

	if (mpa->flags & MPA_REJECT) {
		err = ECONNREFUSED;
		goto err;
	}

	/*
	 * If we get here we have accumulated the entire mpa
	 * start reply message including private data. And
	 * the MPA header is valid.
	 */
	CTR1(KTR_IW_CXGB, "%s mpa rpl looks good!", __FUNCTION__);
	state_set(&ep->com, FPDU_MODE);
	ep->mpa_attr.initiator = 1;
	ep->mpa_attr.crc_enabled = ((mpa->flags & MPA_CRC) | crc_enabled) ? 1 : 0;
	ep->mpa_attr.recv_marker_enabled = markers_enabled;
	ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
	ep->mpa_attr.version = mpa_rev;
	if (set_tcpinfo(ep)) {
		printf("%s set_tcpinfo error\n", __FUNCTION__);
		goto err;
	}
	CTR5(KTR_IW_CXGB, "%s - crc_enabled=%d, recv_marker_enabled=%d, "
	     "xmit_marker_enabled=%d, version=%d", __FUNCTION__,
	     ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
	     ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version);

	attrs.mpa_attr = ep->mpa_attr;
	attrs.max_ird = ep->ird;
	attrs.max_ord = ep->ord;
	attrs.llp_stream_handle = ep;
	attrs.next_state = IWCH_QP_STATE_RTS;

	mask = IWCH_QP_ATTR_NEXT_STATE |
	    IWCH_QP_ATTR_LLP_STREAM_HANDLE | IWCH_QP_ATTR_MPA_ATTR |
	    IWCH_QP_ATTR_MAX_IRD | IWCH_QP_ATTR_MAX_ORD;

	/* bind QP and TID with INIT_WR */
	err = iwch_modify_qp(ep->com.qp->rhp,
			     ep->com.qp, mask, &attrs, 1);
	if (!err)
		goto out;
err:
	abort_connection(ep);
out:
	connect_reply_upcall(ep, err);
	return;
}

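/*
 * Read and validate the peer's MPA start request, accumulating partial
 * data in ep->mpa_pkt just like process_mpa_reply().  Once complete,
 * deliver a connect request upcall to the listening endpoint's ULP.
 */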
static void
process_mpa_request(struct iwch_ep *ep)
{
	struct mpa_message *mpa;
	u16 plen;
	int flags = MSG_DONTWAIT;
	struct mbuf *top, *m;
	int err;
	struct uio uio;
	int len;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);

	/*
	 * Stop mpa timer.  If it expired, then the state has
	 * changed and we bail since ep_timeout already aborted
	 * the connection.
	 */
	stop_ep_timer(ep);
	if (state_read(&ep->com) != MPA_REQ_WAIT)
		return;

	uio.uio_resid = len = 1000000;
	uio.uio_td = ep->com.thread;
	err = soreceive(ep->com.so, NULL, &uio, &top, NULL, &flags);
	if (err) {
		if (err == EWOULDBLOCK) {
			start_ep_timer(ep);
			return;
		}
		err = -err;
		goto err;
	}

	m = top;
	do {

		/*
		 * If we get more than the supported amount of private data
		 * then we must fail this connection.
		 */
		if (ep->mpa_pkt_len + m->m_len > sizeof(ep->mpa_pkt)) {
			CTR2(KTR_IW_CXGB, "%s mpa message too big %d", __FUNCTION__,
				ep->mpa_pkt_len + m->m_len);
			goto err;
		}


		/*
		 * Copy the new data into our accumulation buffer.
		 */
		m_copydata(m, 0, m->m_len, &(ep->mpa_pkt[ep->mpa_pkt_len]));
		ep->mpa_pkt_len += m->m_len;

		if (!m->m_next)
			m = m->m_nextpkt;
		else
			m = m->m_next;
	} while (m);

	m_freem(top);

	/*
	 * If we don't even have the mpa message, then bail.
	 * We'll continue processing when more data arrives.
	 */
	if (ep->mpa_pkt_len < sizeof(*mpa)) {
		start_ep_timer(ep);
		CTR2(KTR_IW_CXGB, "%s not enough header %d...waiting...", __FUNCTION__,
			ep->mpa_pkt_len);
		return;
	}
	mpa = (struct mpa_message *) ep->mpa_pkt;

	/*
	 * Validate MPA Header.
	 */
	if (mpa->revision != mpa_rev) {
		CTR2(KTR_IW_CXGB, "%s bad mpa rev %d", __FUNCTION__, mpa->revision);
		goto err;
	}

	if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key))) {
		CTR2(KTR_IW_CXGB, "%s bad mpa key |%16s|", __FUNCTION__, mpa->key);
		goto err;
	}

	plen = ntohs(mpa->private_data_size);

	/*
	 * Fail if there's too much private data.
	 */
	if (plen > MPA_MAX_PRIVATE_DATA) {
		CTR2(KTR_IW_CXGB, "%s plen too big %d", __FUNCTION__, plen);
		goto err;
	}

	/*
	 * If plen does not account for pkt size
	 */
	if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
		CTR2(KTR_IW_CXGB, "%s more data after private data %d", __FUNCTION__,
			ep->mpa_pkt_len);
		goto err;
	}
	ep->plen = (u8) plen;

	/*
	 * If we don't have all the pdata yet, then bail.
	 */
	if (ep->mpa_pkt_len < (sizeof(*mpa) + plen)) {
		start_ep_timer(ep);
		CTR2(KTR_IW_CXGB, "%s more mpa msg to come %d", __FUNCTION__,
			ep->mpa_pkt_len);
		return;
	}

	/*
	 * If we get here we have accumulated the entire mpa
	 * start request message including private data.
	 */
	ep->mpa_attr.initiator = 0;
	ep->mpa_attr.crc_enabled = ((mpa->flags & MPA_CRC) | crc_enabled) ? 1 : 0;
	ep->mpa_attr.recv_marker_enabled = markers_enabled;
	ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
	ep->mpa_attr.version = mpa_rev;
	if (set_tcpinfo(ep)) {
		printf("%s set_tcpinfo error\n", __FUNCTION__);
		goto err;
	}
	CTR5(KTR_IW_CXGB, "%s - crc_enabled=%d, recv_marker_enabled=%d, "
	     "xmit_marker_enabled=%d, version=%d", __FUNCTION__,
	     ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
	     ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version);

	state_set(&ep->com, MPA_REQ_RCVD);

	/* drive upcall */
	connect_request_upcall(ep);
	return;
err:
	abort_connection(ep);
	return;
}

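/*
 * The peer shut down its half of the connection.  Advance the endpoint
 * state machine; depending on the current state this delivers disconnect
 * or close upcalls, moves the QP toward IDLE, or does nothing at all.
 */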
static void
process_peer_close(struct iwch_ep *ep)
{
	struct iwch_qp_attributes attrs;
	int disconnect = 1;
	int release = 0;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);

	mtx_lock(&ep->com.lock);
	switch (ep->com.state) {
	case MPA_REQ_WAIT:
		__state_set(&ep->com, CLOSING);
		break;
	case MPA_REQ_SENT:
		__state_set(&ep->com, CLOSING);
		connect_reply_upcall(ep, -ECONNRESET);
		break;
	case MPA_REQ_RCVD:

		/*
		 * We're gonna mark this puppy DEAD, but keep
		 * the reference on it until the ULP accepts or
		 * rejects the CR.
		 */
		__state_set(&ep->com, CLOSING);
		break;
	case MPA_REP_SENT:
		__state_set(&ep->com, CLOSING);
		break;
	case FPDU_MODE:
		start_ep_timer(ep);
		__state_set(&ep->com, CLOSING);
		attrs.next_state = IWCH_QP_STATE_CLOSING;
		iwch_modify_qp(ep->com.qp->rhp, ep->com.qp,
			       IWCH_QP_ATTR_NEXT_STATE, &attrs, 1);
		peer_close_upcall(ep);
		break;
	case ABORTING:
		disconnect = 0;
		break;
	case CLOSING:
		__state_set(&ep->com, MORIBUND);
		disconnect = 0;
		break;
	case MORIBUND:
		stop_ep_timer(ep);
		if (ep->com.cm_id && ep->com.qp) {
			attrs.next_state = IWCH_QP_STATE_IDLE;
			iwch_modify_qp(ep->com.qp->rhp, ep->com.qp,
				       IWCH_QP_ATTR_NEXT_STATE, &attrs, 1);
		}
		close_socket(&ep->com, 0);
		close_complete_upcall(ep);
		__state_set(&ep->com, DEAD);
		release = 1;
		disconnect = 0;
		break;
	case DEAD:
		disconnect = 0;
		break;
	default:
		PANIC_IF(1);
	}
	mtx_unlock(&ep->com.lock);
	if (disconnect)
		iwch_ep_disconnect(ep, 0, M_NOWAIT);
	if (release)
		put_ep(&ep->com);
	return;
}

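/*
 * The socket reported an error.  Move the QP to ERROR where one is
 * bound, deliver the upcall appropriate to the current state, and tear
 * the endpoint down unless an abort is already in progress.
 */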
static void
process_conn_error(struct iwch_ep *ep)
{
	struct iwch_qp_attributes attrs;
	int ret;

	mtx_lock(&ep->com.lock);
	CTR3(KTR_IW_CXGB, "%s ep %p state %u", __func__, ep, ep->com.state);
	switch (ep->com.state) {
	case MPA_REQ_WAIT:
		stop_ep_timer(ep);
		break;
	case MPA_REQ_SENT:
		stop_ep_timer(ep);
		connect_reply_upcall(ep, -ECONNRESET);
		break;
	case MPA_REP_SENT:
		ep->com.rpl_err = ECONNRESET;
		CTR1(KTR_IW_CXGB, "waking up ep %p", ep);
		break;
	case MPA_REQ_RCVD:

		/*
		 * We're gonna mark this puppy DEAD, but keep
		 * the reference on it until the ULP accepts or
		 * rejects the CR.
		 */
		break;
	case MORIBUND:
	case CLOSING:
		stop_ep_timer(ep);
		/*FALLTHROUGH*/
	case FPDU_MODE:
		if (ep->com.cm_id && ep->com.qp) {
			attrs.next_state = IWCH_QP_STATE_ERROR;
			ret = iwch_modify_qp(ep->com.qp->rhp,
				     ep->com.qp, IWCH_QP_ATTR_NEXT_STATE,
				     &attrs, 1);
			if (ret)
				log(LOG_ERR,
				       "%s - qp <- error failed!\n",
				       __FUNCTION__);
		}
		peer_abort_upcall(ep);
		break;
	case ABORTING:
		break;
	case DEAD:
		mtx_unlock(&ep->com.lock);
		CTR2(KTR_IW_CXGB, "%s so_error %d IN DEAD STATE!!!!", __FUNCTION__,
			ep->com.so->so_error);
		return;
	default:
		PANIC_IF(1);
		break;
	}

	if (ep->com.state != ABORTING) {
		close_socket(&ep->com, 0);
		__state_set(&ep->com, DEAD);
		put_ep(&ep->com);
	}
	mtx_unlock(&ep->com.lock);
	return;
}

static void
process_close_complete(struct iwch_ep *ep)
{
	struct iwch_qp_attributes attrs;
	int release = 0;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
	PANIC_IF(!ep);

	/* The cm_id may be null if we failed to connect */
	mtx_lock(&ep->com.lock);
	switch (ep->com.state) {
	case CLOSING:
		__state_set(&ep->com, MORIBUND);
		break;
	case MORIBUND:
		stop_ep_timer(ep);
		if ((ep->com.cm_id) && (ep->com.qp)) {
			attrs.next_state = IWCH_QP_STATE_IDLE;
			iwch_modify_qp(ep->com.qp->rhp,
					     ep->com.qp,
					     IWCH_QP_ATTR_NEXT_STATE,
					     &attrs, 1);
		}
		if (ep->parent_ep)
			close_socket(&ep->com, 1);
		else
			close_socket(&ep->com, 0);
		close_complete_upcall(ep);
		__state_set(&ep->com, DEAD);
		release = 1;
		break;
	case ABORTING:
		break;
	case DEAD:
	default:
		PANIC_IF(1);
		break;
	}
	mtx_unlock(&ep->com.lock);
	if (release)
		put_ep(&ep->com);
	return;
}

/*
 * T3A does 3 things when a TERM is received:
 * 1) send up a CPL_RDMA_TERMINATE message with the TERM packet
 * 2) generate an async event on the QP with the TERMINATE opcode
 * 3) post a TERMINATE opcode CQE into the associated CQ.
 *
 * For (1), we save the message in the qp for later consumer consumption.
 * For (2), we move the QP into TERMINATE, post a QP event and disconnect.
 * For (3), we toss the CQE in cxio_poll_cq().
 *
 * terminate() handles case (1)...
 */
static int
terminate(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
{
	struct adapter *sc = qs->adap;
	struct tom_data *td = sc->tom_softc;
	uint32_t hash = *((uint32_t *)r + 1);
	unsigned int tid = ntohl(hash) >> 8 & 0xfffff;
	struct toepcb *toep = lookup_tid(&td->tid_maps, tid);
	struct socket *so = toep->tp_inp->inp_socket;
	struct iwch_ep *ep = so->so_rcv.sb_upcallarg;

	if (state_read(&ep->com) != FPDU_MODE)
		goto done;

	m_adj(m, sizeof(struct cpl_rdma_terminate));

	CTR4(KTR_IW_CXGB, "%s: tid %u, ep %p, saved %d bytes",
	    __func__, tid, ep, m->m_len);

	m_copydata(m, 0, m->m_len, ep->com.qp->attr.terminate_buffer);
	ep->com.qp->attr.terminate_msg_len = m->m_len;
	ep->com.qp->attr.is_terminate_local = 0;

done:
	m_freem(m);
	return (0);
}

static int
ec_status(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
{
	struct adapter *sc = qs->adap;
	struct tom_data *td = sc->tom_softc;
	struct cpl_rdma_ec_status *rep = mtod(m, void *);
	unsigned int tid = GET_TID(rep);
	struct toepcb *toep = lookup_tid(&td->tid_maps, tid);
	struct socket *so = toep->tp_inp->inp_socket;
	struct iwch_ep *ep = so->so_rcv.sb_upcallarg;

	if (rep->status) {
		struct iwch_qp_attributes attrs;

		CTR1(KTR_IW_CXGB, "%s BAD CLOSE - Aborting", __FUNCTION__);
		stop_ep_timer(ep);
		attrs.next_state = IWCH_QP_STATE_ERROR;
		iwch_modify_qp(ep->com.qp->rhp,
			     ep->com.qp,
			     IWCH_QP_ATTR_NEXT_STATE,
			     &attrs, 1);
		abort_connection(ep);
	}

	m_freem(m);
	return (0);
}

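/*
 * Endpoint timer expired: an MPA exchange or an orderly close took
 * longer than ep_timeout_secs.  Abort the connection; in unexpected
 * states the event is only logged.
 */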
static void
ep_timeout(void *arg)
{
	struct iwch_ep *ep = (struct iwch_ep *)arg;
	struct iwch_qp_attributes attrs;
	int err = 0;
	int abort = 1;

	mtx_lock(&ep->com.lock);
	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
	switch (ep->com.state) {
	case MPA_REQ_SENT:
		__state_set(&ep->com, ABORTING);
		connect_reply_upcall(ep, -ETIMEDOUT);
		break;
	case MPA_REQ_WAIT:
		__state_set(&ep->com, ABORTING);
		break;
	case CLOSING:
	case MORIBUND:
		if (ep->com.cm_id && ep->com.qp)
			err = 1;
		__state_set(&ep->com, ABORTING);
		break;
	default:
		CTR3(KTR_IW_CXGB, "%s unexpected state ep %p state %u\n",
			__func__, ep, ep->com.state);
		abort = 0;
	}
	mtx_unlock(&ep->com.lock);
	if (err) {
		attrs.next_state = IWCH_QP_STATE_ERROR;
		iwch_modify_qp(ep->com.qp->rhp,
			     ep->com.qp, IWCH_QP_ATTR_NEXT_STATE,
			     &attrs, 1);
	}
	if (abort)
		abort_connection(ep);
	put_ep(&ep->com);
}

int
iwch_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
{
	int err;
	struct iwch_ep *ep = to_ep(cm_id);
	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);

	if (state_read(&ep->com) == DEAD) {
		put_ep(&ep->com);
		return (-ECONNRESET);
	}
	PANIC_IF(state_read(&ep->com) != MPA_REQ_RCVD);
	if (mpa_rev == 0) {
		abort_connection(ep);
	} else {
		err = send_mpa_reject(ep, pdata, pdata_len);
		err = soshutdown(ep->com.so, SHUT_RDWR);
	}
	put_ep(&ep->com);
	return 0;
}

int
iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
{
	int err;
	struct iwch_qp_attributes attrs;
	enum iwch_qp_attr_mask mask;
	struct iwch_ep *ep = to_ep(cm_id);
	struct iwch_dev *h = to_iwch_dev(cm_id->device);
	struct iwch_qp *qp = get_qhp(h, conn_param->qpn);

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
	if (state_read(&ep->com) == DEAD) {
		err = -ECONNRESET;
		goto err;
	}

	PANIC_IF(state_read(&ep->com) != MPA_REQ_RCVD);
	PANIC_IF(!qp);

	if ((conn_param->ord > qp->rhp->attr.max_rdma_read_qp_depth) ||
	    (conn_param->ird > qp->rhp->attr.max_rdma_reads_per_qp)) {
		abort_connection(ep);
		err = -EINVAL;
		goto err;
	}

	cm_id->add_ref(cm_id);
	ep->com.cm_id = cm_id;
	ep->com.qp = qp;

	ep->com.rpl_err = 0;
	ep->com.rpl_done = 0;
	ep->ird = conn_param->ird;
	ep->ord = conn_param->ord;
	CTR3(KTR_IW_CXGB, "%s ird %d ord %d", __FUNCTION__, ep->ird, ep->ord);

	/* bind QP to EP and move to RTS */
	attrs.mpa_attr = ep->mpa_attr;
	attrs.max_ird = ep->ird;
	attrs.max_ord = ep->ord;
	attrs.llp_stream_handle = ep;
	attrs.next_state = IWCH_QP_STATE_RTS;

	/* bind QP and TID with INIT_WR */
	mask = IWCH_QP_ATTR_NEXT_STATE |
			     IWCH_QP_ATTR_LLP_STREAM_HANDLE |
			     IWCH_QP_ATTR_MPA_ATTR |
			     IWCH_QP_ATTR_MAX_IRD |
			     IWCH_QP_ATTR_MAX_ORD;

	err = iwch_modify_qp(ep->com.qp->rhp,
			     ep->com.qp, mask, &attrs, 1);

	if (err)
		goto err1;

	err = send_mpa_reply(ep, conn_param->private_data,
			     conn_param->private_data_len);
	if (err)
		goto err1;
	state_set(&ep->com, FPDU_MODE);
	established_upcall(ep);
	put_ep(&ep->com);
	return 0;
err1:
	ep->com.cm_id = NULL;
	ep->com.qp = NULL;
	cm_id->rem_ref(cm_id);
err:
	put_ep(&ep->com);
	return err;
}

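/*
 * Prepare a socket for use by the CM: register the receive upcall, mark
 * the socket non-blocking, and disable Nagle (TCP_NODELAY).
 */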
static int init_sock(struct iwch_ep_common *epc)
{
	int err;
	struct sockopt sopt;
	int on = 1;

	SOCK_LOCK(epc->so);
	soupcall_set(epc->so, SO_RCV, iwch_so_upcall, epc);
	epc->so->so_state |= SS_NBIO;
	SOCK_UNLOCK(epc->so);
	sopt.sopt_dir = SOPT_SET;
	sopt.sopt_level = IPPROTO_TCP;
	sopt.sopt_name = TCP_NODELAY;
	sopt.sopt_val = (caddr_t)&on;
	sopt.sopt_valsize = sizeof on;
	sopt.sopt_td = NULL;
	err = sosetopt(epc->so, &sopt);
	if (err)
		printf("%s can't set TCP_NODELAY err %d\n", __FUNCTION__, err);

	return 0;
}

static int
is_loopback_dst(struct iw_cm_id *cm_id)
{
	uint16_t port = cm_id->remote_addr.sin_port;
	int ifa_present;

	cm_id->remote_addr.sin_port = 0;
	ifa_present = ifa_ifwithaddr_check(
	    (struct sockaddr *)&cm_id->remote_addr);
	cm_id->remote_addr.sin_port = port;
	return (ifa_present);
}

int
iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
{
	int err = 0;
	struct iwch_dev *h = to_iwch_dev(cm_id->device);
	struct iwch_ep *ep;
	struct rtentry *rt;
	struct toedev *tdev;

	if (is_loopback_dst(cm_id)) {
		err = -ENOSYS;
		goto out;
	}

	ep = alloc_ep(sizeof(*ep), M_NOWAIT);
	if (!ep) {
		printf("%s - cannot alloc ep.\n", __FUNCTION__);
		err = (-ENOMEM);
		goto out;
	}
	callout_init(&ep->timer, 1);
	ep->plen = conn_param->private_data_len;
	if (ep->plen)
		memcpy(ep->mpa_pkt + sizeof(struct mpa_message),
		       conn_param->private_data, ep->plen);
	ep->ird = conn_param->ird;
	ep->ord = conn_param->ord;

	cm_id->add_ref(cm_id);
	ep->com.cm_id = cm_id;
	ep->com.qp = get_qhp(h, conn_param->qpn);
	ep->com.thread = curthread;
	PANIC_IF(!ep->com.qp);
	CTR4(KTR_IW_CXGB, "%s qpn 0x%x qp %p cm_id %p", __FUNCTION__, conn_param->qpn,
	     ep->com.qp, cm_id);

	ep->com.so = cm_id->so;
	err = init_sock(&ep->com);
	if (err)
		goto fail2;

	/* find a route */
	rt = find_route(cm_id->local_addr.sin_addr.s_addr,
			cm_id->remote_addr.sin_addr.s_addr,
			cm_id->local_addr.sin_port,
			cm_id->remote_addr.sin_port, IPTOS_LOWDELAY);
	if (!rt) {
		printf("%s - cannot find route.\n", __FUNCTION__);
		err = EHOSTUNREACH;
		goto fail2;
	}

	if (!(rt->rt_ifp->if_capenable & IFCAP_TOE)) {
		printf("%s - interface not TOE capable.\n", __FUNCTION__);
		RTFREE(rt);
		err = EOPNOTSUPP;
		goto fail2;
	}
	tdev = TOEDEV(rt->rt_ifp);
	if (tdev == NULL) {
		printf("%s - No toedev for interface.\n", __FUNCTION__);
		RTFREE(rt);
		err = EOPNOTSUPP;
		goto fail2;
	}
	RTFREE(rt);

	state_set(&ep->com, CONNECTING);
	ep->com.local_addr = cm_id->local_addr;
	ep->com.remote_addr = cm_id->remote_addr;
	err = soconnect(ep->com.so, (struct sockaddr *)&ep->com.remote_addr,
		ep->com.thread);
	if (!err)
		goto out;
fail2:
	put_ep(&ep->com);
out:
	return err;
}

int
iwch_create_listen(struct iw_cm_id *cm_id, int backlog)
{
	int err = 0;
	struct iwch_listen_ep *ep;

	ep = alloc_ep(sizeof(*ep), M_NOWAIT);
	if (!ep) {
		printf("%s - cannot alloc ep.\n", __FUNCTION__);
		err = ENOMEM;
		goto out;
	}
	CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep);
	cm_id->add_ref(cm_id);
	ep->com.cm_id = cm_id;
	ep->backlog = backlog;
	ep->com.local_addr = cm_id->local_addr;
	ep->com.thread = curthread;
	state_set(&ep->com, LISTEN);

	ep->com.so = cm_id->so;
	err = init_sock(&ep->com);
	if (err)
		goto fail;

	err = solisten(ep->com.so, ep->backlog, ep->com.thread);
	if (!err) {
		cm_id->provider_data = ep;
		goto out;
	}
	close_socket(&ep->com, 0);
fail:
	cm_id->rem_ref(cm_id);
	put_ep(&ep->com);
out:
	return err;
}

int
iwch_destroy_listen(struct iw_cm_id *cm_id)
{
	struct iwch_listen_ep *ep = to_listen_ep(cm_id);

	CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep);

	state_set(&ep->com, DEAD);
	close_socket(&ep->com, 0);
	cm_id->rem_ref(cm_id);
	put_ep(&ep->com);
	return 0;
}

int
iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, int flags)
{
	int close = 0;

	mtx_lock(&ep->com.lock);

	PANIC_IF(!ep);
	PANIC_IF(!ep->com.so);

	CTR5(KTR_IW_CXGB, "%s ep %p so %p state %s, abrupt %d", __FUNCTION__, ep,
	     ep->com.so, states[ep->com.state], abrupt);

	switch (ep->com.state) {
	case MPA_REQ_WAIT:
	case MPA_REQ_SENT:
	case MPA_REQ_RCVD:
	case MPA_REP_SENT:
	case FPDU_MODE:
		close = 1;
		if (abrupt)
			ep->com.state = ABORTING;
		else {
			ep->com.state = CLOSING;
			start_ep_timer(ep);
		}
		break;
	case CLOSING:
		close = 1;
		if (abrupt) {
			stop_ep_timer(ep);
			ep->com.state = ABORTING;
		} else
			ep->com.state = MORIBUND;
		break;
	case MORIBUND:
	case ABORTING:
	case DEAD:
		CTR3(KTR_IW_CXGB, "%s ignoring disconnect ep %p state %u\n",
			__func__, ep, ep->com.state);
		break;
	default:
		panic("unknown state: %d\n", ep->com.state);
		break;
	}

	mtx_unlock(&ep->com.lock);
	if (close) {
		if (abrupt)
			abort_connection(ep);
		else {
			if (!ep->parent_ep)
				__state_set(&ep->com, MORIBUND);
			shutdown_socket(&ep->com);
		}
	}
	return 0;
}

static void
process_data(struct iwch_ep *ep)
{
	struct sockaddr_in *local, *remote;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);

	switch (state_read(&ep->com)) {
	case MPA_REQ_SENT:
		process_mpa_reply(ep);
		break;
	case MPA_REQ_WAIT:

		/*
		 * XXX
		 * Set local and remote addrs here because when we
		 * dequeue the newly accepted socket, they aren't set
		 * yet in the pcb!
		 */
		in_getsockaddr(ep->com.so, (struct sockaddr **)&local);
		in_getpeeraddr(ep->com.so, (struct sockaddr **)&remote);
		CTR3(KTR_IW_CXGB, "%s local %s remote %s", __FUNCTION__,
			inet_ntoa(local->sin_addr),
			inet_ntoa(remote->sin_addr));
		ep->com.local_addr = *local;
		ep->com.remote_addr = *remote;
		free(local, M_SONAME);
		free(remote, M_SONAME);
		process_mpa_request(ep);
		break;
	default:
		if (sbavail(&ep->com.so->so_rcv))
			printf("%s Unexpected streaming data."
			       " ep %p state %d so %p so_state %x so_rcv.sb_cc %u so_rcv.sb_mb %p\n",
			       __FUNCTION__, ep, state_read(&ep->com), ep->com.so, ep->com.so->so_state,
			       sbavail(&ep->com.so->so_rcv), ep->com.so->so_rcv.sb_mb);
		break;
	}
	return;
}

static void
process_connected(struct iwch_ep *ep)
{
	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
	if ((ep->com.so->so_state & SS_ISCONNECTED) && !ep->com.so->so_error) {
		send_mpa_req(ep);
	} else {
		connect_reply_upcall(ep, -ep->com.so->so_error);
		close_socket(&ep->com, 0);
		state_set(&ep->com, DEAD);
		put_ep(&ep->com);
	}
}

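/*
 * Pull the next completed connection off the listening socket's accept
 * queue, wire up the child endpoint's upcall, and finish the accept.
 */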
static struct socket *
dequeue_socket(struct socket *head, struct sockaddr_in **remote, struct iwch_ep *child_ep)
{
	struct socket *so;

	ACCEPT_LOCK();
	so = TAILQ_FIRST(&head->so_comp);
	if (!so) {
		ACCEPT_UNLOCK();
		return NULL;
	}
	TAILQ_REMOVE(&head->so_comp, so, so_list);
	head->so_qlen--;
	SOCK_LOCK(so);
	so->so_qstate &= ~SQ_COMP;
	so->so_head = NULL;
	soref(so);
	soupcall_set(so, SO_RCV, iwch_so_upcall, child_ep);
	so->so_state |= SS_NBIO;
	PANIC_IF(!(so->so_state & SS_ISCONNECTED));
	PANIC_IF(so->so_error);
	SOCK_UNLOCK(so);
	ACCEPT_UNLOCK();
	soaccept(so, (struct sockaddr **)remote);
	return so;
}

static void
process_newconn(struct iwch_ep *parent_ep)
{
	struct socket *child_so;
	struct iwch_ep *child_ep;
	struct sockaddr_in *remote;

	CTR3(KTR_IW_CXGB, "%s parent ep %p so %p", __FUNCTION__, parent_ep, parent_ep->com.so);
	child_ep = alloc_ep(sizeof(*child_ep), M_NOWAIT);
	if (!child_ep) {
		log(LOG_ERR, "%s - failed to allocate ep entry!\n",
		       __FUNCTION__);
		return;
	}
	child_so = dequeue_socket(parent_ep->com.so, &remote, child_ep);
	if (!child_so) {
		log(LOG_ERR, "%s - failed to dequeue child socket!\n",
		       __FUNCTION__);
		__free_ep(&child_ep->com);
		return;
	}
	CTR3(KTR_IW_CXGB, "%s remote addr %s port %d", __FUNCTION__,
		inet_ntoa(remote->sin_addr), ntohs(remote->sin_port));
	child_ep->com.tdev = parent_ep->com.tdev;
	child_ep->com.local_addr.sin_family = parent_ep->com.local_addr.sin_family;
	child_ep->com.local_addr.sin_port = parent_ep->com.local_addr.sin_port;
	child_ep->com.local_addr.sin_addr.s_addr = parent_ep->com.local_addr.sin_addr.s_addr;
	child_ep->com.local_addr.sin_len = parent_ep->com.local_addr.sin_len;
	child_ep->com.remote_addr.sin_family = remote->sin_family;
	child_ep->com.remote_addr.sin_port = remote->sin_port;
	child_ep->com.remote_addr.sin_addr.s_addr = remote->sin_addr.s_addr;
	child_ep->com.remote_addr.sin_len = remote->sin_len;
	child_ep->com.so = child_so;
	child_ep->com.cm_id = NULL;
	child_ep->com.thread = parent_ep->com.thread;

	free(remote, M_SONAME);
	get_ep(&parent_ep->com);
	child_ep->parent_ep = parent_ep;
	callout_init(&child_ep->timer, 1);
	state_set(&child_ep->com, MPA_REQ_WAIT);
	start_ep_timer(child_ep);

	/* maybe the request has already been queued up on the socket... */
	process_mpa_request(child_ep);
}

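/*
 * Socket receive upcall.  This may be called from a context that cannot
 * sleep, so just queue the endpoint (taking a reference) and let the
 * taskqueue thread do the real work in process_req().
 */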
static int
iwch_so_upcall(struct socket *so, void *arg, int waitflag)
{
	struct iwch_ep *ep = arg;

	CTR6(KTR_IW_CXGB, "%s so %p so state %x ep %p ep state(%d)=%s", __FUNCTION__, so, so->so_state, ep, ep->com.state, states[ep->com.state]);
	mtx_lock(&req_lock);
	if (ep && ep->com.so && !ep->com.entry.tqe_prev) {
		get_ep(&ep->com);
		TAILQ_INSERT_TAIL(&req_list, &ep->com, entry);
		taskqueue_enqueue(iw_cxgb_taskq, &iw_cxgb_task);
	}
	mtx_unlock(&req_lock);
	return (SU_OK);
}

static void
process_socket_event(struct iwch_ep *ep)
{
	int state = state_read(&ep->com);
	struct socket *so = ep->com.so;

	CTR6(KTR_IW_CXGB, "%s so %p so state %x ep %p ep state(%d)=%s", __FUNCTION__, so, so->so_state, ep, ep->com.state, states[ep->com.state]);
	if (state == CONNECTING) {
		process_connected(ep);
		return;
	}

	if (state == LISTEN) {
		process_newconn(ep);
		return;
	}

	/* connection error */
	if (so->so_error) {
		process_conn_error(ep);
		return;
	}

	/* peer close */
	if ((so->so_rcv.sb_state & SBS_CANTRCVMORE) && state < CLOSING) {
		process_peer_close(ep);
		return;
	}

	/* close complete */
	if (so->so_state & (SS_ISDISCONNECTED)) {
		process_close_complete(ep);
		return;
	}

	/* rx data */
	process_data(ep);
	return;
}

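/*
 * Taskqueue handler: drain the request list, process each queued
 * endpoint's socket event, and drop the reference taken by the upcall.
 */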
static void
process_req(void *ctx, int pending)
{
	struct iwch_ep_common *epc;

	CTR1(KTR_IW_CXGB, "%s enter", __FUNCTION__);
	mtx_lock(&req_lock);
	while (!TAILQ_EMPTY(&req_list)) {
		epc = TAILQ_FIRST(&req_list);
		TAILQ_REMOVE(&req_list, epc, entry);
		epc->entry.tqe_prev = NULL;
		mtx_unlock(&req_lock);
		if (epc->so)
			process_socket_event((struct iwch_ep *)epc);
		put_ep(epc);
		mtx_lock(&req_lock);
	}
	mtx_unlock(&req_lock);
}

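/*
 * Module initialization: set up the request list and the taskqueue
 * thread that services socket upcalls.
 */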
int
iwch_cm_init(void)
{
	TAILQ_INIT(&req_list);
	mtx_init(&req_lock, "iw_cxgb req_list lock", NULL, MTX_DEF);
	iw_cxgb_taskq = taskqueue_create("iw_cxgb_taskq", M_NOWAIT,
		taskqueue_thread_enqueue, &iw_cxgb_taskq);
	if (iw_cxgb_taskq == NULL) {
		printf("failed to allocate iw_cxgb taskqueue\n");
		return (ENOMEM);
	}
	taskqueue_start_threads(&iw_cxgb_taskq, 1, PI_NET, "iw_cxgb taskq");
	TASK_INIT(&iw_cxgb_task, 0, process_req, NULL);
	return (0);
}

void
iwch_cm_term(void)
{

	taskqueue_drain(iw_cxgb_taskq, &iw_cxgb_task);
	taskqueue_free(iw_cxgb_taskq);
}

void
iwch_cm_init_cpl(struct adapter *sc)
{

	t3_register_cpl_handler(sc, CPL_RDMA_TERMINATE, terminate);
	t3_register_cpl_handler(sc, CPL_RDMA_EC_STATUS, ec_status);
}

void
iwch_cm_term_cpl(struct adapter *sc)
{

	t3_register_cpl_handler(sc, CPL_RDMA_TERMINATE, NULL);
	t3_register_cpl_handler(sc, CPL_RDMA_EC_STATUS, NULL);
}
#endif