/**************************************************************************

Copyright (c) 2007, Chelsio Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Neither the name of the Chelsio Corporation nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.c 294610 2016-01-22 23:33:34Z np $");

#include "opt_inet.h"

#ifdef TCP_OFFLOAD
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/pciio.h>
#include <sys/conf.h>
#include <machine/bus.h>
#include <machine/resource.h>
#include <sys/bus_dma.h>
#include <sys/rman.h>
#include <sys/ioccom.h>
#include <sys/mbuf.h>
#include <sys/rwlock.h>
#include <sys/linker.h>
#include <sys/firmware.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sockio.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/queue.h>
#include <sys/taskqueue.h>
#include <sys/proc.h>
#include <sys/uio.h>

#include <net/route.h>
#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/in_fib.h>
#include <netinet/in_pcb.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp.h>
#include <netinet/tcpip.h>

#include <rdma/ib_verbs.h>
#include <linux/idr.h>
#include <ulp/iw_cxgb/iw_cxgb_ib_intfc.h>

#include <cxgb_include.h>
#include <ulp/tom/cxgb_tom.h>
#include <ulp/tom/cxgb_toepcb.h>

#include <ulp/iw_cxgb/iw_cxgb_wr.h>
#include <ulp/iw_cxgb/iw_cxgb_hal.h>
#include <ulp/iw_cxgb/iw_cxgb_provider.h>
#include <ulp/iw_cxgb/iw_cxgb_cm.h>
#include <ulp/iw_cxgb/iw_cxgb.h>

#ifdef KTR
static char *states[] = {
	"idle",
	"listen",
	"connecting",
	"mpa_wait_req",
	"mpa_req_sent",
	"mpa_req_rcvd",
	"mpa_rep_sent",
	"fpdu_mode",
	"aborting",
	"closing",
	"moribund",
	"dead",
	NULL,
};
#endif

SYSCTL_NODE(_hw, OID_AUTO, iw_cxgb, CTLFLAG_RD, 0, "iw_cxgb driver parameters");

static int ep_timeout_secs = 60;
SYSCTL_INT(_hw_iw_cxgb, OID_AUTO, ep_timeout_secs, CTLFLAG_RWTUN, &ep_timeout_secs, 0,
    "CM Endpoint operation timeout in seconds (default=60)");

static int mpa_rev = 1;
SYSCTL_INT(_hw_iw_cxgb, OID_AUTO, mpa_rev, CTLFLAG_RWTUN, &mpa_rev, 0,
    "MPA Revision, 0 supports amso1100, 1 is spec compliant. (default=1)");

static int markers_enabled = 0;
SYSCTL_INT(_hw_iw_cxgb, OID_AUTO, markers_enabled, CTLFLAG_RWTUN, &markers_enabled, 0,
    "Enable MPA MARKERS (default(0)=disabled)");

static int crc_enabled = 1;
SYSCTL_INT(_hw_iw_cxgb, OID_AUTO, crc_enabled, CTLFLAG_RWTUN, &crc_enabled, 0,
    "Enable MPA CRC (default(1)=enabled)");

static int rcv_win = 256 * 1024;
SYSCTL_INT(_hw_iw_cxgb, OID_AUTO, rcv_win, CTLFLAG_RWTUN, &rcv_win, 0,
    "TCP receive window in bytes (default=256KB)");

static int snd_win = 32 * 1024;
SYSCTL_INT(_hw_iw_cxgb, OID_AUTO, snd_win, CTLFLAG_RWTUN, &snd_win, 0,
    "TCP send window in bytes (default=32KB)");

static unsigned int nocong = 0;
SYSCTL_UINT(_hw_iw_cxgb, OID_AUTO, nocong, CTLFLAG_RWTUN, &nocong, 0,
    "Turn off congestion control (default=0)");

static unsigned int cong_flavor = 1;
SYSCTL_UINT(_hw_iw_cxgb, OID_AUTO, cong_flavor, CTLFLAG_RWTUN, &cong_flavor, 0,
    "TCP Congestion control flavor (default=1)");

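/*
 * All of the knobs above live under the hw.iw_cxgb sysctl node and, being
 * marked CTLFLAG_RWTUN, may also be set as loader tunables.  For example
 * (values are illustrative only):
 *
 *	sysctl hw.iw_cxgb.ep_timeout_secs=120
 *	sysctl hw.iw_cxgb.markers_enabled=1
 *
 * or in /boot/loader.conf:
 *
 *	hw.iw_cxgb.crc_enabled="1"
 */
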
static void ep_timeout(void *arg);
static void connect_reply_upcall(struct iwch_ep *ep, int status);
static int iwch_so_upcall(struct socket *so, void *arg, int waitflag);

/*
 * Cruft to offload socket upcalls onto a thread.
 */
static struct mtx req_lock;
static TAILQ_HEAD(iwch_ep_list, iwch_ep_common) req_list;
static struct task iw_cxgb_task;
static struct taskqueue *iw_cxgb_taskq;
static void process_req(void *ctx, int pending);

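/*
 * Arm the endpoint timer.  If the callout is already pending it is torn
 * down and re-armed; otherwise this takes a reference on the endpoint
 * that ep_timeout()/stop_ep_timer() later releases.
 */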
static void
start_ep_timer(struct iwch_ep *ep)
{
	CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep);
	if (callout_pending(&ep->timer)) {
		CTR2(KTR_IW_CXGB, "%s stopped / restarted timer ep %p", __FUNCTION__, ep);
		callout_deactivate(&ep->timer);
		callout_drain(&ep->timer);
	} else {
		/*
		 * XXX this looks racy
		 */
		get_ep(&ep->com);
		callout_init(&ep->timer, 1);
	}
	callout_reset(&ep->timer, ep_timeout_secs * hz, ep_timeout, ep);
}

static void
stop_ep_timer(struct iwch_ep *ep)
{
	CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep);
	if (!callout_pending(&ep->timer)) {
		CTR3(KTR_IW_CXGB, "%s timer stopped when it's not running!  ep %p state %u\n",
		    __func__, ep, ep->com.state);
		return;
	}
	callout_drain(&ep->timer);
	put_ep(&ep->com);
}

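/*
 * Snapshot the offload state of the connection: the hardware TID, the
 * initial send/receive sequence numbers, and the effective MSS (clamped
 * to a 128 byte floor).  Fails with EINVAL if the connection is not
 * actually being handled by the TOE.
 */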
static int
set_tcpinfo(struct iwch_ep *ep)
{
	struct socket *so = ep->com.so;
	struct inpcb *inp = sotoinpcb(so);
	struct tcpcb *tp;
	struct toepcb *toep;
	int rc = 0;

	INP_WLOCK(inp);
	tp = intotcpcb(inp);

	if ((tp->t_flags & TF_TOE) == 0) {
		rc = EINVAL;
		printf("%s: connection NOT OFFLOADED!\n", __func__);
		goto done;
	}
	toep = tp->t_toe;

	ep->hwtid = toep->tp_tid;
	ep->snd_seq = tp->snd_nxt;
	ep->rcv_seq = tp->rcv_nxt;
	ep->emss = tp->t_maxseg;
	if (ep->emss < 128)
		ep->emss = 128;
done:
	INP_WUNLOCK(inp);
	return (rc);
}

static enum iwch_ep_state
state_read(struct iwch_ep_common *epc)
{
	enum iwch_ep_state state;

	mtx_lock(&epc->lock);
	state = epc->state;
	mtx_unlock(&epc->lock);
	return state;
}

static void
__state_set(struct iwch_ep_common *epc, enum iwch_ep_state new)
{
	epc->state = new;
}

static void
state_set(struct iwch_ep_common *epc, enum iwch_ep_state new)
{

	mtx_lock(&epc->lock);
	CTR3(KTR_IW_CXGB, "%s - %s -> %s", __FUNCTION__, states[epc->state], states[new]);
	__state_set(epc, new);
	mtx_unlock(&epc->lock);
	return;
}

static void *
alloc_ep(int size, int flags)
{
	struct iwch_ep_common *epc;

	epc = malloc(size, M_DEVBUF, flags);
	if (epc) {
		memset(epc, 0, size);
		refcount_init(&epc->refcount, 1);
		mtx_init(&epc->lock, "iwch_epc lock", NULL, MTX_DEF|MTX_DUPOK);
		cv_init(&epc->waitq, "iwch_epc cv");
	}
	CTR2(KTR_IW_CXGB, "%s alloc ep %p", __FUNCTION__, epc);
	return epc;
}

void __free_ep(struct iwch_ep_common *epc)
{
	CTR3(KTR_IW_CXGB, "%s ep %p state %s", __FUNCTION__, epc, states[state_read(epc)]);
	KASSERT(!epc->entry.tqe_prev, ("%s epc %p still on req list!\n", __FUNCTION__, epc));
	free(epc, M_DEVBUF);
}

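/*
 * Route lookup for the connection.  Only the peer address actually
 * drives the FIB lookup; the remaining arguments are kept so the call
 * site reads like the full 4-tuple it is routing for.
 */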
static int
find_route(__be32 local_ip, __be32 peer_ip, __be16 local_port,
    __be16 peer_port, u8 tos, struct nhop4_extended *pnh4)
{
	struct in_addr addr;

	addr.s_addr = peer_ip;
	return (fib4_lookup_nh_ext(RT_DEFAULT_FIB, addr, NHR_REF, 0, pnh4));
}

static void
close_socket(struct iwch_ep_common *epc, int close)
{
	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, epc, epc->so, states[epc->state]);
	SOCK_LOCK(epc->so);
	soupcall_clear(epc->so, SO_RCV);
	SOCK_UNLOCK(epc->so);
	if (close)
		soclose(epc->so);
	else
		soshutdown(epc->so, SHUT_WR|SHUT_RD);
	epc->so = NULL;
}

static void
shutdown_socket(struct iwch_ep_common *epc)
{
	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, epc, epc->so, states[epc->state]);
	soshutdown(epc->so, SHUT_WR);
}

static void
abort_socket(struct iwch_ep *ep)
{
	struct sockopt sopt;
	int err;
	struct linger l;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
	l.l_onoff = 1;
	l.l_linger = 0;

	/* linger_time of 0 forces RST to be sent */
	sopt.sopt_dir = SOPT_SET;
	sopt.sopt_level = SOL_SOCKET;
	sopt.sopt_name = SO_LINGER;
	sopt.sopt_val = (caddr_t)&l;
	sopt.sopt_valsize = sizeof l;
	sopt.sopt_td = NULL;
	err = sosetopt(ep->com.so, &sopt);
	if (err)
		printf("%s can't set linger to 0, no RST! err %d\n", __FUNCTION__, err);
}

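/*
 * Build and stream the MPA request.  The wire format (as laid out by
 * struct mpa_message) is roughly the fixed header followed by plen
 * bytes of private data staged by the ULP in ep->mpa_pkt:
 *
 *	| key[16] | flags | revision | private_data_size | private data ... |
 */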
static void
send_mpa_req(struct iwch_ep *ep)
{
	int mpalen;
	struct mpa_message *mpa;
	struct mbuf *m;
	int err;

	CTR3(KTR_IW_CXGB, "%s ep %p pd_len %d", __FUNCTION__, ep, ep->plen);

	mpalen = sizeof(*mpa) + ep->plen;
	m = m_gethdr(M_NOWAIT, MT_DATA);
	if (m == NULL) {
		connect_reply_upcall(ep, -ENOMEM);
		return;
	}
	mpa = mtod(m, struct mpa_message *);
	m->m_len = mpalen;
	m->m_pkthdr.len = mpalen;
	memset(mpa, 0, sizeof(*mpa));
	memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key));
	mpa->flags = (crc_enabled ? MPA_CRC : 0) |
		     (markers_enabled ? MPA_MARKERS : 0);
	mpa->private_data_size = htons(ep->plen);
	mpa->revision = mpa_rev;
	if (ep->plen)
		memcpy(mpa->private_data, ep->mpa_pkt + sizeof(*mpa), ep->plen);

	/* sosend() consumes the mbuf chain even on failure. */
	err = sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT, ep->com.thread);
	if (err) {
		connect_reply_upcall(ep, -ENOMEM);
		return;
	}

	start_ep_timer(ep);
	state_set(&ep->com, MPA_REQ_SENT);
	return;
}

static int
send_mpa_reject(struct iwch_ep *ep, const void *pdata, u8 plen)
{
	int mpalen;
	struct mpa_message *mpa;
	struct mbuf *m;
	int err;

	CTR3(KTR_IW_CXGB, "%s ep %p plen %d", __FUNCTION__, ep, plen);

	mpalen = sizeof(*mpa) + plen;

	m = m_gethdr(M_NOWAIT, MT_DATA);
	if (m == NULL) {
		printf("%s - cannot alloc mbuf!\n", __FUNCTION__);
		return (-ENOMEM);
	}
	mpa = mtod(m, struct mpa_message *);
	m->m_len = mpalen;
	m->m_pkthdr.len = mpalen;
	memset(mpa, 0, sizeof(*mpa));
	memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
	mpa->flags = MPA_REJECT;
	mpa->revision = mpa_rev;
	mpa->private_data_size = htons(plen);
	if (plen)
		memcpy(mpa->private_data, pdata, plen);
	err = sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT, ep->com.thread);
	PANIC_IF(err);
	return 0;
}

static int
send_mpa_reply(struct iwch_ep *ep, const void *pdata, u8 plen)
{
	int mpalen;
	struct mpa_message *mpa;
	struct mbuf *m;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p plen %d", __FUNCTION__, ep, ep->com.so, plen);

	mpalen = sizeof(*mpa) + plen;

	m = m_gethdr(M_NOWAIT, MT_DATA);
	if (m == NULL) {
		printf("%s - cannot alloc mbuf!\n", __FUNCTION__);
		return (-ENOMEM);
	}
	mpa = mtod(m, struct mpa_message *);
	m->m_len = mpalen;
	m->m_pkthdr.len = mpalen;
	memset(mpa, 0, sizeof(*mpa));
	memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
	mpa->flags = (ep->mpa_attr.crc_enabled ? MPA_CRC : 0) |
		     (markers_enabled ? MPA_MARKERS : 0);
	mpa->revision = mpa_rev;
	mpa->private_data_size = htons(plen);
	if (plen)
		memcpy(mpa->private_data, pdata, plen);

	state_set(&ep->com, MPA_REP_SENT);
	return sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT,
	    ep->com.thread);
}

static void
close_complete_upcall(struct iwch_ep *ep)
{
	struct iw_cm_event event;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
	memset(&event, 0, sizeof(event));
	event.event = IW_CM_EVENT_CLOSE;
	if (ep->com.cm_id) {
		CTR3(KTR_IW_CXGB, "close complete delivered ep %p cm_id %p tid %d",
		     ep, ep->com.cm_id, ep->hwtid);
		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
		ep->com.cm_id->rem_ref(ep->com.cm_id);
		ep->com.cm_id = NULL;
		ep->com.qp = NULL;
	}
}

static void
abort_connection(struct iwch_ep *ep)
{
	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
	state_set(&ep->com, ABORTING);
	abort_socket(ep);
	close_socket(&ep->com, 0);
	close_complete_upcall(ep);
	state_set(&ep->com, DEAD);
	put_ep(&ep->com);
}

static void
peer_close_upcall(struct iwch_ep *ep)
{
	struct iw_cm_event event;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
	memset(&event, 0, sizeof(event));
	event.event = IW_CM_EVENT_DISCONNECT;
	if (ep->com.cm_id) {
		CTR3(KTR_IW_CXGB, "peer close delivered ep %p cm_id %p tid %d",
		     ep, ep->com.cm_id, ep->hwtid);
		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
	}
}

static void
peer_abort_upcall(struct iwch_ep *ep)
{
	struct iw_cm_event event;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
	memset(&event, 0, sizeof(event));
	event.event = IW_CM_EVENT_CLOSE;
	event.status = ECONNRESET;
	if (ep->com.cm_id) {
		CTR3(KTR_IW_CXGB, "abort delivered ep %p cm_id %p tid %d", ep,
		     ep->com.cm_id, ep->hwtid);
		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
		ep->com.cm_id->rem_ref(ep->com.cm_id);
		ep->com.cm_id = NULL;
		ep->com.qp = NULL;
	}
}

static void
connect_reply_upcall(struct iwch_ep *ep, int status)
{
	struct iw_cm_event event;

	CTR5(KTR_IW_CXGB, "%s ep %p so %p state %s status %d", __FUNCTION__, ep, ep->com.so, states[ep->com.state], status);
	memset(&event, 0, sizeof(event));
	event.event = IW_CM_EVENT_CONNECT_REPLY;
	event.status = status;
	event.local_addr = ep->com.local_addr;
	event.remote_addr = ep->com.remote_addr;

	if ((status == 0) || (status == ECONNREFUSED)) {
		event.private_data_len = ep->plen;
		event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
	}
	if (ep->com.cm_id) {
		CTR4(KTR_IW_CXGB, "%s ep %p tid %d status %d", __FUNCTION__, ep,
		     ep->hwtid, status);
		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
	}
	if ((status < 0) && ep->com.cm_id) {
		ep->com.cm_id->rem_ref(ep->com.cm_id);
		ep->com.cm_id = NULL;
		ep->com.qp = NULL;
	}
}

static void
connect_request_upcall(struct iwch_ep *ep)
{
	struct iw_cm_event event;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
	memset(&event, 0, sizeof(event));
	event.event = IW_CM_EVENT_CONNECT_REQUEST;
	event.local_addr = ep->com.local_addr;
	event.remote_addr = ep->com.remote_addr;
	event.private_data_len = ep->plen;
	event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
	event.provider_data = ep;
	event.so = ep->com.so;
	if (state_read(&ep->parent_ep->com) != DEAD) {
		get_ep(&ep->com);
		ep->parent_ep->com.cm_id->event_handler(
						ep->parent_ep->com.cm_id,
						&event);
	}
	put_ep(&ep->parent_ep->com);
}

static void
established_upcall(struct iwch_ep *ep)
{
	struct iw_cm_event event;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
	memset(&event, 0, sizeof(event));
	event.event = IW_CM_EVENT_ESTABLISHED;
	if (ep->com.cm_id) {
		CTR3(KTR_IW_CXGB, "%s ep %p tid %d", __FUNCTION__, ep, ep->hwtid);
		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
	}
}

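/*
 * Read and parse the peer's MPA reply.  Stream data is pulled off the
 * socket into ep->mpa_pkt until a complete header plus private data has
 * accumulated; the header is then validated and, on success, the QP is
 * moved to RTS via iwch_modify_qp().
 */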
static void
process_mpa_reply(struct iwch_ep *ep)
{
	struct mpa_message *mpa;
	u16 plen;
	struct iwch_qp_attributes attrs;
	enum iwch_qp_attr_mask mask;
	int err;
	struct mbuf *top, *m;
	int flags = MSG_DONTWAIT;
	struct uio uio;
	int len;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);

	/*
	 * Stop the MPA timer.  If it expired, then the state has
	 * changed and we bail since ep_timeout already aborted
	 * the connection.
	 */
	stop_ep_timer(ep);
	if (state_read(&ep->com) != MPA_REQ_SENT)
		return;

	uio.uio_resid = len = 1000000;
	uio.uio_td = ep->com.thread;
	err = soreceive(ep->com.so, NULL, &uio, &top, NULL, &flags);
	if (err) {
		if (err == EWOULDBLOCK) {
			start_ep_timer(ep);
			return;
		}
		err = -err;
		goto err;
	}

	if (ep->com.so->so_rcv.sb_mb) {
		printf("%s data after soreceive called! so %p sb_mb %p top %p\n",
		    __FUNCTION__, ep->com.so, ep->com.so->so_rcv.sb_mb, top);
	}

	m = top;
	do {
		/*
		 * If we get more than the supported amount of private data
		 * then we must fail this connection.
		 */
		if (ep->mpa_pkt_len + m->m_len > sizeof(ep->mpa_pkt)) {
			err = (-EINVAL);
			goto err;
		}

		/*
		 * Copy the new data into our accumulation buffer.
		 */
		m_copydata(m, 0, m->m_len, &(ep->mpa_pkt[ep->mpa_pkt_len]));
		ep->mpa_pkt_len += m->m_len;
		if (!m->m_next)
			m = m->m_nextpkt;
		else
			m = m->m_next;
	} while (m);

	m_freem(top);

	/*
	 * If we don't even have the MPA message yet, then bail.
	 */
	if (ep->mpa_pkt_len < sizeof(*mpa))
		return;
	mpa = (struct mpa_message *)ep->mpa_pkt;

	/* Validate MPA header. */
	if (mpa->revision != mpa_rev) {
		CTR2(KTR_IW_CXGB, "%s bad mpa rev %d", __FUNCTION__, mpa->revision);
		err = EPROTO;
		goto err;
	}
	if (memcmp(mpa->key, MPA_KEY_REP, sizeof(mpa->key))) {
		CTR2(KTR_IW_CXGB, "%s bad mpa key |%16s|", __FUNCTION__, mpa->key);
		err = EPROTO;
		goto err;
	}

	plen = ntohs(mpa->private_data_size);

	/*
	 * Fail if there's too much private data.
	 */
	if (plen > MPA_MAX_PRIVATE_DATA) {
		CTR2(KTR_IW_CXGB, "%s plen too big %d", __FUNCTION__, plen);
		err = EPROTO;
		goto err;
	}

	/*
	 * Fail if plen does not account for the packet size.
	 */
	if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
		CTR2(KTR_IW_CXGB, "%s pkt too big %d", __FUNCTION__, ep->mpa_pkt_len);
		err = EPROTO;
		goto err;
	}

	ep->plen = (u8) plen;

	/*
	 * If we don't have all the pdata yet, then bail.
	 * We'll continue processing when more data arrives.
	 */
	if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
		return;

	if (mpa->flags & MPA_REJECT) {
		err = ECONNREFUSED;
		goto err;
	}

	/*
	 * If we get here we have accumulated the entire MPA
	 * start reply message including private data, and
	 * the MPA header is valid.
	 */
	CTR1(KTR_IW_CXGB, "%s mpa rpl looks good!", __FUNCTION__);
	state_set(&ep->com, FPDU_MODE);
	ep->mpa_attr.initiator = 1;
	ep->mpa_attr.crc_enabled = ((mpa->flags & MPA_CRC) | crc_enabled) ? 1 : 0;
	ep->mpa_attr.recv_marker_enabled = markers_enabled;
	ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
	ep->mpa_attr.version = mpa_rev;
	if (set_tcpinfo(ep)) {
		printf("%s set_tcpinfo error\n", __FUNCTION__);
		err = -EINVAL;
		goto err;
	}
	CTR5(KTR_IW_CXGB, "%s - crc_enabled=%d, recv_marker_enabled=%d, "
	     "xmit_marker_enabled=%d, version=%d", __FUNCTION__,
	     ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
	     ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version);

	attrs.mpa_attr = ep->mpa_attr;
	attrs.max_ird = ep->ird;
	attrs.max_ord = ep->ord;
	attrs.llp_stream_handle = ep;
	attrs.next_state = IWCH_QP_STATE_RTS;

	mask = IWCH_QP_ATTR_NEXT_STATE |
	    IWCH_QP_ATTR_LLP_STREAM_HANDLE | IWCH_QP_ATTR_MPA_ATTR |
	    IWCH_QP_ATTR_MAX_IRD | IWCH_QP_ATTR_MAX_ORD;

	/* bind QP and TID with INIT_WR */
	err = iwch_modify_qp(ep->com.qp->rhp,
			     ep->com.qp, mask, &attrs, 1);
	if (!err)
		goto out;
err:
	abort_connection(ep);
out:
	connect_reply_upcall(ep, err);
	return;
}

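/*
 * Passive-side twin of process_mpa_reply(): accumulate the peer's MPA
 * request in ep->mpa_pkt, validate it, and hand the connection request
 * up to the listening cm_id via connect_request_upcall().
 */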
static void
process_mpa_request(struct iwch_ep *ep)
{
	struct mpa_message *mpa;
	u16 plen;
	int flags = MSG_DONTWAIT;
	struct mbuf *top, *m;
	int err;
	struct uio uio;
	int len;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);

	/*
	 * Stop the MPA timer.  If it expired, then the state has
	 * changed and we bail since ep_timeout already aborted
	 * the connection.
	 */
	stop_ep_timer(ep);
	if (state_read(&ep->com) != MPA_REQ_WAIT)
		return;

	uio.uio_resid = len = 1000000;
	uio.uio_td = ep->com.thread;
	err = soreceive(ep->com.so, NULL, &uio, &top, NULL, &flags);
	if (err) {
		if (err == EWOULDBLOCK) {
			start_ep_timer(ep);
			return;
		}
		err = -err;
		goto err;
	}

	m = top;
	do {

		/*
		 * If we get more than the supported amount of private data
		 * then we must fail this connection.
		 */
		if (ep->mpa_pkt_len + m->m_len > sizeof(ep->mpa_pkt)) {
			CTR2(KTR_IW_CXGB, "%s mpa message too big %d", __FUNCTION__,
			    ep->mpa_pkt_len + m->m_len);
			goto err;
		}

		/*
		 * Copy the new data into our accumulation buffer.
		 */
		m_copydata(m, 0, m->m_len, &(ep->mpa_pkt[ep->mpa_pkt_len]));
		ep->mpa_pkt_len += m->m_len;

		if (!m->m_next)
			m = m->m_nextpkt;
		else
			m = m->m_next;
	} while (m);

	m_freem(top);

	/*
	 * If we don't even have the MPA message yet, then bail.
	 * We'll continue processing when more data arrives.
	 */
	if (ep->mpa_pkt_len < sizeof(*mpa)) {
		start_ep_timer(ep);
		CTR2(KTR_IW_CXGB, "%s not enough header %d...waiting...", __FUNCTION__,
		    ep->mpa_pkt_len);
		return;
	}
	mpa = (struct mpa_message *) ep->mpa_pkt;

	/*
	 * Validate MPA header.
	 */
	if (mpa->revision != mpa_rev) {
		CTR2(KTR_IW_CXGB, "%s bad mpa rev %d", __FUNCTION__, mpa->revision);
		goto err;
	}

	if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key))) {
		CTR2(KTR_IW_CXGB, "%s bad mpa key |%16s|", __FUNCTION__, mpa->key);
		goto err;
	}

	plen = ntohs(mpa->private_data_size);

	/*
	 * Fail if there's too much private data.
	 */
	if (plen > MPA_MAX_PRIVATE_DATA) {
		CTR2(KTR_IW_CXGB, "%s plen too big %d", __FUNCTION__, plen);
		goto err;
	}

	/*
	 * Fail if plen does not account for the packet size.
	 */
	if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
		CTR2(KTR_IW_CXGB, "%s more data after private data %d", __FUNCTION__,
		    ep->mpa_pkt_len);
		goto err;
	}
	ep->plen = (u8) plen;

	/*
	 * If we don't have all the pdata yet, then bail.
	 */
	if (ep->mpa_pkt_len < (sizeof(*mpa) + plen)) {
		start_ep_timer(ep);
		CTR2(KTR_IW_CXGB, "%s more mpa msg to come %d", __FUNCTION__,
		    ep->mpa_pkt_len);
		return;
	}

	/*
	 * If we get here we have accumulated the entire MPA
	 * start request message including private data.
	 */
	ep->mpa_attr.initiator = 0;
	ep->mpa_attr.crc_enabled = ((mpa->flags & MPA_CRC) | crc_enabled) ? 1 : 0;
	ep->mpa_attr.recv_marker_enabled = markers_enabled;
	ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
	ep->mpa_attr.version = mpa_rev;
	if (set_tcpinfo(ep)) {
		printf("%s set_tcpinfo error\n", __FUNCTION__);
		goto err;
	}
	CTR5(KTR_IW_CXGB, "%s - crc_enabled=%d, recv_marker_enabled=%d, "
	     "xmit_marker_enabled=%d, version=%d", __FUNCTION__,
	     ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
	     ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version);

	state_set(&ep->com, MPA_REQ_RCVD);

	/* drive upcall */
	connect_request_upcall(ep);
	return;
err:
	abort_connection(ep);
	return;
}

static void
process_peer_close(struct iwch_ep *ep)
{
	struct iwch_qp_attributes attrs;
	int disconnect = 1;
	int release = 0;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);

	mtx_lock(&ep->com.lock);
	switch (ep->com.state) {
	case MPA_REQ_WAIT:
		__state_set(&ep->com, CLOSING);
		break;
	case MPA_REQ_SENT:
		__state_set(&ep->com, CLOSING);
		connect_reply_upcall(ep, -ECONNRESET);
		break;
	case MPA_REQ_RCVD:

		/*
		 * We're gonna mark this puppy DEAD, but keep
		 * the reference on it until the ULP accepts or
		 * rejects the CR.
		 */
		__state_set(&ep->com, CLOSING);
		break;
	case MPA_REP_SENT:
		__state_set(&ep->com, CLOSING);
		break;
	case FPDU_MODE:
		start_ep_timer(ep);
		__state_set(&ep->com, CLOSING);
		attrs.next_state = IWCH_QP_STATE_CLOSING;
		iwch_modify_qp(ep->com.qp->rhp, ep->com.qp,
			       IWCH_QP_ATTR_NEXT_STATE, &attrs, 1);
		peer_close_upcall(ep);
		break;
	case ABORTING:
		disconnect = 0;
		break;
	case CLOSING:
		__state_set(&ep->com, MORIBUND);
		disconnect = 0;
		break;
	case MORIBUND:
		stop_ep_timer(ep);
		if (ep->com.cm_id && ep->com.qp) {
			attrs.next_state = IWCH_QP_STATE_IDLE;
			iwch_modify_qp(ep->com.qp->rhp, ep->com.qp,
				       IWCH_QP_ATTR_NEXT_STATE, &attrs, 1);
		}
		close_socket(&ep->com, 0);
		close_complete_upcall(ep);
		__state_set(&ep->com, DEAD);
		release = 1;
		disconnect = 0;
		break;
	case DEAD:
		disconnect = 0;
		break;
	default:
		PANIC_IF(1);
	}
	mtx_unlock(&ep->com.lock);
	if (disconnect)
		iwch_ep_disconnect(ep, 0, M_NOWAIT);
	if (release)
		put_ep(&ep->com);
	return;
}

static void
process_conn_error(struct iwch_ep *ep)
{
	struct iwch_qp_attributes attrs;
	int ret;

	mtx_lock(&ep->com.lock);
	CTR3(KTR_IW_CXGB, "%s ep %p state %u", __func__, ep, ep->com.state);
	switch (ep->com.state) {
	case MPA_REQ_WAIT:
		stop_ep_timer(ep);
		break;
	case MPA_REQ_SENT:
		stop_ep_timer(ep);
		connect_reply_upcall(ep, -ECONNRESET);
		break;
	case MPA_REP_SENT:
		ep->com.rpl_err = ECONNRESET;
		CTR1(KTR_IW_CXGB, "waking up ep %p", ep);
		break;
	case MPA_REQ_RCVD:

		/*
		 * We're gonna mark this puppy DEAD, but keep
		 * the reference on it until the ULP accepts or
		 * rejects the CR.
		 */
		break;
	case MORIBUND:
	case CLOSING:
		stop_ep_timer(ep);
		/*FALLTHROUGH*/
	case FPDU_MODE:
		if (ep->com.cm_id && ep->com.qp) {
			attrs.next_state = IWCH_QP_STATE_ERROR;
			ret = iwch_modify_qp(ep->com.qp->rhp,
				     ep->com.qp, IWCH_QP_ATTR_NEXT_STATE,
				     &attrs, 1);
			if (ret)
				log(LOG_ERR,
				    "%s - qp <- error failed!\n",
				    __FUNCTION__);
		}
		peer_abort_upcall(ep);
		break;
	case ABORTING:
		break;
	case DEAD:
		mtx_unlock(&ep->com.lock);
		CTR2(KTR_IW_CXGB, "%s so_error %d IN DEAD STATE!!!!", __FUNCTION__,
		    ep->com.so->so_error);
		return;
	default:
		PANIC_IF(1);
		break;
	}

	if (ep->com.state != ABORTING) {
		close_socket(&ep->com, 0);
		__state_set(&ep->com, DEAD);
		put_ep(&ep->com);
	}
	mtx_unlock(&ep->com.lock);
	return;
}

static void
process_close_complete(struct iwch_ep *ep)
{
	struct iwch_qp_attributes attrs;
	int release = 0;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
	PANIC_IF(!ep);

	/* The cm_id may be null if we failed to connect */
	mtx_lock(&ep->com.lock);
	switch (ep->com.state) {
	case CLOSING:
		__state_set(&ep->com, MORIBUND);
		break;
	case MORIBUND:
		stop_ep_timer(ep);
		if ((ep->com.cm_id) && (ep->com.qp)) {
			attrs.next_state = IWCH_QP_STATE_IDLE;
			iwch_modify_qp(ep->com.qp->rhp,
					     ep->com.qp,
					     IWCH_QP_ATTR_NEXT_STATE,
					     &attrs, 1);
		}
		if (ep->parent_ep)
			close_socket(&ep->com, 1);
		else
			close_socket(&ep->com, 0);
		close_complete_upcall(ep);
		__state_set(&ep->com, DEAD);
		release = 1;
		break;
	case ABORTING:
		break;
	case DEAD:
	default:
		PANIC_IF(1);
		break;
	}
	mtx_unlock(&ep->com.lock);
	if (release)
		put_ep(&ep->com);
	return;
}

/*
 * T3A does 3 things when a TERM is received:
 * 1) send up a CPL_RDMA_TERMINATE message with the TERM packet
 * 2) generate an async event on the QP with the TERMINATE opcode
 * 3) post a TERMINATE opcode cqe into the associated CQ.
 *
 * For (1), we save the message in the qp for later consumption by the
 * consumer.
 * For (2), we move the QP into TERMINATE, post a QP event and disconnect.
 * For (3), we toss the CQE in cxio_poll_cq().
 *
 * terminate() handles case (1)...
 */
static int
terminate(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
{
	struct adapter *sc = qs->adap;
	struct tom_data *td = sc->tom_softc;
	uint32_t hash = *((uint32_t *)r + 1);
	unsigned int tid = ntohl(hash) >> 8 & 0xfffff;
	struct toepcb *toep = lookup_tid(&td->tid_maps, tid);
	struct socket *so = toep->tp_inp->inp_socket;
	struct iwch_ep *ep = so->so_rcv.sb_upcallarg;

	if (state_read(&ep->com) != FPDU_MODE)
		goto done;

	m_adj(m, sizeof(struct cpl_rdma_terminate));

	CTR4(KTR_IW_CXGB, "%s: tid %u, ep %p, saved %d bytes",
	    __func__, tid, ep, m->m_len);

	m_copydata(m, 0, m->m_len, ep->com.qp->attr.terminate_buffer);
	ep->com.qp->attr.terminate_msg_len = m->m_len;
	ep->com.qp->attr.is_terminate_local = 0;

done:
	m_freem(m);
	return (0);
}

static int
ec_status(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
{
	struct adapter *sc = qs->adap;
	struct tom_data *td = sc->tom_softc;
	struct cpl_rdma_ec_status *rep = mtod(m, void *);
	unsigned int tid = GET_TID(rep);
	struct toepcb *toep = lookup_tid(&td->tid_maps, tid);
	struct socket *so = toep->tp_inp->inp_socket;
	struct iwch_ep *ep = so->so_rcv.sb_upcallarg;

	if (rep->status) {
		struct iwch_qp_attributes attrs;

		CTR1(KTR_IW_CXGB, "%s BAD CLOSE - Aborting", __FUNCTION__);
		stop_ep_timer(ep);
		attrs.next_state = IWCH_QP_STATE_ERROR;
		iwch_modify_qp(ep->com.qp->rhp,
		    ep->com.qp,
		    IWCH_QP_ATTR_NEXT_STATE,
		    &attrs, 1);
		abort_connection(ep);
	}

	m_freem(m);
	return (0);
}

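/*
 * Callout handler for the endpoint timer.  Depending on the current
 * state, the endpoint is moved to ABORTING (failing a pending connect
 * with -ETIMEDOUT where appropriate), the QP is errored out, and the
 * connection is aborted.  Drops the timer's endpoint reference.
 */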
static void
ep_timeout(void *arg)
{
	struct iwch_ep *ep = (struct iwch_ep *)arg;
	struct iwch_qp_attributes attrs;
	int err = 0;
	int abort = 1;

	mtx_lock(&ep->com.lock);
	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
	switch (ep->com.state) {
	case MPA_REQ_SENT:
		__state_set(&ep->com, ABORTING);
		connect_reply_upcall(ep, -ETIMEDOUT);
		break;
	case MPA_REQ_WAIT:
		__state_set(&ep->com, ABORTING);
		break;
	case CLOSING:
	case MORIBUND:
		if (ep->com.cm_id && ep->com.qp)
			err = 1;
		__state_set(&ep->com, ABORTING);
		break;
	default:
		CTR3(KTR_IW_CXGB, "%s unexpected state ep %p state %u\n",
		    __func__, ep, ep->com.state);
		abort = 0;
	}
	mtx_unlock(&ep->com.lock);
	if (err) {
		attrs.next_state = IWCH_QP_STATE_ERROR;
		iwch_modify_qp(ep->com.qp->rhp,
		    ep->com.qp, IWCH_QP_ATTR_NEXT_STATE,
		    &attrs, 1);
	}
	if (abort)
		abort_connection(ep);
	put_ep(&ep->com);
}

int
iwch_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
{
	int err;
	struct iwch_ep *ep = to_ep(cm_id);

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);

	if (state_read(&ep->com) == DEAD) {
		put_ep(&ep->com);
		return (-ECONNRESET);
	}
	PANIC_IF(state_read(&ep->com) != MPA_REQ_RCVD);
	if (mpa_rev == 0) {
		abort_connection(ep);
	} else {
		err = send_mpa_reject(ep, pdata, pdata_len);
		err = soshutdown(ep->com.so, SHUT_RDWR);
	}
	put_ep(&ep->com);
	return 0;
}

int
iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
{
	int err;
	struct iwch_qp_attributes attrs;
	enum iwch_qp_attr_mask mask;
	struct iwch_ep *ep = to_ep(cm_id);
	struct iwch_dev *h = to_iwch_dev(cm_id->device);
	struct iwch_qp *qp = get_qhp(h, conn_param->qpn);

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
	if (state_read(&ep->com) == DEAD) {
		err = -ECONNRESET;
		goto err;
	}

	PANIC_IF(state_read(&ep->com) != MPA_REQ_RCVD);
	PANIC_IF(!qp);

	if ((conn_param->ord > qp->rhp->attr.max_rdma_read_qp_depth) ||
	    (conn_param->ird > qp->rhp->attr.max_rdma_reads_per_qp)) {
		abort_connection(ep);
		err = -EINVAL;
		goto err;
	}

	cm_id->add_ref(cm_id);
	ep->com.cm_id = cm_id;
	ep->com.qp = qp;

	ep->com.rpl_err = 0;
	ep->com.rpl_done = 0;
	ep->ird = conn_param->ird;
	ep->ord = conn_param->ord;
	CTR3(KTR_IW_CXGB, "%s ird %d ord %d", __FUNCTION__, ep->ird, ep->ord);

	/* bind QP to EP and move to RTS */
	attrs.mpa_attr = ep->mpa_attr;
	attrs.max_ird = ep->ird;
	attrs.max_ord = ep->ord;
	attrs.llp_stream_handle = ep;
	attrs.next_state = IWCH_QP_STATE_RTS;

	/* bind QP and TID with INIT_WR */
	mask = IWCH_QP_ATTR_NEXT_STATE |
			     IWCH_QP_ATTR_LLP_STREAM_HANDLE |
			     IWCH_QP_ATTR_MPA_ATTR |
			     IWCH_QP_ATTR_MAX_IRD |
			     IWCH_QP_ATTR_MAX_ORD;

	err = iwch_modify_qp(ep->com.qp->rhp,
			     ep->com.qp, mask, &attrs, 1);

	if (err)
		goto err1;

	err = send_mpa_reply(ep, conn_param->private_data,
			     conn_param->private_data_len);
	if (err)
		goto err1;
	state_set(&ep->com, FPDU_MODE);
	established_upcall(ep);
	put_ep(&ep->com);
	return 0;
err1:
	ep->com.cm_id = NULL;
	ep->com.qp = NULL;
	cm_id->rem_ref(cm_id);
err:
	put_ep(&ep->com);
	return err;
}

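/*
 * Prepare a socket for use as an iWARP stream: hook up the receive
 * upcall, mark the socket non-blocking, and set TCP_NODELAY so MPA
 * messages are not delayed by Nagle.
 */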
static int
init_sock(struct iwch_ep_common *epc)
{
	int err;
	struct sockopt sopt;
	int on = 1;

	SOCK_LOCK(epc->so);
	soupcall_set(epc->so, SO_RCV, iwch_so_upcall, epc);
	epc->so->so_state |= SS_NBIO;
	SOCK_UNLOCK(epc->so);
	sopt.sopt_dir = SOPT_SET;
	sopt.sopt_level = IPPROTO_TCP;
	sopt.sopt_name = TCP_NODELAY;
	sopt.sopt_val = (caddr_t)&on;
	sopt.sopt_valsize = sizeof on;
	sopt.sopt_td = NULL;
	err = sosetopt(epc->so, &sopt);
	if (err)
		printf("%s can't set TCP_NODELAY err %d\n", __FUNCTION__, err);

	return 0;
}

static int
is_loopback_dst(struct iw_cm_id *cm_id)
{
	uint16_t port = cm_id->remote_addr.sin_port;
	int ifa_present;

	cm_id->remote_addr.sin_port = 0;
	ifa_present = ifa_ifwithaddr_check(
	    (struct sockaddr *)&cm_id->remote_addr);
	cm_id->remote_addr.sin_port = port;
	return (ifa_present);
}

int
iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
{
	int err = 0;
	struct iwch_dev *h = to_iwch_dev(cm_id->device);
	struct iwch_ep *ep;
	struct nhop4_extended nh4;
	struct toedev *tdev;

	if (is_loopback_dst(cm_id)) {
		err = -ENOSYS;
		goto out;
	}

	ep = alloc_ep(sizeof(*ep), M_NOWAIT);
	if (!ep) {
		printf("%s - cannot alloc ep.\n", __FUNCTION__);
		err = (-ENOMEM);
		goto out;
	}
	callout_init(&ep->timer, 1);
	ep->plen = conn_param->private_data_len;
	if (ep->plen)
		memcpy(ep->mpa_pkt + sizeof(struct mpa_message),
		       conn_param->private_data, ep->plen);
	ep->ird = conn_param->ird;
	ep->ord = conn_param->ord;

	cm_id->add_ref(cm_id);
	ep->com.cm_id = cm_id;
	ep->com.qp = get_qhp(h, conn_param->qpn);
	ep->com.thread = curthread;
	PANIC_IF(!ep->com.qp);
	CTR4(KTR_IW_CXGB, "%s qpn 0x%x qp %p cm_id %p", __FUNCTION__, conn_param->qpn,
	     ep->com.qp, cm_id);

	ep->com.so = cm_id->so;
	err = init_sock(&ep->com);
	if (err)
		goto fail2;

	/* find a route */
	err = find_route(cm_id->local_addr.sin_addr.s_addr,
	    cm_id->remote_addr.sin_addr.s_addr,
	    cm_id->local_addr.sin_port,
	    cm_id->remote_addr.sin_port, IPTOS_LOWDELAY, &nh4);
	if (err) {
		printf("%s - cannot find route.\n", __FUNCTION__);
		err = EHOSTUNREACH;
		goto fail2;
	}

	if (!(nh4.nh_ifp->if_capenable & IFCAP_TOE)) {
		printf("%s - interface not TOE capable.\n", __FUNCTION__);
		err = EHOSTUNREACH;
		fib4_free_nh_ext(RT_DEFAULT_FIB, &nh4);
		goto fail2;
	}
	tdev = TOEDEV(nh4.nh_ifp);
	if (tdev == NULL) {
		printf("%s - No toedev for interface.\n", __FUNCTION__);
		err = EHOSTUNREACH;
		fib4_free_nh_ext(RT_DEFAULT_FIB, &nh4);
		goto fail2;
	}
	fib4_free_nh_ext(RT_DEFAULT_FIB, &nh4);

	state_set(&ep->com, CONNECTING);
	ep->com.local_addr = cm_id->local_addr;
	ep->com.remote_addr = cm_id->remote_addr;
	err = soconnect(ep->com.so, (struct sockaddr *)&ep->com.remote_addr,
	    ep->com.thread);
	if (!err)
		goto out;
fail2:
	put_ep(&ep->com);
out:
	return err;
}

int
iwch_create_listen_ep(struct iw_cm_id *cm_id, int backlog)
{
	int err = 0;
	struct iwch_listen_ep *ep;

	ep = alloc_ep(sizeof(*ep), M_NOWAIT);
	if (!ep) {
		printf("%s - cannot alloc ep.\n", __FUNCTION__);
		err = ENOMEM;
		goto out;
	}
	CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep);
	cm_id->add_ref(cm_id);
	ep->com.cm_id = cm_id;
	ep->backlog = backlog;
	ep->com.local_addr = cm_id->local_addr;
	ep->com.thread = curthread;
	state_set(&ep->com, LISTEN);

	ep->com.so = cm_id->so;
	cm_id->provider_data = ep;
out:
	return err;
}

void
iwch_destroy_listen_ep(struct iw_cm_id *cm_id)
{
	struct iwch_listen_ep *ep = to_listen_ep(cm_id);

	CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep);

	state_set(&ep->com, DEAD);
	cm_id->rem_ref(cm_id);
	put_ep(&ep->com);
	return;
}

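/*
 * Begin teardown of a connection.  With abrupt set the connection is
 * reset immediately via abort_connection(); otherwise a graceful close
 * is started by shutting down the send side and arming the ep timer so
 * a stalled peer cannot hold the endpoint forever.
 */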
int
iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, int flags)
{
	int close = 0;

	PANIC_IF(!ep);
	PANIC_IF(!ep->com.so);

	mtx_lock(&ep->com.lock);

	CTR5(KTR_IW_CXGB, "%s ep %p so %p state %s, abrupt %d", __FUNCTION__, ep,
	     ep->com.so, states[ep->com.state], abrupt);

	switch (ep->com.state) {
	case MPA_REQ_WAIT:
	case MPA_REQ_SENT:
	case MPA_REQ_RCVD:
	case MPA_REP_SENT:
	case FPDU_MODE:
		close = 1;
		if (abrupt)
			ep->com.state = ABORTING;
		else {
			ep->com.state = CLOSING;
			start_ep_timer(ep);
		}
		break;
	case CLOSING:
		close = 1;
		if (abrupt) {
			stop_ep_timer(ep);
			ep->com.state = ABORTING;
		} else
			ep->com.state = MORIBUND;
		break;
	case MORIBUND:
	case ABORTING:
	case DEAD:
		CTR3(KTR_IW_CXGB, "%s ignoring disconnect ep %p state %u\n",
		    __func__, ep, ep->com.state);
		break;
	default:
		panic("unknown state: %d\n", ep->com.state);
		break;
	}

	mtx_unlock(&ep->com.lock);
	if (close) {
		if (abrupt)
			abort_connection(ep);
		else {
			if (!ep->parent_ep)
				__state_set(&ep->com, MORIBUND);
			shutdown_socket(&ep->com);
		}
	}
	return 0;
}

static void
process_data(struct iwch_ep *ep)
{
	struct sockaddr_in *local, *remote;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);

	switch (state_read(&ep->com)) {
	case MPA_REQ_SENT:
		process_mpa_reply(ep);
		break;
	case MPA_REQ_WAIT:

		/*
		 * XXX
		 * Set local and remote addrs here because when we
		 * dequeue the newly accepted socket, they aren't set
		 * yet in the pcb!
		 */
		in_getsockaddr(ep->com.so, (struct sockaddr **)&local);
		in_getpeeraddr(ep->com.so, (struct sockaddr **)&remote);
		CTR3(KTR_IW_CXGB, "%s local %s remote %s", __FUNCTION__,
		    inet_ntoa(local->sin_addr),
		    inet_ntoa(remote->sin_addr));
		ep->com.local_addr = *local;
		ep->com.remote_addr = *remote;
		free(local, M_SONAME);
		free(remote, M_SONAME);
		process_mpa_request(ep);
		break;
	default:
		if (sbavail(&ep->com.so->so_rcv))
			printf("%s Unexpected streaming data."
			    " ep %p state %d so %p so_state %x so_rcv.sb_cc %u so_rcv.sb_mb %p\n",
			    __FUNCTION__, ep, state_read(&ep->com), ep->com.so, ep->com.so->so_state,
			    sbavail(&ep->com.so->so_rcv), ep->com.so->so_rcv.sb_mb);
		break;
	}
	return;
}

static void
process_connected(struct iwch_ep *ep)
{
	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
	if ((ep->com.so->so_state & SS_ISCONNECTED) && !ep->com.so->so_error) {
		send_mpa_req(ep);
	} else {
		connect_reply_upcall(ep, -ep->com.so->so_error);
		close_socket(&ep->com, 0);
		state_set(&ep->com, DEAD);
		put_ep(&ep->com);
	}
}

void
process_newconn(struct iw_cm_id *parent_cm_id, struct socket *child_so)
{
	struct iwch_ep *child_ep;
	struct sockaddr_in *local;
	struct sockaddr_in *remote;
	struct iwch_ep *parent_ep = parent_cm_id->provider_data;

	CTR3(KTR_IW_CXGB, "%s parent ep %p so %p", __FUNCTION__, parent_ep, parent_ep->com.so);
	if (!child_so) {
		log(LOG_ERR, "%s - invalid child socket!\n", __func__);
		return;
	}
	child_ep = alloc_ep(sizeof(*child_ep), M_NOWAIT);
	if (!child_ep) {
		log(LOG_ERR, "%s - failed to allocate ep entry!\n",
		    __FUNCTION__);
		return;
	}
	SOCKBUF_LOCK(&child_so->so_rcv);
	soupcall_set(child_so, SO_RCV, iwch_so_upcall, child_ep);
	SOCKBUF_UNLOCK(&child_so->so_rcv);

	in_getsockaddr(child_so, (struct sockaddr **)&local);
	in_getpeeraddr(child_so, (struct sockaddr **)&remote);

	CTR3(KTR_IW_CXGB, "%s remote addr %s port %d", __FUNCTION__,
	    inet_ntoa(remote->sin_addr), ntohs(remote->sin_port));
	child_ep->com.tdev = parent_ep->com.tdev;
	child_ep->com.local_addr.sin_family = parent_ep->com.local_addr.sin_family;
	child_ep->com.local_addr.sin_port = parent_ep->com.local_addr.sin_port;
	child_ep->com.local_addr.sin_addr.s_addr = parent_ep->com.local_addr.sin_addr.s_addr;
	child_ep->com.local_addr.sin_len = parent_ep->com.local_addr.sin_len;
	child_ep->com.remote_addr.sin_family = remote->sin_family;
	child_ep->com.remote_addr.sin_port = remote->sin_port;
	child_ep->com.remote_addr.sin_addr.s_addr = remote->sin_addr.s_addr;
	child_ep->com.remote_addr.sin_len = remote->sin_len;
	child_ep->com.so = child_so;
	child_ep->com.cm_id = NULL;
	child_ep->com.thread = parent_ep->com.thread;
	child_ep->parent_ep = parent_ep;

	free(local, M_SONAME);
	free(remote, M_SONAME);
	get_ep(&parent_ep->com);
	callout_init(&child_ep->timer, 1);
	state_set(&child_ep->com, MPA_REQ_WAIT);
	start_ep_timer(child_ep);

	/* maybe the request has already been queued up on the socket... */
	process_mpa_request(child_ep);
}

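/*
 * Socket receive upcall.  This runs with socket locks held, so it does
 * no real work: it takes a reference, queues the endpoint on req_list,
 * and kicks the taskqueue, which calls process_req() in thread context.
 */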
static int
iwch_so_upcall(struct socket *so, void *arg, int waitflag)
{
	struct iwch_ep *ep = arg;

	CTR6(KTR_IW_CXGB, "%s so %p so state %x ep %p ep state(%d)=%s", __FUNCTION__, so, so->so_state, ep, ep->com.state, states[ep->com.state]);
	mtx_lock(&req_lock);
	if (ep && ep->com.so && !ep->com.entry.tqe_prev) {
		get_ep(&ep->com);
		TAILQ_INSERT_TAIL(&req_list, &ep->com, entry);
		taskqueue_enqueue(iw_cxgb_taskq, &iw_cxgb_task);
	}
	mtx_unlock(&req_lock);
	return (SU_OK);
}

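/*
 * Demultiplex a deferred socket event.  The checks are ordered: connect
 * completion first, then socket error, peer FIN, full disconnect, and
 * finally plain receive data.
 */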
static void
process_socket_event(struct iwch_ep *ep)
{
	int state = state_read(&ep->com);
	struct socket *so = ep->com.so;

	CTR6(KTR_IW_CXGB, "%s so %p so state %x ep %p ep state(%d)=%s", __FUNCTION__, so, so->so_state, ep, ep->com.state, states[ep->com.state]);
	if (state == CONNECTING) {
		process_connected(ep);
		return;
	}

	if (state == LISTEN) {
		/* socket listening events are handled at IWCM */
		CTR3(KTR_IW_CXGB, "%s Invalid ep state:%u, ep:%p", __func__,
		    ep->com.state, ep);
		BUG();
		return;
	}

	/* connection error */
	if (so->so_error) {
		process_conn_error(ep);
		return;
	}

	/* peer close */
	if ((so->so_rcv.sb_state & SBS_CANTRCVMORE) && state < CLOSING) {
		process_peer_close(ep);
		return;
	}

	/* close complete */
	if (so->so_state & SS_ISDISCONNECTED) {
		process_close_complete(ep);
		return;
	}

	/* rx data */
	process_data(ep);
	return;
}

static void
process_req(void *ctx, int pending)
{
	struct iwch_ep_common *epc;

	CTR1(KTR_IW_CXGB, "%s enter", __FUNCTION__);
	mtx_lock(&req_lock);
	while (!TAILQ_EMPTY(&req_list)) {
		epc = TAILQ_FIRST(&req_list);
		TAILQ_REMOVE(&req_list, epc, entry);
		epc->entry.tqe_prev = NULL;
		mtx_unlock(&req_lock);
		if (epc->so)
			process_socket_event((struct iwch_ep *)epc);
		put_ep(epc);
		mtx_lock(&req_lock);
	}
	mtx_unlock(&req_lock);
}

int
iwch_cm_init(void)
{
	TAILQ_INIT(&req_list);
	mtx_init(&req_lock, "iw_cxgb req_list lock", NULL, MTX_DEF);
	iw_cxgb_taskq = taskqueue_create("iw_cxgb_taskq", M_NOWAIT,
	    taskqueue_thread_enqueue, &iw_cxgb_taskq);
	if (iw_cxgb_taskq == NULL) {
		printf("failed to allocate iw_cxgb taskqueue\n");
		return (ENOMEM);
	}
	taskqueue_start_threads(&iw_cxgb_taskq, 1, PI_NET, "iw_cxgb taskq");
	TASK_INIT(&iw_cxgb_task, 0, process_req, NULL);
	return (0);
}

void
iwch_cm_term(void)
{

	taskqueue_drain(iw_cxgb_taskq, &iw_cxgb_task);
	taskqueue_free(iw_cxgb_taskq);
}

void
iwch_cm_init_cpl(struct adapter *sc)
{

	t3_register_cpl_handler(sc, CPL_RDMA_TERMINATE, terminate);
	t3_register_cpl_handler(sc, CPL_RDMA_EC_STATUS, ec_status);
}

void
iwch_cm_term_cpl(struct adapter *sc)
{

	t3_register_cpl_handler(sc, CPL_RDMA_TERMINATE, NULL);
	t3_register_cpl_handler(sc, CPL_RDMA_EC_STATUS, NULL);
}
#endif