/**************************************************************************

Copyright (c) 2007, Chelsio Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Neither the name of the Chelsio Corporation nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/10/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.c 309378 2016-12-01 23:38:52Z jhb $");

#include "opt_inet.h"

#ifdef TCP_OFFLOAD
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/pciio.h>
#include <sys/conf.h>
#include <machine/bus.h>
#include <machine/resource.h>
#include <sys/bus_dma.h>
#include <sys/rman.h>
#include <sys/ioccom.h>
#include <sys/mbuf.h>
#include <sys/rwlock.h>
#include <sys/linker.h>
#include <sys/firmware.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sockio.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/queue.h>
#include <sys/taskqueue.h>
#include <sys/proc.h>
#include <sys/uio.h>

#include <net/route.h>
#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp.h>
#include <netinet/tcpip.h>

#include <rdma/ib_verbs.h>
#include <linux/idr.h>
#include <ulp/iw_cxgb/iw_cxgb_ib_intfc.h>

#include <cxgb_include.h>
#include <ulp/tom/cxgb_tom.h>
#include <ulp/tom/cxgb_toepcb.h>

#include <ulp/iw_cxgb/iw_cxgb_wr.h>
#include <ulp/iw_cxgb/iw_cxgb_hal.h>
#include <ulp/iw_cxgb/iw_cxgb_provider.h>
#include <ulp/iw_cxgb/iw_cxgb_cm.h>
#include <ulp/iw_cxgb/iw_cxgb.h>

#ifdef KTR
static char *states[] = {
	"idle",
	"listen",
	"connecting",
	"mpa_wait_req",
	"mpa_req_sent",
	"mpa_req_rcvd",
	"mpa_rep_sent",
	"fpdu_mode",
	"aborting",
	"closing",
	"moribund",
	"dead",
	NULL,
};
#endif

SYSCTL_NODE(_hw, OID_AUTO, iw_cxgb, CTLFLAG_RD, 0, "iw_cxgb driver parameters");

static int ep_timeout_secs = 60;
TUNABLE_INT("hw.iw_cxgb.ep_timeout_secs", &ep_timeout_secs);
SYSCTL_INT(_hw_iw_cxgb, OID_AUTO, ep_timeout_secs, CTLFLAG_RW, &ep_timeout_secs, 0,
    "CM Endpoint operation timeout in seconds (default=60)");

static int mpa_rev = 1;
TUNABLE_INT("hw.iw_cxgb.mpa_rev", &mpa_rev);
SYSCTL_INT(_hw_iw_cxgb, OID_AUTO, mpa_rev, CTLFLAG_RW, &mpa_rev, 0,
    "MPA Revision, 0 supports amso1100, 1 is spec compliant. (default=1)");

static int markers_enabled = 0;
TUNABLE_INT("hw.iw_cxgb.markers_enabled", &markers_enabled);
SYSCTL_INT(_hw_iw_cxgb, OID_AUTO, markers_enabled, CTLFLAG_RW, &markers_enabled, 0,
    "Enable MPA MARKERS (default(0)=disabled)");

static int crc_enabled = 1;
TUNABLE_INT("hw.iw_cxgb.crc_enabled", &crc_enabled);
SYSCTL_INT(_hw_iw_cxgb, OID_AUTO, crc_enabled, CTLFLAG_RW, &crc_enabled, 0,
    "Enable MPA CRC (default(1)=enabled)");

static int rcv_win = 256 * 1024;
TUNABLE_INT("hw.iw_cxgb.rcv_win", &rcv_win);
SYSCTL_INT(_hw_iw_cxgb, OID_AUTO, rcv_win, CTLFLAG_RW, &rcv_win, 0,
    "TCP receive window in bytes (default=256KB)");

static int snd_win = 32 * 1024;
TUNABLE_INT("hw.iw_cxgb.snd_win", &snd_win);
SYSCTL_INT(_hw_iw_cxgb, OID_AUTO, snd_win, CTLFLAG_RW, &snd_win, 0,
    "TCP send window in bytes (default=32KB)");

static unsigned int nocong = 0;
TUNABLE_INT("hw.iw_cxgb.nocong", &nocong);
SYSCTL_UINT(_hw_iw_cxgb, OID_AUTO, nocong, CTLFLAG_RW, &nocong, 0,
    "Turn off congestion control (default=0)");

static unsigned int cong_flavor = 1;
TUNABLE_INT("hw.iw_cxgb.cong_flavor", &cong_flavor);
SYSCTL_UINT(_hw_iw_cxgb, OID_AUTO, cong_flavor, CTLFLAG_RW, &cong_flavor, 0,
    "TCP Congestion control flavor (default=1)");

static void ep_timeout(void *arg);
static void connect_reply_upcall(struct iwch_ep *ep, int status);
static int iwch_so_upcall(struct socket *so, void *arg, int waitflag);

/*
 * Cruft to offload socket upcalls onto a taskqueue thread: the upcall
 * queues the endpoint on req_list and process_req() does the real work.
 */
static struct mtx req_lock;
static TAILQ_HEAD(iwch_ep_list, iwch_ep_common) req_list;
static struct task iw_cxgb_task;
static struct taskqueue *iw_cxgb_taskq;
static void process_req(void *ctx, int pending);

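/*
 * EP timer handling.  The timer holds its own reference on the
 * endpoint: start_ep_timer() takes one when it arms a fresh callout,
 * and stop_ep_timer()/ep_timeout() drop it when the timer is stopped
 * or fires.  ep_timeout_secs bounds how long we will sit in any
 * intermediate connection state before aborting the connection.
 */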
static void
start_ep_timer(struct iwch_ep *ep)
{
	CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep);
	if (callout_pending(&ep->timer)) {
		CTR2(KTR_IW_CXGB, "%s stopped / restarted timer ep %p", __FUNCTION__, ep);
		callout_deactivate(&ep->timer);
		callout_drain(&ep->timer);
	} else {
		/*
		 * XXX this looks racy
		 */
		get_ep(&ep->com);
		callout_init(&ep->timer, TRUE);
	}
	callout_reset(&ep->timer, ep_timeout_secs * hz, ep_timeout, ep);
}

static void
stop_ep_timer(struct iwch_ep *ep)
{
	CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep);
	if (!callout_pending(&ep->timer)) {
		CTR3(KTR_IW_CXGB, "%s timer stopped when it's not running!  ep %p state %u",
                       __func__, ep, ep->com.state);
		return;
	}
	callout_drain(&ep->timer);
	put_ep(&ep->com);
}

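/*
 * Snapshot the offloaded connection state out of the TCP pcb: the
 * hardware TID, the current send/receive sequence numbers, and the
 * emss (clamped to a 128 byte floor).  Fails with EINVAL if the
 * connection was never offloaded (TF_TOE not set), in which case we
 * cannot drive the RDMA transition.
 */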
static int
set_tcpinfo(struct iwch_ep *ep)
{
	struct socket *so = ep->com.so;
	struct inpcb *inp = sotoinpcb(so);
	struct tcpcb *tp;
	struct toepcb *toep;
	int rc = 0;

	INP_WLOCK(inp);
	tp = intotcpcb(inp);

	if ((tp->t_flags & TF_TOE) == 0) {
		rc = EINVAL;
		printf("%s: connection NOT OFFLOADED!\n", __func__);
		goto done;
	}
	toep = tp->t_toe;

	ep->hwtid = toep->tp_tid;
	ep->snd_seq = tp->snd_nxt;
	ep->rcv_seq = tp->rcv_nxt;
	ep->emss = tp->t_maxseg;
	if (ep->emss < 128)
		ep->emss = 128;
done:
	INP_WUNLOCK(inp);
	return (rc);
}

static enum iwch_ep_state
state_read(struct iwch_ep_common *epc)
{
	enum iwch_ep_state state;

	mtx_lock(&epc->lock);
	state = epc->state;
	mtx_unlock(&epc->lock);
	return state;
}

static void
__state_set(struct iwch_ep_common *epc, enum iwch_ep_state new)
{
	epc->state = new;
}

static void
state_set(struct iwch_ep_common *epc, enum iwch_ep_state new)
{

	mtx_lock(&epc->lock);
	CTR3(KTR_IW_CXGB, "%s - %s -> %s", __FUNCTION__, states[epc->state], states[new]);
	__state_set(epc, new);
	mtx_unlock(&epc->lock);
	return;
}

static void *
alloc_ep(int size, int flags)
{
	struct iwch_ep_common *epc;

	epc = malloc(size, M_DEVBUF, flags);
	if (epc) {
		memset(epc, 0, size);
		refcount_init(&epc->refcount, 1);
		mtx_init(&epc->lock, "iwch_epc lock", NULL, MTX_DEF|MTX_DUPOK);
		cv_init(&epc->waitq, "iwch_epc cv");
	}
	CTR2(KTR_IW_CXGB, "%s alloc ep %p", __FUNCTION__, epc);
	return epc;
}

void
__free_ep(struct iwch_ep_common *epc)
{
	CTR3(KTR_IW_CXGB, "%s ep %p state %s", __FUNCTION__, epc, states[state_read(epc)]);
	KASSERT(!epc->entry.tqe_prev, ("%s epc %p still on req list!\n", __FUNCTION__, epc));
	free(epc, M_DEVBUF);
}

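/*
 * Route lookup for the destination.  Note that only the peer address
 * is actually consulted; the local address, port, and TOS arguments
 * are currently unused.  The caller owns the returned rtentry and
 * must RTFREE() it.
 */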
static struct rtentry *
find_route(__be32 local_ip, __be32 peer_ip, __be16 local_port,
    __be16 peer_port, u8 tos)
{
	struct route iproute;
	struct sockaddr_in *dst = (struct sockaddr_in *)&iproute.ro_dst;

	bzero(&iproute, sizeof iproute);
	dst->sin_family = AF_INET;
	dst->sin_len = sizeof *dst;
	dst->sin_addr.s_addr = peer_ip;

	rtalloc(&iproute);
	return iproute.ro_rt;
}

static void
close_socket(struct iwch_ep_common *epc, int close)
{
	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, epc, epc->so, states[epc->state]);
	SOCK_LOCK(epc->so);
	soupcall_clear(epc->so, SO_RCV);
	SOCK_UNLOCK(epc->so);
	if (close)
		soclose(epc->so);
	else
		soshutdown(epc->so, SHUT_RDWR);
	epc->so = NULL;
}

static void
shutdown_socket(struct iwch_ep_common *epc)
{
	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, epc, epc->so, states[epc->state]);
	soshutdown(epc->so, SHUT_WR);
}

static void
abort_socket(struct iwch_ep *ep)
{
	struct sockopt sopt;
	int err;
	struct linger l;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
	l.l_onoff = 1;
	l.l_linger = 0;

	/* linger_time of 0 forces RST to be sent */
	sopt.sopt_dir = SOPT_SET;
	sopt.sopt_level = SOL_SOCKET;
	sopt.sopt_name = SO_LINGER;
	sopt.sopt_val = (caddr_t)&l;
	sopt.sopt_valsize = sizeof l;
	sopt.sopt_td = NULL;
	err = sosetopt(ep->com.so, &sopt);
	if (err)
		printf("%s can't set linger to 0, no RST! err %d\n", __FUNCTION__, err);
}

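/*
 * MPA (Marker PDU Aligned framing) startup exchange.  The active side
 * streams an MPA request over the established TCP connection; the
 * passive side answers with an MPA reply (or a reply carrying the
 * REJECT flag).  Private data, if any, immediately follows the fixed
 * header, and private_data_size travels in network byte order:
 *
 *	memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key));
 *	mpa->private_data_size = htons(ep->plen);
 *
 * Once a valid reply has been consumed, both sides drop into FPDU
 * mode and the QP is moved to RTS.
 */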
static void
send_mpa_req(struct iwch_ep *ep)
{
	int mpalen;
	struct mpa_message *mpa;
	struct mbuf *m;
	int err;

	CTR3(KTR_IW_CXGB, "%s ep %p pd_len %d", __FUNCTION__, ep, ep->plen);

	mpalen = sizeof(*mpa) + ep->plen;
	m = m_gethdr(M_NOWAIT, MT_DATA);	/* m_gethdr() takes (how, type) */
	if (m == NULL) {
		connect_reply_upcall(ep, -ENOMEM);
		return;
	}
	mpa = mtod(m, struct mpa_message *);
	m->m_len = mpalen;
	m->m_pkthdr.len = mpalen;
	memset(mpa, 0, sizeof(*mpa));
	memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key));
	mpa->flags = (crc_enabled ? MPA_CRC : 0) |
		     (markers_enabled ? MPA_MARKERS : 0);
	mpa->private_data_size = htons(ep->plen);
	mpa->revision = mpa_rev;
	if (ep->plen)
		memcpy(mpa->private_data, ep->mpa_pkt + sizeof(*mpa), ep->plen);

	err = sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT, ep->com.thread);
	if (err) {
		/* sosend() consumes the mbuf chain, even on failure. */
		connect_reply_upcall(ep, -ENOMEM);
		return;
	}

	start_ep_timer(ep);
	state_set(&ep->com, MPA_REQ_SENT);
	return;
}

static int
send_mpa_reject(struct iwch_ep *ep, const void *pdata, u8 plen)
{
	int mpalen;
	struct mpa_message *mpa;
	struct mbuf *m;
	int err;

	CTR3(KTR_IW_CXGB, "%s ep %p plen %d", __FUNCTION__, ep, plen);

	mpalen = sizeof(*mpa) + plen;

	m = m_gethdr(M_NOWAIT, MT_DATA);
	if (m == NULL) {
		printf("%s - cannot alloc mbuf!\n", __FUNCTION__);
		return (-ENOMEM);
	}
	mpa = mtod(m, struct mpa_message *);
	m->m_len = mpalen;
	m->m_pkthdr.len = mpalen;
	memset(mpa, 0, sizeof(*mpa));
	memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
	mpa->flags = MPA_REJECT;
	mpa->revision = mpa_rev;
	mpa->private_data_size = htons(plen);
	if (plen)
		memcpy(mpa->private_data, pdata, plen);
	err = sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT, ep->com.thread);
	PANIC_IF(err);
	return 0;
}

static int
send_mpa_reply(struct iwch_ep *ep, const void *pdata, u8 plen)
{
	int mpalen;
	struct mpa_message *mpa;
	struct mbuf *m;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p plen %d", __FUNCTION__, ep, ep->com.so, plen);

	mpalen = sizeof(*mpa) + plen;

	m = m_gethdr(M_NOWAIT, MT_DATA);
	if (m == NULL) {
		printf("%s - cannot alloc mbuf!\n", __FUNCTION__);
		return (-ENOMEM);
	}
	mpa = mtod(m, struct mpa_message *);
	m->m_len = mpalen;
	m->m_pkthdr.len = mpalen;
	memset(mpa, 0, sizeof(*mpa));
	memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
	mpa->flags = (ep->mpa_attr.crc_enabled ? MPA_CRC : 0) |
		     (markers_enabled ? MPA_MARKERS : 0);
	mpa->revision = mpa_rev;
	mpa->private_data_size = htons(plen);
	if (plen)
		memcpy(mpa->private_data, pdata, plen);

	state_set(&ep->com, MPA_REP_SENT);
	return sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT,
		ep->com.thread);
}

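/*
 * Upcalls into the IWCM.  Each helper below packages the current
 * endpoint state into a struct iw_cm_event and hands it to the
 * cm_id's event_handler.  Events that terminate the connection
 * (close complete, peer abort) also drop the cm_id reference and
 * detach the QP from the endpoint.
 */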
static void
close_complete_upcall(struct iwch_ep *ep)
{
	struct iw_cm_event event;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
	memset(&event, 0, sizeof(event));
	event.event = IW_CM_EVENT_CLOSE;
	if (ep->com.cm_id) {
		CTR3(KTR_IW_CXGB, "close complete delivered ep %p cm_id %p tid %d",
		     ep, ep->com.cm_id, ep->hwtid);
		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
		ep->com.cm_id->rem_ref(ep->com.cm_id);
		ep->com.cm_id = NULL;
		ep->com.qp = NULL;
	}
}

static void
abort_connection(struct iwch_ep *ep)
{
	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
	state_set(&ep->com, ABORTING);
	abort_socket(ep);
	close_socket(&ep->com, 0);
	close_complete_upcall(ep);
	state_set(&ep->com, DEAD);
	put_ep(&ep->com);
}

static void
peer_close_upcall(struct iwch_ep *ep)
{
	struct iw_cm_event event;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
	memset(&event, 0, sizeof(event));
	event.event = IW_CM_EVENT_DISCONNECT;
	if (ep->com.cm_id) {
		CTR3(KTR_IW_CXGB, "peer close delivered ep %p cm_id %p tid %d",
		     ep, ep->com.cm_id, ep->hwtid);
		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
	}
}

static void
peer_abort_upcall(struct iwch_ep *ep)
{
	struct iw_cm_event event;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
	memset(&event, 0, sizeof(event));
	event.event = IW_CM_EVENT_CLOSE;
	event.status = ECONNRESET;
	if (ep->com.cm_id) {
		CTR3(KTR_IW_CXGB, "abort delivered ep %p cm_id %p tid %d", ep,
		     ep->com.cm_id, ep->hwtid);
		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
		ep->com.cm_id->rem_ref(ep->com.cm_id);
		ep->com.cm_id = NULL;
		ep->com.qp = NULL;
	}
}

static void
connect_reply_upcall(struct iwch_ep *ep, int status)
{
	struct iw_cm_event event;

	CTR5(KTR_IW_CXGB, "%s ep %p so %p state %s status %d", __FUNCTION__, ep, ep->com.so, states[ep->com.state], status);
	memset(&event, 0, sizeof(event));
	event.event = IW_CM_EVENT_CONNECT_REPLY;
	event.status = status;
	event.local_addr = ep->com.local_addr;
	event.remote_addr = ep->com.remote_addr;

	if ((status == 0) || (status == ECONNREFUSED)) {
		event.private_data_len = ep->plen;
		event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
	}
	if (ep->com.cm_id) {
		CTR4(KTR_IW_CXGB, "%s ep %p tid %d status %d", __FUNCTION__, ep,
		     ep->hwtid, status);
		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
	}
	if (status < 0) {
		ep->com.cm_id->rem_ref(ep->com.cm_id);
		ep->com.cm_id = NULL;
		ep->com.qp = NULL;
	}
}

static void
connect_request_upcall(struct iwch_ep *ep)
{
	struct iw_cm_event event;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
	memset(&event, 0, sizeof(event));
	event.event = IW_CM_EVENT_CONNECT_REQUEST;
	event.local_addr = ep->com.local_addr;
	event.remote_addr = ep->com.remote_addr;
	event.private_data_len = ep->plen;
	event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
	event.provider_data = ep;
	event.so = ep->com.so;
	if (state_read(&ep->parent_ep->com) != DEAD) {
		get_ep(&ep->com);
		ep->parent_ep->com.cm_id->event_handler(
						ep->parent_ep->com.cm_id,
						&event);
	}
	put_ep(&ep->parent_ep->com);
}

static void
established_upcall(struct iwch_ep *ep)
{
	struct iw_cm_event event;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
	memset(&event, 0, sizeof(event));
	event.event = IW_CM_EVENT_ESTABLISHED;
	if (ep->com.cm_id) {
		CTR3(KTR_IW_CXGB, "%s ep %p tid %d", __FUNCTION__, ep, ep->hwtid);
		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
	}
}

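/*
 * Consume the streamed MPA reply on the active side.  Data is pulled
 * off the socket with soreceive() and accumulated in ep->mpa_pkt
 * until the full header plus private data has arrived; a partial
 * message simply re-arms the timer and waits for the next upcall.
 * On a valid, non-REJECT reply the QP is bound to the TID and moved
 * to RTS, and the result is reported via connect_reply_upcall().
 */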
static void
process_mpa_reply(struct iwch_ep *ep)
{
	struct mpa_message *mpa;
	u16 plen;
	struct iwch_qp_attributes attrs;
	enum iwch_qp_attr_mask mask;
	int err;
	struct mbuf *top, *m;
	int flags = MSG_DONTWAIT;
	struct uio uio;
	int len;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);

	/*
	 * Stop mpa timer.  If it expired, then the state has
	 * changed and we bail since ep_timeout already aborted
	 * the connection.
	 */
	stop_ep_timer(ep);
	if (state_read(&ep->com) != MPA_REQ_SENT)
		return;

	uio.uio_resid = len = 1000000;
	uio.uio_td = ep->com.thread;
	err = soreceive(ep->com.so, NULL, &uio, &top, NULL, &flags);
	if (err) {
		if (err == EWOULDBLOCK) {
			start_ep_timer(ep);
			return;
		}
		err = -err;
		goto err;
	}

	if (ep->com.so->so_rcv.sb_mb) {
		printf("%s data after soreceive called! so %p sb_mb %p top %p\n",
			__FUNCTION__, ep->com.so, ep->com.so->so_rcv.sb_mb, top);
	}

	m = top;
	do {
		/*
		 * If we get more than the supported amount of private data
		 * then we must fail this connection.
		 */
		if (ep->mpa_pkt_len + m->m_len > sizeof(ep->mpa_pkt)) {
			err = (-EINVAL);
			goto err;
		}

		/*
		 * Copy the new data into our accumulation buffer.
		 */
		m_copydata(m, 0, m->m_len, &(ep->mpa_pkt[ep->mpa_pkt_len]));
		ep->mpa_pkt_len += m->m_len;
		if (!m->m_next)
			m = m->m_nextpkt;
		else
			m = m->m_next;
	} while (m);

	m_freem(top);

	/*
	 * If we don't even have the mpa message, then bail.
	 */
	if (ep->mpa_pkt_len < sizeof(*mpa))
		return;
	mpa = (struct mpa_message *)ep->mpa_pkt;

	/* Validate MPA header. */
	if (mpa->revision != mpa_rev) {
		CTR2(KTR_IW_CXGB, "%s bad mpa rev %d", __FUNCTION__, mpa->revision);
		err = EPROTO;
		goto err;
	}
	if (memcmp(mpa->key, MPA_KEY_REP, sizeof(mpa->key))) {
		CTR2(KTR_IW_CXGB, "%s bad mpa key |%16s|", __FUNCTION__, mpa->key);
		err = EPROTO;
		goto err;
	}

	plen = ntohs(mpa->private_data_size);

	/*
	 * Fail if there's too much private data.
	 */
	if (plen > MPA_MAX_PRIVATE_DATA) {
		CTR2(KTR_IW_CXGB, "%s plen too big %d", __FUNCTION__, plen);
		err = EPROTO;
		goto err;
	}

	/*
	 * Fail if we received more data than the advertised private
	 * data length accounts for.
	 */
	if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
		CTR2(KTR_IW_CXGB, "%s pkt too big %d", __FUNCTION__, ep->mpa_pkt_len);
		err = EPROTO;
		goto err;
	}

	ep->plen = (u8) plen;

	/*
	 * If we don't have all the pdata yet, then bail.
	 * We'll continue processing when more data arrives.
	 */
	if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
		return;

	if (mpa->flags & MPA_REJECT) {
		err = ECONNREFUSED;
		goto err;
	}

	/*
	 * If we get here we have accumulated the entire mpa
	 * start reply message including private data. And
	 * the MPA header is valid.
	 */
	CTR1(KTR_IW_CXGB, "%s mpa rpl looks good!", __FUNCTION__);
	state_set(&ep->com, FPDU_MODE);
	ep->mpa_attr.initiator = 1;
	ep->mpa_attr.crc_enabled = ((mpa->flags & MPA_CRC) | crc_enabled) ? 1 : 0;
	ep->mpa_attr.recv_marker_enabled = markers_enabled;
	ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
	ep->mpa_attr.version = mpa_rev;
	if (set_tcpinfo(ep)) {
		printf("%s set_tcpinfo error\n", __FUNCTION__);
		err = -EINVAL;
		goto err;
	}
	CTR5(KTR_IW_CXGB, "%s - crc_enabled=%d, recv_marker_enabled=%d, "
	     "xmit_marker_enabled=%d, version=%d", __FUNCTION__,
	     ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
	     ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version);

	attrs.mpa_attr = ep->mpa_attr;
	attrs.max_ird = ep->ird;
	attrs.max_ord = ep->ord;
	attrs.llp_stream_handle = ep;
	attrs.next_state = IWCH_QP_STATE_RTS;

	mask = IWCH_QP_ATTR_NEXT_STATE |
	    IWCH_QP_ATTR_LLP_STREAM_HANDLE | IWCH_QP_ATTR_MPA_ATTR |
	    IWCH_QP_ATTR_MAX_IRD | IWCH_QP_ATTR_MAX_ORD;

	/* bind QP and TID with INIT_WR */
	err = iwch_modify_qp(ep->com.qp->rhp,
			     ep->com.qp, mask, &attrs, 1);
	if (!err)
		goto out;
err:
	abort_connection(ep);
out:
	connect_reply_upcall(ep, err);
	return;
}

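/*
 * Passive-side counterpart of process_mpa_reply(): accumulate the
 * streamed MPA request, validate the header, and once the whole
 * request (including private data) has arrived, move to MPA_REQ_RCVD
 * and deliver a connect request upcall to the listening cm_id.
 */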
static void
process_mpa_request(struct iwch_ep *ep)
{
	struct mpa_message *mpa;
	u16 plen;
	int flags = MSG_DONTWAIT;
	struct mbuf *top, *m;
	int err;
	struct uio uio;
	int len;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);

	/*
	 * Stop mpa timer.  If it expired, then the state has
	 * changed and we bail since ep_timeout already aborted
	 * the connection.
	 */
	stop_ep_timer(ep);
	if (state_read(&ep->com) != MPA_REQ_WAIT)
		return;

	uio.uio_resid = len = 1000000;
	uio.uio_td = ep->com.thread;
	err = soreceive(ep->com.so, NULL, &uio, &top, NULL, &flags);
	if (err) {
		if (err == EWOULDBLOCK) {
			start_ep_timer(ep);
			return;
		}
		err = -err;
		goto err;
	}

	m = top;
	do {
		/*
		 * If we get more than the supported amount of private data
		 * then we must fail this connection.
		 */
		if (ep->mpa_pkt_len + m->m_len > sizeof(ep->mpa_pkt)) {
			CTR2(KTR_IW_CXGB, "%s mpa message too big %d", __FUNCTION__,
				ep->mpa_pkt_len + m->m_len);
			goto err;
		}

		/*
		 * Copy the new data into our accumulation buffer.
		 */
		m_copydata(m, 0, m->m_len, &(ep->mpa_pkt[ep->mpa_pkt_len]));
		ep->mpa_pkt_len += m->m_len;

		if (!m->m_next)
			m = m->m_nextpkt;
		else
			m = m->m_next;
	} while (m);

	m_freem(top);

	/*
	 * If we don't even have the mpa message, then bail.
	 * We'll continue processing when more data arrives.
	 */
	if (ep->mpa_pkt_len < sizeof(*mpa)) {
		start_ep_timer(ep);
		CTR2(KTR_IW_CXGB, "%s not enough header %d...waiting...", __FUNCTION__,
			ep->mpa_pkt_len);
		return;
	}
	mpa = (struct mpa_message *) ep->mpa_pkt;

	/*
	 * Validate MPA Header.
	 */
	if (mpa->revision != mpa_rev) {
		CTR2(KTR_IW_CXGB, "%s bad mpa rev %d", __FUNCTION__, mpa->revision);
		goto err;
	}

	if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key))) {
		CTR2(KTR_IW_CXGB, "%s bad mpa key |%16s|", __FUNCTION__, mpa->key);
		goto err;
	}

	plen = ntohs(mpa->private_data_size);

	/*
	 * Fail if there's too much private data.
	 */
	if (plen > MPA_MAX_PRIVATE_DATA) {
		CTR2(KTR_IW_CXGB, "%s plen too big %d", __FUNCTION__, plen);
		goto err;
	}

	/*
	 * Fail if we received more data than the advertised private
	 * data length accounts for.
	 */
	if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
		CTR2(KTR_IW_CXGB, "%s more data after private data %d", __FUNCTION__,
			ep->mpa_pkt_len);
		goto err;
	}
	ep->plen = (u8) plen;

	/*
	 * If we don't have all the pdata yet, then bail.
	 */
	if (ep->mpa_pkt_len < (sizeof(*mpa) + plen)) {
		start_ep_timer(ep);
		CTR2(KTR_IW_CXGB, "%s more mpa msg to come %d", __FUNCTION__,
			ep->mpa_pkt_len);
		return;
	}

	/*
	 * If we get here we have accumulated the entire mpa
	 * start request message including private data.
	 */
	ep->mpa_attr.initiator = 0;
	ep->mpa_attr.crc_enabled = ((mpa->flags & MPA_CRC) | crc_enabled) ? 1 : 0;
	ep->mpa_attr.recv_marker_enabled = markers_enabled;
	ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
	ep->mpa_attr.version = mpa_rev;
	if (set_tcpinfo(ep)) {
		printf("%s set_tcpinfo error\n", __FUNCTION__);
		goto err;
	}
	CTR5(KTR_IW_CXGB, "%s - crc_enabled=%d, recv_marker_enabled=%d, "
	     "xmit_marker_enabled=%d, version=%d", __FUNCTION__,
	     ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
	     ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version);

	state_set(&ep->com, MPA_REQ_RCVD);

	/* drive upcall */
	connect_request_upcall(ep);
	return;
err:
	abort_connection(ep);
	return;
}

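/*
 * The peer shut down its half of the connection (we saw a FIN, i.e.
 * SBS_CANTRCVMORE).  Advance the endpoint state machine accordingly,
 * move the QP toward CLOSING/IDLE as needed, and deliver disconnect
 * or close upcalls to the consumer.
 */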
static void
process_peer_close(struct iwch_ep *ep)
{
	struct iwch_qp_attributes attrs;
	int disconnect = 1;
	int release = 0;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);

	mtx_lock(&ep->com.lock);
	switch (ep->com.state) {
	case MPA_REQ_WAIT:
		__state_set(&ep->com, CLOSING);
		break;
	case MPA_REQ_SENT:
		__state_set(&ep->com, CLOSING);
		connect_reply_upcall(ep, -ECONNRESET);
		break;
	case MPA_REQ_RCVD:

		/*
		 * We're gonna mark this puppy DEAD, but keep
		 * the reference on it until the ULP accepts or
		 * rejects the CR.
		 */
		__state_set(&ep->com, CLOSING);
		break;
	case MPA_REP_SENT:
		__state_set(&ep->com, CLOSING);
		break;
	case FPDU_MODE:
		start_ep_timer(ep);
		__state_set(&ep->com, CLOSING);
		attrs.next_state = IWCH_QP_STATE_CLOSING;
		iwch_modify_qp(ep->com.qp->rhp, ep->com.qp,
			       IWCH_QP_ATTR_NEXT_STATE, &attrs, 1);
		peer_close_upcall(ep);
		break;
	case ABORTING:
		disconnect = 0;
		break;
	case CLOSING:
		__state_set(&ep->com, MORIBUND);
		disconnect = 0;
		break;
	case MORIBUND:
		stop_ep_timer(ep);
		if (ep->com.cm_id && ep->com.qp) {
			attrs.next_state = IWCH_QP_STATE_IDLE;
			iwch_modify_qp(ep->com.qp->rhp, ep->com.qp,
				       IWCH_QP_ATTR_NEXT_STATE, &attrs, 1);
		}
		close_socket(&ep->com, 0);
		close_complete_upcall(ep);
		__state_set(&ep->com, DEAD);
		release = 1;
		disconnect = 0;
		break;
	case DEAD:
		disconnect = 0;
		break;
	default:
		PANIC_IF(1);
	}
	mtx_unlock(&ep->com.lock);
	if (disconnect)
		iwch_ep_disconnect(ep, 0, M_NOWAIT);
	if (release)
		put_ep(&ep->com);
	return;
}

static void
process_conn_error(struct iwch_ep *ep)
{
	struct iwch_qp_attributes attrs;
	int ret;

	mtx_lock(&ep->com.lock);
	CTR3(KTR_IW_CXGB, "%s ep %p state %u", __func__, ep, ep->com.state);
	switch (ep->com.state) {
	case MPA_REQ_WAIT:
		stop_ep_timer(ep);
		break;
	case MPA_REQ_SENT:
		stop_ep_timer(ep);
		connect_reply_upcall(ep, -ECONNRESET);
		break;
	case MPA_REP_SENT:
		ep->com.rpl_err = ECONNRESET;
		CTR1(KTR_IW_CXGB, "waking up ep %p", ep);
		break;
	case MPA_REQ_RCVD:

		/*
		 * We're gonna mark this puppy DEAD, but keep
		 * the reference on it until the ULP accepts or
		 * rejects the CR.
		 */
		break;
	case MORIBUND:
	case CLOSING:
		stop_ep_timer(ep);
		/*FALLTHROUGH*/
	case FPDU_MODE:
		if (ep->com.cm_id && ep->com.qp) {
			attrs.next_state = IWCH_QP_STATE_ERROR;
			ret = iwch_modify_qp(ep->com.qp->rhp,
				     ep->com.qp, IWCH_QP_ATTR_NEXT_STATE,
				     &attrs, 1);
			if (ret)
				log(LOG_ERR,
				       "%s - qp <- error failed!\n",
				       __FUNCTION__);
		}
		peer_abort_upcall(ep);
		break;
	case ABORTING:
		break;
	case DEAD:
		mtx_unlock(&ep->com.lock);
		CTR2(KTR_IW_CXGB, "%s so_error %d IN DEAD STATE!!!!", __FUNCTION__,
			ep->com.so->so_error);
		return;
	default:
		PANIC_IF(1);
		break;
	}

	if (ep->com.state != ABORTING) {
		close_socket(&ep->com, 0);
		__state_set(&ep->com, DEAD);
		put_ep(&ep->com);
	}
	mtx_unlock(&ep->com.lock);
	return;
}

static void
process_close_complete(struct iwch_ep *ep)
{
	struct iwch_qp_attributes attrs;
	int release = 0;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
	PANIC_IF(!ep);

	/* The cm_id may be null if we failed to connect */
	mtx_lock(&ep->com.lock);
	switch (ep->com.state) {
	case CLOSING:
		__state_set(&ep->com, MORIBUND);
		break;
	case MORIBUND:
		stop_ep_timer(ep);
		if ((ep->com.cm_id) && (ep->com.qp)) {
			attrs.next_state = IWCH_QP_STATE_IDLE;
			iwch_modify_qp(ep->com.qp->rhp,
					     ep->com.qp,
					     IWCH_QP_ATTR_NEXT_STATE,
					     &attrs, 1);
		}
		if (ep->parent_ep)
			close_socket(&ep->com, 1);
		else
			close_socket(&ep->com, 0);
		close_complete_upcall(ep);
		__state_set(&ep->com, DEAD);
		release = 1;
		break;
	case ABORTING:
		break;
	case DEAD:
	default:
		PANIC_IF(1);
		break;
	}
	mtx_unlock(&ep->com.lock);
	if (release)
		put_ep(&ep->com);
	return;
}

/*
 * T3A does 3 things when a TERM is received:
 * 1) send up a CPL_RDMA_TERMINATE message with the TERM packet
 * 2) generate an async event on the QP with the TERMINATE opcode
 * 3) post a TERMINATE opcode cqe into the associated CQ.
 *
 * For (1), we save the message in the qp for later consumption by the
 * consumer.
 * For (2), we move the QP into TERMINATE, post a QP event and disconnect.
 * For (3), we toss the CQE in cxio_poll_cq().
 *
 * terminate() handles case (1)...
 */
static int
terminate(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
{
	struct adapter *sc = qs->adap;
	struct tom_data *td = sc->tom_softc;
	uint32_t hash = *((uint32_t *)r + 1);
	unsigned int tid = ntohl(hash) >> 8 & 0xfffff;
	struct toepcb *toep = lookup_tid(&td->tid_maps, tid);
	struct socket *so = toep->tp_inp->inp_socket;
	struct iwch_ep *ep = so->so_rcv.sb_upcallarg;

	if (state_read(&ep->com) != FPDU_MODE)
		goto done;

	m_adj(m, sizeof(struct cpl_rdma_terminate));

	CTR4(KTR_IW_CXGB, "%s: tid %u, ep %p, saved %d bytes",
	    __func__, tid, ep, m->m_len);

	m_copydata(m, 0, m->m_len, ep->com.qp->attr.terminate_buffer);
	ep->com.qp->attr.terminate_msg_len = m->m_len;
	ep->com.qp->attr.is_terminate_local = 0;

done:
	m_freem(m);
	return (0);
}

static int
ec_status(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
{
	struct adapter *sc = qs->adap;
	struct tom_data *td = sc->tom_softc;
	struct cpl_rdma_ec_status *rep = mtod(m, void *);
	unsigned int tid = GET_TID(rep);
	struct toepcb *toep = lookup_tid(&td->tid_maps, tid);
	struct socket *so = toep->tp_inp->inp_socket;
	struct iwch_ep *ep = so->so_rcv.sb_upcallarg;

	if (rep->status) {
		struct iwch_qp_attributes attrs;

		CTR1(KTR_IW_CXGB, "%s BAD CLOSE - Aborting", __FUNCTION__);
		stop_ep_timer(ep);
		attrs.next_state = IWCH_QP_STATE_ERROR;
		iwch_modify_qp(ep->com.qp->rhp,
			     ep->com.qp,
			     IWCH_QP_ATTR_NEXT_STATE,
			     &attrs, 1);
		abort_connection(ep);
	}

	m_freem(m);
	return (0);
}

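/*
 * Callout handler armed by start_ep_timer().  Firing means a
 * connection sat too long in an intermediate state, so push the
 * endpoint to ABORTING (failing a pending connect with -ETIMEDOUT),
 * error out the QP if one is attached, and abort the connection.
 * The timer's endpoint reference is dropped on the way out.
 */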
static void
ep_timeout(void *arg)
{
	struct iwch_ep *ep = (struct iwch_ep *)arg;
	struct iwch_qp_attributes attrs;
	int err = 0;
	int abort = 1;

	mtx_lock(&ep->com.lock);
	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
	switch (ep->com.state) {
	case MPA_REQ_SENT:
		__state_set(&ep->com, ABORTING);
		connect_reply_upcall(ep, -ETIMEDOUT);
		break;
	case MPA_REQ_WAIT:
		__state_set(&ep->com, ABORTING);
		break;
	case CLOSING:
	case MORIBUND:
		if (ep->com.cm_id && ep->com.qp)
			err = 1;
		__state_set(&ep->com, ABORTING);
		break;
	default:
		CTR3(KTR_IW_CXGB, "%s unexpected state ep %p state %u\n",
			__func__, ep, ep->com.state);
		abort = 0;
	}
	mtx_unlock(&ep->com.lock);
	if (err) {
		attrs.next_state = IWCH_QP_STATE_ERROR;
		iwch_modify_qp(ep->com.qp->rhp,
			     ep->com.qp, IWCH_QP_ATTR_NEXT_STATE,
			     &attrs, 1);
	}
	if (abort)
		abort_connection(ep);
	put_ep(&ep->com);
}

int
iwch_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
{
	int err;
	struct iwch_ep *ep = to_ep(cm_id);
	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);

	if (state_read(&ep->com) == DEAD) {
		put_ep(&ep->com);
		return (-ECONNRESET);
	}
	PANIC_IF(state_read(&ep->com) != MPA_REQ_RCVD);
	if (mpa_rev == 0) {
		abort_connection(ep);
	} else {
		err = send_mpa_reject(ep, pdata, pdata_len);
		err = soshutdown(ep->com.so, SHUT_RDWR);
	}
	put_ep(&ep->com);
	return 0;
}

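/*
 * Consumer accepted the connect request: take a cm_id reference,
 * record the negotiated IRD/ORD, bind the QP to the endpoint and
 * move it to RTS, then send the MPA reply.  On success the endpoint
 * enters FPDU_MODE and an ESTABLISHED event is delivered.
 */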
int
iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
{
	int err;
	struct iwch_qp_attributes attrs;
	enum iwch_qp_attr_mask mask;
	struct iwch_ep *ep = to_ep(cm_id);
	struct iwch_dev *h = to_iwch_dev(cm_id->device);
	struct iwch_qp *qp = get_qhp(h, conn_param->qpn);

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
	if (state_read(&ep->com) == DEAD) {
		err = -ECONNRESET;
		goto err;
	}

	PANIC_IF(state_read(&ep->com) != MPA_REQ_RCVD);
	PANIC_IF(!qp);

	if ((conn_param->ord > qp->rhp->attr.max_rdma_read_qp_depth) ||
	    (conn_param->ird > qp->rhp->attr.max_rdma_reads_per_qp)) {
		abort_connection(ep);
		err = -EINVAL;
		goto err;
	}

	cm_id->add_ref(cm_id);
	ep->com.cm_id = cm_id;
	ep->com.qp = qp;

	ep->com.rpl_err = 0;
	ep->com.rpl_done = 0;
	ep->ird = conn_param->ird;
	ep->ord = conn_param->ord;
	CTR3(KTR_IW_CXGB, "%s ird %d ord %d", __FUNCTION__, ep->ird, ep->ord);

	/* bind QP to EP and move to RTS */
	attrs.mpa_attr = ep->mpa_attr;
	attrs.max_ird = ep->ird;
	attrs.max_ord = ep->ord;
	attrs.llp_stream_handle = ep;
	attrs.next_state = IWCH_QP_STATE_RTS;

	/* bind QP and TID with INIT_WR */
	mask = IWCH_QP_ATTR_NEXT_STATE |
			     IWCH_QP_ATTR_LLP_STREAM_HANDLE |
			     IWCH_QP_ATTR_MPA_ATTR |
			     IWCH_QP_ATTR_MAX_IRD |
			     IWCH_QP_ATTR_MAX_ORD;

	err = iwch_modify_qp(ep->com.qp->rhp,
			     ep->com.qp, mask, &attrs, 1);

	if (err)
		goto err1;

	err = send_mpa_reply(ep, conn_param->private_data,
			     conn_param->private_data_len);
	if (err)
		goto err1;
	state_set(&ep->com, FPDU_MODE);
	established_upcall(ep);
	put_ep(&ep->com);
	return 0;
err1:
	ep->com.cm_id = NULL;
	ep->com.qp = NULL;
	cm_id->rem_ref(cm_id);
err:
	put_ep(&ep->com);
	return err;
}

static int
init_sock(struct iwch_ep_common *epc)
{
	int err;
	struct sockopt sopt;
	int on = 1;

	SOCK_LOCK(epc->so);
	soupcall_set(epc->so, SO_RCV, iwch_so_upcall, epc);
	epc->so->so_state |= SS_NBIO;
	SOCK_UNLOCK(epc->so);
	sopt.sopt_dir = SOPT_SET;
	sopt.sopt_level = IPPROTO_TCP;
	sopt.sopt_name = TCP_NODELAY;
	sopt.sopt_val = (caddr_t)&on;
	sopt.sopt_valsize = sizeof on;
	sopt.sopt_td = NULL;
	err = sosetopt(epc->so, &sopt);
	if (err)
		printf("%s can't set TCP_NODELAY err %d\n", __FUNCTION__, err);

	return 0;
}

static int
is_loopback_dst(struct iw_cm_id *cm_id)
{
	uint16_t port = cm_id->remote_addr.sin_port;
	int ifa_present;

	cm_id->remote_addr.sin_port = 0;
	ifa_present = ifa_ifwithaddr_check(
	    (struct sockaddr *)&cm_id->remote_addr);
	cm_id->remote_addr.sin_port = port;
	return (ifa_present);
}

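/*
 * Active connection setup.  Loopback destinations are refused (the
 * TOE cannot offload them), the route is verified to egress via a
 * TOE-capable interface, and the connect is then driven through the
 * host TCP stack with soconnect(); the MPA request goes out from
 * process_connected() once the socket reports it is connected.
 */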
int
iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
{
	int err = 0;
	struct iwch_dev *h = to_iwch_dev(cm_id->device);
	struct iwch_ep *ep;
	struct rtentry *rt;
	struct toedev *tdev;

	if (is_loopback_dst(cm_id)) {
		err = -ENOSYS;
		goto out;
	}

	ep = alloc_ep(sizeof(*ep), M_NOWAIT);
	if (!ep) {
		printf("%s - cannot alloc ep.\n", __FUNCTION__);
		err = (-ENOMEM);
		goto out;
	}
	callout_init(&ep->timer, TRUE);
	ep->plen = conn_param->private_data_len;
	if (ep->plen)
		memcpy(ep->mpa_pkt + sizeof(struct mpa_message),
		       conn_param->private_data, ep->plen);
	ep->ird = conn_param->ird;
	ep->ord = conn_param->ord;

	cm_id->add_ref(cm_id);
	ep->com.cm_id = cm_id;
	ep->com.qp = get_qhp(h, conn_param->qpn);
	ep->com.thread = curthread;
	PANIC_IF(!ep->com.qp);
	CTR4(KTR_IW_CXGB, "%s qpn 0x%x qp %p cm_id %p", __FUNCTION__, conn_param->qpn,
	     ep->com.qp, cm_id);

	ep->com.so = cm_id->so;
	err = init_sock(&ep->com);
	if (err)
		goto fail2;

	/* find a route */
	rt = find_route(cm_id->local_addr.sin_addr.s_addr,
			cm_id->remote_addr.sin_addr.s_addr,
			cm_id->local_addr.sin_port,
			cm_id->remote_addr.sin_port, IPTOS_LOWDELAY);
	if (!rt) {
		printf("%s - cannot find route.\n", __FUNCTION__);
		err = -EHOSTUNREACH;
		goto fail2;
	}

	/* IFCAP_TOE is a capability bit, so check if_capenable. */
	if (!(rt->rt_ifp->if_capenable & IFCAP_TOE)) {
		printf("%s - interface not TOE capable.\n", __FUNCTION__);
		err = -ENOSYS;
		RTFREE(rt);
		goto fail2;
	}
	tdev = TOEDEV(rt->rt_ifp);
	if (tdev == NULL) {
		printf("%s - No toedev for interface.\n", __FUNCTION__);
		err = -ENOSYS;
		RTFREE(rt);
		goto fail2;
	}
	RTFREE(rt);

	state_set(&ep->com, CONNECTING);
	ep->com.local_addr = cm_id->local_addr;
	ep->com.remote_addr = cm_id->remote_addr;
	err = soconnect(ep->com.so, (struct sockaddr *)&ep->com.remote_addr,
		ep->com.thread);
	if (!err)
		goto out;
fail2:
	put_ep(&ep->com);
out:
	return err;
}

int
iwch_create_listen_ep(struct iw_cm_id *cm_id, int backlog)
{
	int err = 0;
	struct iwch_listen_ep *ep;

	ep = alloc_ep(sizeof(*ep), M_NOWAIT);
	if (!ep) {
		printf("%s - cannot alloc ep.\n", __FUNCTION__);
		err = ENOMEM;
		goto out;
	}
	CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep);
	cm_id->add_ref(cm_id);
	ep->com.cm_id = cm_id;
	ep->backlog = backlog;
	ep->com.local_addr = cm_id->local_addr;
	ep->com.thread = curthread;
	state_set(&ep->com, LISTEN);

	ep->com.so = cm_id->so;
	cm_id->provider_data = ep;
out:
	return err;
}

void
iwch_destroy_listen_ep(struct iw_cm_id *cm_id)
{
	struct iwch_listen_ep *ep = to_listen_ep(cm_id);

	CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep);

	state_set(&ep->com, DEAD);
	cm_id->rem_ref(cm_id);
	put_ep(&ep->com);
	return;
}

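/*
 * Begin an orderly (or abrupt) teardown of the connection.  Under the
 * ep lock the state machine decides whether anything remains to be
 * done; if so, an abrupt disconnect sends an RST via abort_connection()
 * while a graceful one half-closes the socket and lets the normal
 * close/close-complete path finish the job.
 */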
int
iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, int flags)
{
	int close = 0;

	PANIC_IF(!ep);
	PANIC_IF(!ep->com.so);

	mtx_lock(&ep->com.lock);

	CTR5(KTR_IW_CXGB, "%s ep %p so %p state %s, abrupt %d", __FUNCTION__, ep,
	     ep->com.so, states[ep->com.state], abrupt);

	switch (ep->com.state) {
	case MPA_REQ_WAIT:
	case MPA_REQ_SENT:
	case MPA_REQ_RCVD:
	case MPA_REP_SENT:
	case FPDU_MODE:
		close = 1;
		if (abrupt)
			ep->com.state = ABORTING;
		else {
			ep->com.state = CLOSING;
			start_ep_timer(ep);
		}
		break;
	case CLOSING:
		close = 1;
		if (abrupt) {
			stop_ep_timer(ep);
			ep->com.state = ABORTING;
		} else
			ep->com.state = MORIBUND;
		break;
	case MORIBUND:
	case ABORTING:
	case DEAD:
		CTR3(KTR_IW_CXGB, "%s ignoring disconnect ep %p state %u\n",
			__func__, ep, ep->com.state);
		break;
	default:
		panic("unknown state: %d\n", ep->com.state);
		break;
	}

	mtx_unlock(&ep->com.lock);
	if (close) {
		if (abrupt)
			abort_connection(ep);
		else {
			if (!ep->parent_ep)
				__state_set(&ep->com, MORIBUND);
			shutdown_socket(&ep->com);
		}
	}
	return 0;
}

static void
process_data(struct iwch_ep *ep)
{
	struct sockaddr_in *local, *remote;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);

	switch (state_read(&ep->com)) {
	case MPA_REQ_SENT:
		process_mpa_reply(ep);
		break;
	case MPA_REQ_WAIT:

		/*
		 * XXX
		 * Set local and remote addrs here because when we
		 * dequeue the newly accepted socket, they aren't set
		 * yet in the pcb!
		 */
		in_getsockaddr(ep->com.so, (struct sockaddr **)&local);
		in_getpeeraddr(ep->com.so, (struct sockaddr **)&remote);
		CTR3(KTR_IW_CXGB, "%s local %s remote %s", __FUNCTION__,
			inet_ntoa(local->sin_addr),
			inet_ntoa(remote->sin_addr));
		ep->com.local_addr = *local;
		ep->com.remote_addr = *remote;
		free(local, M_SONAME);
		free(remote, M_SONAME);
		process_mpa_request(ep);
		break;
	default:
		if (ep->com.so->so_rcv.sb_cc)
			printf("%s Unexpected streaming data."
			       " ep %p state %d so %p so_state %x so_rcv.sb_cc %u so_rcv.sb_mb %p\n",
			       __FUNCTION__, ep, state_read(&ep->com), ep->com.so, ep->com.so->so_state,
			       ep->com.so->so_rcv.sb_cc, ep->com.so->so_rcv.sb_mb);
		break;
	}
	return;
}

static void
process_connected(struct iwch_ep *ep)
{
	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
	if ((ep->com.so->so_state & SS_ISCONNECTED) && !ep->com.so->so_error) {
		send_mpa_req(ep);
	} else {
		connect_reply_upcall(ep, -ep->com.so->so_error);
		close_socket(&ep->com, 0);
		state_set(&ep->com, DEAD);
		put_ep(&ep->com);
	}
}

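/*
 * A new connection arrived on a listening endpoint.  Build a child
 * endpoint around the accepted socket, inherit the parent's local
 * address and upcall thread, install the receive upcall, and start
 * waiting for the peer's MPA request (which may already be queued on
 * the socket, hence the trailing process_mpa_request() call).
 */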
void
process_newconn(struct iw_cm_id *parent_cm_id, struct socket *child_so)
{
	struct iwch_ep *child_ep;
	struct sockaddr_in *local;
	struct sockaddr_in *remote;
	struct iwch_ep *parent_ep = parent_cm_id->provider_data;

	CTR3(KTR_IW_CXGB, "%s parent ep %p so %p", __FUNCTION__, parent_ep, parent_ep->com.so);
	if (!child_so) {
		log(LOG_ERR, "%s - invalid child socket!\n", __func__);
		return;
	}
	child_ep = alloc_ep(sizeof(*child_ep), M_NOWAIT);
	if (!child_ep) {
		log(LOG_ERR, "%s - failed to allocate ep entry!\n",
		       __FUNCTION__);
		return;
	}
	SOCKBUF_LOCK(&child_so->so_rcv);
	soupcall_set(child_so, SO_RCV, iwch_so_upcall, child_ep);
	SOCKBUF_UNLOCK(&child_so->so_rcv);

	in_getsockaddr(child_so, (struct sockaddr **)&local);
	in_getpeeraddr(child_so, (struct sockaddr **)&remote);

	CTR3(KTR_IW_CXGB, "%s remote addr %s port %d", __FUNCTION__,
		inet_ntoa(remote->sin_addr), ntohs(remote->sin_port));
	child_ep->com.tdev = parent_ep->com.tdev;
	child_ep->com.local_addr.sin_family = parent_ep->com.local_addr.sin_family;
	child_ep->com.local_addr.sin_port = parent_ep->com.local_addr.sin_port;
	child_ep->com.local_addr.sin_addr.s_addr = parent_ep->com.local_addr.sin_addr.s_addr;
	child_ep->com.local_addr.sin_len = parent_ep->com.local_addr.sin_len;
	child_ep->com.remote_addr.sin_family = remote->sin_family;
	child_ep->com.remote_addr.sin_port = remote->sin_port;
	child_ep->com.remote_addr.sin_addr.s_addr = remote->sin_addr.s_addr;
	child_ep->com.remote_addr.sin_len = remote->sin_len;
	child_ep->com.so = child_so;
	child_ep->com.cm_id = NULL;
	child_ep->com.thread = parent_ep->com.thread;
	child_ep->parent_ep = parent_ep;

	free(local, M_SONAME);
	free(remote, M_SONAME);
	get_ep(&parent_ep->com);
	callout_init(&child_ep->timer, TRUE);
	state_set(&child_ep->com, MPA_REQ_WAIT);
	start_ep_timer(child_ep);

	/* maybe the request has already been queued up on the socket... */
	process_mpa_request(child_ep);
}

static int
iwch_so_upcall(struct socket *so, void *arg, int waitflag)
{
	struct iwch_ep *ep = arg;

	CTR6(KTR_IW_CXGB, "%s so %p so state %x ep %p ep state(%d)=%s", __FUNCTION__, so, so->so_state, ep, ep->com.state, states[ep->com.state]);
	mtx_lock(&req_lock);
	if (ep && ep->com.so && !ep->com.entry.tqe_prev) {
		get_ep(&ep->com);
		TAILQ_INSERT_TAIL(&req_list, &ep->com, entry);
		taskqueue_enqueue(iw_cxgb_taskq, &iw_cxgb_task);
	}
	mtx_unlock(&req_lock);
	return (SU_OK);
}

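/*
 * Demultiplex a socket event, running from the taskqueue thread.
 * The checks are ordered: connect completion first, then connection
 * errors, peer half-close (FIN), full disconnect, and finally plain
 * received data.  LISTEN endpoints never get here; their events are
 * handled by the IWCM.
 */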
static void
process_socket_event(struct iwch_ep *ep)
{
	int state = state_read(&ep->com);
	struct socket *so = ep->com.so;

	CTR6(KTR_IW_CXGB, "%s so %p so state %x ep %p ep state(%d)=%s", __FUNCTION__, so, so->so_state, ep, ep->com.state, states[ep->com.state]);
	if (state == CONNECTING) {
		process_connected(ep);
		return;
	}

	if (state == LISTEN) {
		/* socket listening events are handled at IWCM */
		CTR3(KTR_IW_CXGB, "%s Invalid ep state:%u, ep:%p", __func__,
			ep->com.state, ep);
		BUG();
		return;
	}

	/* connection error */
	if (so->so_error) {
		process_conn_error(ep);
		return;
	}

	/* peer close */
	if ((so->so_rcv.sb_state & SBS_CANTRCVMORE) && state < CLOSING) {
		process_peer_close(ep);
		return;
	}

	/* close complete */
	if (so->so_state & (SS_ISDISCONNECTED)) {
		process_close_complete(ep);
		return;
	}

	/* rx data */
	process_data(ep);
	return;
}

static void
process_req(void *ctx, int pending)
{
	struct iwch_ep_common *epc;

	CTR1(KTR_IW_CXGB, "%s enter", __FUNCTION__);
	mtx_lock(&req_lock);
	while (!TAILQ_EMPTY(&req_list)) {
		epc = TAILQ_FIRST(&req_list);
		TAILQ_REMOVE(&req_list, epc, entry);
		epc->entry.tqe_prev = NULL;
		mtx_unlock(&req_lock);
		if (epc->so)
			process_socket_event((struct iwch_ep *)epc);
		put_ep(epc);
		mtx_lock(&req_lock);
	}
	mtx_unlock(&req_lock);
}

int
iwch_cm_init(void)
{
	TAILQ_INIT(&req_list);
	mtx_init(&req_lock, "iw_cxgb req_list lock", NULL, MTX_DEF);
	iw_cxgb_taskq = taskqueue_create("iw_cxgb_taskq", M_NOWAIT,
		taskqueue_thread_enqueue, &iw_cxgb_taskq);
	if (iw_cxgb_taskq == NULL) {
		printf("failed to allocate iw_cxgb taskqueue\n");
		return (ENOMEM);
	}
	taskqueue_start_threads(&iw_cxgb_taskq, 1, PI_NET, "iw_cxgb taskq");
	TASK_INIT(&iw_cxgb_task, 0, process_req, NULL);
	return (0);
}

void
iwch_cm_term(void)
{

	taskqueue_drain(iw_cxgb_taskq, &iw_cxgb_task);
	taskqueue_free(iw_cxgb_taskq);
}

void
iwch_cm_init_cpl(struct adapter *sc)
{

	t3_register_cpl_handler(sc, CPL_RDMA_TERMINATE, terminate);
	t3_register_cpl_handler(sc, CPL_RDMA_EC_STATUS, ec_status);
}

void
iwch_cm_term_cpl(struct adapter *sc)
{

	t3_register_cpl_handler(sc, CPL_RDMA_TERMINATE, NULL);
	t3_register_cpl_handler(sc, CPL_RDMA_EC_STATUS, NULL);
}
#endif /* TCP_OFFLOAD */