cm.c revision 331769
1/*
2 * Copyright (c) 2009-2013, 2016 Chelsio, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses.  You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 *     Redistribution and use in source and binary forms, with or
11 *     without modification, are permitted provided that the following
12 *     conditions are met:
13 *
14 *      - Redistributions of source code must retain the above
15 *	  copyright notice, this list of conditions and the following
16 *	  disclaimer.
17 *
18 *      - Redistributions in binary form must reproduce the above
19 *	  copyright notice, this list of conditions and the following
20 *	  disclaimer in the documentation and/or other materials
21 *	  provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32#include <sys/cdefs.h>
33__FBSDID("$FreeBSD: stable/11/sys/dev/cxgbe/iw_cxgbe/cm.c 331769 2018-03-30 18:06:29Z hselasky $");
34
35#include "opt_inet.h"
36
37#ifdef TCP_OFFLOAD
38#include <sys/types.h>
39#include <sys/malloc.h>
40#include <sys/socket.h>
41#include <sys/socketvar.h>
42#include <sys/sockio.h>
43#include <sys/taskqueue.h>
44#include <netinet/in.h>
45#include <net/route.h>
46
47#include <netinet/in_systm.h>
48#include <netinet/in_pcb.h>
49#include <netinet6/in6_pcb.h>
50#include <netinet/ip.h>
51#include <netinet/in_fib.h>
52#include <netinet6/in6_fib.h>
53#include <netinet6/scope6_var.h>
54#include <netinet/ip_var.h>
55#include <netinet/tcp_var.h>
56#include <netinet/tcp.h>
57#include <netinet/tcpip.h>
58
59#include <netinet/toecore.h>
60
61struct sge_iq;
62struct rss_header;
63struct cpl_set_tcb_rpl;
64#include <linux/types.h>
65#include "offload.h"
66#include "tom/t4_tom.h"
67
68#define TOEPCB(so)  ((struct toepcb *)(so_sototcpcb((so))->t_toe))
69
70#include "iw_cxgbe.h"
71#include <linux/module.h>
72#include <linux/workqueue.h>
73#include <linux/notifier.h>
74#include <linux/inetdevice.h>
75#include <linux/if_vlan.h>
76#include <net/netevent.h>
77
78static spinlock_t req_lock;
79static TAILQ_HEAD(c4iw_ep_list, c4iw_ep_common) req_list;
80static struct work_struct c4iw_task;
81static struct workqueue_struct *c4iw_taskq;
82static LIST_HEAD(err_cqe_list);
83static spinlock_t err_cqe_lock;
84static LIST_HEAD(listen_port_list);
85static DEFINE_MUTEX(listen_port_mutex);
86
87static void process_req(struct work_struct *ctx);
88static void start_ep_timer(struct c4iw_ep *ep);
89static int stop_ep_timer(struct c4iw_ep *ep);
90static int set_tcpinfo(struct c4iw_ep *ep);
91static void process_timeout(struct c4iw_ep *ep);
92static void process_err_cqes(void);
93static void *alloc_ep(int size, gfp_t flags);
94static void close_socket(struct socket *so);
95static int send_mpa_req(struct c4iw_ep *ep);
96static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen);
97static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen);
98static void close_complete_upcall(struct c4iw_ep *ep, int status);
99static int send_abort(struct c4iw_ep *ep);
100static void peer_close_upcall(struct c4iw_ep *ep);
101static void peer_abort_upcall(struct c4iw_ep *ep);
102static void connect_reply_upcall(struct c4iw_ep *ep, int status);
103static int connect_request_upcall(struct c4iw_ep *ep);
104static void established_upcall(struct c4iw_ep *ep);
105static int process_mpa_reply(struct c4iw_ep *ep);
106static int process_mpa_request(struct c4iw_ep *ep);
107static void process_peer_close(struct c4iw_ep *ep);
108static void process_conn_error(struct c4iw_ep *ep);
109static void process_close_complete(struct c4iw_ep *ep);
110static void ep_timeout(unsigned long arg);
111static void setiwsockopt(struct socket *so);
112static void init_iwarp_socket(struct socket *so, void *arg);
113static void uninit_iwarp_socket(struct socket *so);
114static void process_data(struct c4iw_ep *ep);
115static void process_connected(struct c4iw_ep *ep);
116static int c4iw_so_upcall(struct socket *so, void *arg, int waitflag);
117static void process_socket_event(struct c4iw_ep *ep);
118static void release_ep_resources(struct c4iw_ep *ep);
119static int process_terminate(struct c4iw_ep *ep);
120static int terminate(struct sge_iq *iq, const struct rss_header *rss,
121    struct mbuf *m);
122static int add_ep_to_req_list(struct c4iw_ep *ep, int ep_events);
123static struct listen_port_info *
124add_ep_to_listenlist(struct c4iw_listen_ep *lep);
125static int rem_ep_from_listenlist(struct c4iw_listen_ep *lep);
126static struct c4iw_listen_ep *
127find_real_listen_ep(struct c4iw_listen_ep *master_lep, struct socket *so);
128static int get_ifnet_from_raddr(struct sockaddr_storage *raddr,
129		struct ifnet **ifp);
130static void process_newconn(struct c4iw_listen_ep *master_lep,
131		struct socket *new_so);
132#define START_EP_TIMER(ep) \
133    do { \
134	    CTR3(KTR_IW_CXGBE, "start_ep_timer (%s:%d) ep %p", \
135		__func__, __LINE__, (ep)); \
136	    start_ep_timer(ep); \
137    } while (0)
138
139#define STOP_EP_TIMER(ep) \
140    ({ \
141	    CTR3(KTR_IW_CXGBE, "stop_ep_timer (%s:%d) ep %p", \
142		__func__, __LINE__, (ep)); \
143	    stop_ep_timer(ep); \
144    })
145
146#define GET_LOCAL_ADDR(pladdr, so) \
147	do { \
148		struct sockaddr_storage *__a = NULL; \
149		struct inpcb *__inp = sotoinpcb(so); \
150		KASSERT(__inp != NULL, \
151		   ("GET_LOCAL_ADDR(%s):so:%p, inp = NULL", __func__, so)); \
152		if (__inp->inp_vflag & INP_IPV4) \
153			in_getsockaddr(so, (struct sockaddr **)&__a); \
154		else \
155			in6_getsockaddr(so, (struct sockaddr **)&__a); \
156		*(pladdr) = *__a; \
157		free(__a, M_SONAME); \
158	} while (0)
159
160#define GET_REMOTE_ADDR(praddr, so) \
161	do { \
162		struct sockaddr_storage *__a = NULL; \
163		struct inpcb *__inp = sotoinpcb(so); \
164		KASSERT(__inp != NULL, \
165		   ("GET_REMOTE_ADDR(%s):so:%p, inp = NULL", __func__, so)); \
166		if (__inp->inp_vflag & INP_IPV4) \
167			in_getpeeraddr(so, (struct sockaddr **)&__a); \
168		else \
169			in6_getpeeraddr(so, (struct sockaddr **)&__a); \
170		*(praddr) = *__a; \
171		free(__a, M_SONAME); \
172	} while (0)
173
174#ifdef KTR
175static char *states[] = {
176	"idle",
177	"listen",
178	"connecting",
179	"mpa_wait_req",
180	"mpa_req_sent",
181	"mpa_req_rcvd",
182	"mpa_rep_sent",
183	"fpdu_mode",
184	"aborting",
185	"closing",
186	"moribund",
187	"dead",
188	NULL,
189};
190#endif
191
192static void deref_cm_id(struct c4iw_ep_common *epc)
193{
194	epc->cm_id->rem_ref(epc->cm_id);
195	epc->cm_id = NULL;
196	set_bit(CM_ID_DEREFED, &epc->history);
197}
198
199static void ref_cm_id(struct c4iw_ep_common *epc)
200{
201	set_bit(CM_ID_REFED, &epc->history);
202	epc->cm_id->add_ref(epc->cm_id);
203}
204
205static void deref_qp(struct c4iw_ep *ep)
206{
207	c4iw_qp_rem_ref(&ep->com.qp->ibqp);
208	clear_bit(QP_REFERENCED, &ep->com.flags);
209	set_bit(QP_DEREFED, &ep->com.history);
210}
211
212static void ref_qp(struct c4iw_ep *ep)
213{
214	set_bit(QP_REFERENCED, &ep->com.flags);
215	set_bit(QP_REFED, &ep->com.history);
216	c4iw_qp_add_ref(&ep->com.qp->ibqp);
217}
218/* allocated per TCP port while listening */
219struct listen_port_info {
220	uint16_t port_num; /* TCP port number */
221	struct list_head list; /* belongs to listen_port_list */
222	struct list_head lep_list; /* per port lep list */
223	uint32_t refcnt; /* number of lep's listening */
224};
225
226/*
227 * The following two lists are used to manage INADDR_ANY listeners:
228 * 1) listen_port_list
229 * 2) lep_list
230 *
231 * Below is an overview of the INADDR_ANY listener lists on a system with a
232 * two-port adapter:
233 *   |------------------|
234 *   |listen_port_list  |
235 *   |------------------|
236 *            |
237 *            |              |-----------|       |-----------|
238 *            |              | port_num:X|       | port_num:X|
239 *            |--------------|-list------|-------|-list------|-------....
240 *                           | lep_list----|     | lep_list----|
241 *                           | refcnt    | |     | refcnt    | |
242 *                           |           | |     |           | |
243 *                           |           | |     |           | |
244 *                           |-----------| |     |-----------| |
245 *                                         |                   |
246 *                                         |                   |
247 *                                         |                   |
248 *                                         |                   |         lep1                  lep2
249 *                                         |                   |    |----------------|    |----------------|
250 *                                         |                   |----| listen_ep_list |----| listen_ep_list |
251 *                                         |                        |----------------|    |----------------|
252 *                                         |
253 *                                         |
254 *                                         |        lep1                  lep2
255 *                                         |   |----------------|    |----------------|
256 *                                         |---| listen_ep_list |----| listen_ep_list |
257 *                                             |----------------|    |----------------|
258 *
259 * Because this is a two-port adapter, there are two leps (lep1 & lep2) for
260 * each TCP port number.
261 *
262 * Here 'lep1' is always marked as the master lep, because solisten() is
263 * always called through the first lep.
264 *
265 */
266static struct listen_port_info *
267add_ep_to_listenlist(struct c4iw_listen_ep *lep)
268{
269	uint16_t port;
270	struct listen_port_info *port_info = NULL;
271	struct sockaddr_storage *laddr = &lep->com.local_addr;
272
273	port = (laddr->ss_family == AF_INET) ?
274		((struct sockaddr_in *)laddr)->sin_port :
275		((struct sockaddr_in6 *)laddr)->sin6_port;
276
277	mutex_lock(&listen_port_mutex);
278
279	list_for_each_entry(port_info, &listen_port_list, list)
280		if (port_info->port_num == port)
281			goto found_port;
282
283	port_info = malloc(sizeof(*port_info), M_CXGBE, M_WAITOK);
284	port_info->port_num = port;
285	port_info->refcnt    = 0;
286
287	list_add_tail(&port_info->list, &listen_port_list);
288	INIT_LIST_HEAD(&port_info->lep_list);
289
290found_port:
291	port_info->refcnt++;
292	list_add_tail(&lep->listen_ep_list, &port_info->lep_list);
293	mutex_unlock(&listen_port_mutex);
294	return port_info;
295}
296
297static int
298rem_ep_from_listenlist(struct c4iw_listen_ep *lep)
299{
300	uint16_t port;
301	struct listen_port_info *port_info = NULL;
302	struct sockaddr_storage *laddr = &lep->com.local_addr;
303	int refcnt = 0;
304
305	port = (laddr->ss_family == AF_INET) ?
306		((struct sockaddr_in *)laddr)->sin_port :
307		((struct sockaddr_in6 *)laddr)->sin6_port;
308
309	mutex_lock(&listen_port_mutex);
310
311	/* get the port_info structure based on the lep's port address */
312	list_for_each_entry(port_info, &listen_port_list, list) {
313		if (port_info->port_num == port) {
314			port_info->refcnt--;
315			refcnt = port_info->refcnt;
316			/* remove the current lep from the listen list */
317			list_del(&lep->listen_ep_list);
318			if (port_info->refcnt == 0) {
319				/* Remove this entry from the list as there
320				 * are no more listeners for this port_num.
321				 */
322				list_del(&port_info->list);
323				kfree(port_info);
324			}
325			break;
326		}
327	}
328	mutex_unlock(&listen_port_mutex);
329	return refcnt;
330}
331
332/*
333 * Find the lep that belongs to the ifnet on which the SYN frame was received.
334 */
335struct c4iw_listen_ep *
336find_real_listen_ep(struct c4iw_listen_ep *master_lep, struct socket *so)
337{
338	struct adapter *adap = NULL;
339	struct c4iw_listen_ep *lep = NULL;
340	struct sockaddr_storage remote = { 0 };
341	struct ifnet *new_conn_ifp = NULL;
342	struct listen_port_info *port_info = NULL;
343	int err = 0, i = 0,
344	    found_portinfo = 0, found_lep = 0;
345	uint16_t port;
346
347	/* STEP 1: get 'ifnet' based on socket's remote address */
348	GET_REMOTE_ADDR(&remote, so);
349
350	err = get_ifnet_from_raddr(&remote, &new_conn_ifp);
351	if (err) {
352		CTR4(KTR_IW_CXGBE, "%s: Failed to get ifnet, sock %p, "
353				"master_lep %p err %d",
354				__func__, so, master_lep, err);
355		return (NULL);
356	}
357
358	/* STEP 2: Find 'port_info' with listener local port address. */
359	port = (master_lep->com.local_addr.ss_family == AF_INET) ?
360		((struct sockaddr_in *)&master_lep->com.local_addr)->sin_port :
361		((struct sockaddr_in6 *)&master_lep->com.local_addr)->sin6_port;
362
363
364	mutex_lock(&listen_port_mutex);
365	list_for_each_entry(port_info, &listen_port_list, list)
366		if (port_info->port_num == port) {
367			found_portinfo = 1;
368			break;
369		}
370	if (!found_portinfo)
371		goto out;
372
373	/* STEP 3: Traverse through list of lep's that are bound to the current
374	 * TCP port address and find the lep that belongs to the ifnet on which
375	 * the SYN frame was received.
376	 */
377	list_for_each_entry(lep, &port_info->lep_list, listen_ep_list) {
378		adap = lep->com.dev->rdev.adap;
379		for_each_port(adap, i) {
380			if (new_conn_ifp == adap->port[i]->vi[0].ifp) {
381				found_lep = 1;
382				goto out;
383			}
384		}
385	}
386out:
387	mutex_unlock(&listen_port_mutex);
388	return found_lep ? lep : (NULL);
389}
390
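/*
 * Handle an endpoint whose CM timer has expired: deliver the upcall that
 * matches the current state, move the QP to ERROR for CLOSING/MORIBUND, and
 * abort the connection unless the timeout raced with stop_ep_timer().
 */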
391static void process_timeout(struct c4iw_ep *ep)
392{
393	struct c4iw_qp_attributes attrs = {0};
394	int abort = 1;
395
396	CTR4(KTR_IW_CXGBE, "%s ep :%p, tid:%u, state %d", __func__,
397			ep, ep->hwtid, ep->com.state);
398	set_bit(TIMEDOUT, &ep->com.history);
399	switch (ep->com.state) {
400	case MPA_REQ_SENT:
401		connect_reply_upcall(ep, -ETIMEDOUT);
402		break;
403	case MPA_REQ_WAIT:
404	case MPA_REQ_RCVD:
405	case MPA_REP_SENT:
406	case FPDU_MODE:
407		break;
408	case CLOSING:
409	case MORIBUND:
410		if (ep->com.cm_id && ep->com.qp) {
411			attrs.next_state = C4IW_QP_STATE_ERROR;
412			c4iw_modify_qp(ep->com.dev, ep->com.qp,
413					C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
414		}
415		close_complete_upcall(ep, -ETIMEDOUT);
416		break;
417	case ABORTING:
418	case DEAD:
419		/*
420		 * These states are expected if the ep timed out at the same
421		 * time as another thread was calling stop_ep_timer().
422		 * So we silently do nothing for these states.
423		 */
424		abort = 0;
425		break;
426	default:
427		CTR4(KTR_IW_CXGBE, "%s unexpected state ep %p tid %u state %u\n"
428				, __func__, ep, ep->hwtid, ep->com.state);
429		abort = 0;
430	}
431	if (abort)
432		c4iw_ep_disconnect(ep, 1, GFP_KERNEL);
433	c4iw_put_ep(&ep->com);
434	return;
435}
436
437struct cqe_list_entry {
438	struct list_head entry;
439	struct c4iw_dev *rhp;
440	struct t4_cqe err_cqe;
441};
442
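/*
 * Drain the global err_cqe_list, handing each deferred error CQE to
 * c4iw_ev_dispatch() with err_cqe_lock dropped around the dispatch.
 */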
443static void
444process_err_cqes(void)
445{
446	unsigned long flag;
447	struct cqe_list_entry *cle;
448
449	spin_lock_irqsave(&err_cqe_lock, flag);
450	while (!list_empty(&err_cqe_list)) {
451		struct list_head *tmp;
452		tmp = err_cqe_list.next;
453		list_del(tmp);
454		tmp->next = tmp->prev = NULL;
455		spin_unlock_irqrestore(&err_cqe_lock, flag);
456		cle = list_entry(tmp, struct cqe_list_entry, entry);
457		c4iw_ev_dispatch(cle->rhp, &cle->err_cqe);
458		free(cle, M_CXGBE);
459		spin_lock_irqsave(&err_cqe_lock, flag);
460	}
461	spin_unlock_irqrestore(&err_cqe_lock, flag);
462
463	return;
464}
465
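/*
 * Taskqueue handler: drain any deferred error CQEs, then walk req_list and
 * handle the pending TERM/TIMEOUT/SOCKET events for each queued endpoint
 * under its mutex, dropping the reference taken by add_ep_to_req_list().
 */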
466static void
467process_req(struct work_struct *ctx)
468{
469	struct c4iw_ep_common *epc;
470	unsigned long flag;
471	int ep_events;
472
473	process_err_cqes();
474	spin_lock_irqsave(&req_lock, flag);
475	while (!TAILQ_EMPTY(&req_list)) {
476		epc = TAILQ_FIRST(&req_list);
477		TAILQ_REMOVE(&req_list, epc, entry);
478		epc->entry.tqe_prev = NULL;
479		ep_events = epc->ep_events;
480		epc->ep_events = 0;
481		spin_unlock_irqrestore(&req_lock, flag);
482		mutex_lock(&epc->mutex);
483		CTR5(KTR_IW_CXGBE, "%s: so %p, ep %p, ep_state %s events 0x%x",
484		    __func__, epc->so, epc, states[epc->state], ep_events);
485		if (ep_events & C4IW_EVENT_TERM)
486			process_terminate((struct c4iw_ep *)epc);
487		if (ep_events & C4IW_EVENT_TIMEOUT)
488			process_timeout((struct c4iw_ep *)epc);
489		if (ep_events & C4IW_EVENT_SOCKET)
490			process_socket_event((struct c4iw_ep *)epc);
491		mutex_unlock(&epc->mutex);
492		c4iw_put_ep(epc);
493		process_err_cqes();
494		spin_lock_irqsave(&req_lock, flag);
495	}
496	spin_unlock_irqrestore(&req_lock, flag);
497}
498
499/*
500 * XXX: doesn't belong here in the iWARP driver.
501 * XXX: assumes that the connection was offloaded by cxgbe/t4_tom if TF_TOE is
502 *      set.  Is this a valid assumption for active open?
503 */
504static int
505set_tcpinfo(struct c4iw_ep *ep)
506{
507	struct socket *so = ep->com.so;
508	struct inpcb *inp = sotoinpcb(so);
509	struct tcpcb *tp;
510	struct toepcb *toep;
511	int rc = 0;
512
513	INP_WLOCK(inp);
514	tp = intotcpcb(inp);
515	if ((tp->t_flags & TF_TOE) == 0) {
516		rc = EINVAL;
517		log(LOG_ERR, "%s: connection not offloaded (so %p, ep %p)\n",
518		    __func__, so, ep);
519		goto done;
520	}
521	toep = TOEPCB(so);
522
523	ep->hwtid = toep->tid;
524	ep->snd_seq = tp->snd_nxt;
525	ep->rcv_seq = tp->rcv_nxt;
526	ep->emss = max(tp->t_maxseg, 128);
527done:
528	INP_WUNLOCK(inp);
529	return (rc);
530
531}
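/*
 * Resolve the egress ifnet used to reach 'raddr' via a FIB nexthop lookup
 * (IPv4 or IPv6).  Returns 0 on success.
 */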
532static int
533get_ifnet_from_raddr(struct sockaddr_storage *raddr, struct ifnet **ifp)
534{
535	int err = 0;
536
537	if (raddr->ss_family == AF_INET) {
538		struct sockaddr_in *raddr4 = (struct sockaddr_in *)raddr;
539		struct nhop4_extended nh4 = {0};
540
541		err = fib4_lookup_nh_ext(RT_DEFAULT_FIB, raddr4->sin_addr,
542				NHR_REF, 0, &nh4);
543		*ifp = nh4.nh_ifp;
544		if (err)
545			fib4_free_nh_ext(RT_DEFAULT_FIB, &nh4);
546	} else {
547		struct sockaddr_in6 *raddr6 = (struct sockaddr_in6 *)raddr;
548		struct nhop6_extended nh6 = {0};
549		struct in6_addr addr6;
550		uint32_t scopeid;
551
552		memset(&addr6, 0, sizeof(addr6));
553		in6_splitscope((struct in6_addr *)&raddr6->sin6_addr,
554					&addr6, &scopeid);
555		err = fib6_lookup_nh_ext(RT_DEFAULT_FIB, &addr6, scopeid,
556				NHR_REF, 0, &nh6);
557		*ifp = nh6.nh_ifp;
558		if (err)
559			fib6_free_nh_ext(RT_DEFAULT_FIB, &nh6);
560	}
561
562	CTR2(KTR_IW_CXGBE, "%s: return: %d", __func__, err);
563	return err;
564}
565
566static void
567close_socket(struct socket *so)
568{
569	uninit_iwarp_socket(so);
570	soclose(so);
571}
572
573static void
574process_peer_close(struct c4iw_ep *ep)
575{
576	struct c4iw_qp_attributes attrs = {0};
577	int disconnect = 1;
578	int release = 0;
579
580	CTR4(KTR_IW_CXGBE, "%s:ppcB ep %p so %p state %s", __func__, ep,
581	    ep->com.so, states[ep->com.state]);
582
583	switch (ep->com.state) {
584
585		case MPA_REQ_WAIT:
586			CTR2(KTR_IW_CXGBE, "%s:ppc1 %p MPA_REQ_WAIT DEAD",
587			    __func__, ep);
588			/* Fallthrough */
589		case MPA_REQ_SENT:
590			CTR2(KTR_IW_CXGBE, "%s:ppc2 %p MPA_REQ_SENT DEAD",
591			    __func__, ep);
592			ep->com.state = DEAD;
593			connect_reply_upcall(ep, -ECONNABORTED);
594
595			disconnect = 0;
596			STOP_EP_TIMER(ep);
597			close_socket(ep->com.so);
598			deref_cm_id(&ep->com);
599			release = 1;
600			break;
601
602		case MPA_REQ_RCVD:
603
604			/*
605			 * We're gonna mark this puppy DEAD, but keep
606			 * the reference on it until the ULP accepts or
607			 * rejects the CR.
608			 */
609			CTR2(KTR_IW_CXGBE, "%s:ppc3 %p MPA_REQ_RCVD CLOSING",
610			    __func__, ep);
611			ep->com.state = CLOSING;
612			break;
613
614		case MPA_REP_SENT:
615			CTR2(KTR_IW_CXGBE, "%s:ppc4 %p MPA_REP_SENT CLOSING",
616			    __func__, ep);
617			ep->com.state = CLOSING;
618			break;
619
620		case FPDU_MODE:
621			CTR2(KTR_IW_CXGBE, "%s:ppc5 %p FPDU_MODE CLOSING",
622			    __func__, ep);
623			START_EP_TIMER(ep);
624			ep->com.state = CLOSING;
625			attrs.next_state = C4IW_QP_STATE_CLOSING;
626			c4iw_modify_qp(ep->com.dev, ep->com.qp,
627					C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
628			peer_close_upcall(ep);
629			break;
630
631		case ABORTING:
632			CTR2(KTR_IW_CXGBE, "%s:ppc6 %p ABORTING (disconn)",
633			    __func__, ep);
634			disconnect = 0;
635			break;
636
637		case CLOSING:
638			CTR2(KTR_IW_CXGBE, "%s:ppc7 %p CLOSING MORIBUND",
639			    __func__, ep);
640			ep->com.state = MORIBUND;
641			disconnect = 0;
642			break;
643
644		case MORIBUND:
645			CTR2(KTR_IW_CXGBE, "%s:ppc8 %p MORIBUND DEAD", __func__,
646			    ep);
647			STOP_EP_TIMER(ep);
648			if (ep->com.cm_id && ep->com.qp) {
649				attrs.next_state = C4IW_QP_STATE_IDLE;
650				c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
651						C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
652			}
653			close_socket(ep->com.so);
654			close_complete_upcall(ep, 0);
655			ep->com.state = DEAD;
656			release = 1;
657			disconnect = 0;
658			break;
659
660		case DEAD:
661			CTR2(KTR_IW_CXGBE, "%s:ppc9 %p DEAD (disconn)",
662			    __func__, ep);
663			disconnect = 0;
664			break;
665
666		default:
667			panic("%s: ep %p state %d", __func__, ep,
668			    ep->com.state);
669			break;
670	}
671
672
673	if (disconnect) {
674
675		CTR2(KTR_IW_CXGBE, "%s:ppca %p", __func__, ep);
676		c4iw_ep_disconnect(ep, 0, M_NOWAIT);
677	}
678	if (release) {
679
680		CTR2(KTR_IW_CXGBE, "%s:ppcb %p", __func__, ep);
681		c4iw_put_ep(&ep->com);
682	}
683	CTR2(KTR_IW_CXGBE, "%s:ppcE %p", __func__, ep);
684	return;
685}
686
687static void
688process_conn_error(struct c4iw_ep *ep)
689{
690	struct c4iw_qp_attributes attrs = {0};
691	int ret;
692	int state;
693
694	state = ep->com.state;
695	CTR5(KTR_IW_CXGBE, "%s:pceB ep %p so %p so->so_error %u state %s",
696	    __func__, ep, ep->com.so, ep->com.so->so_error,
697	    states[ep->com.state]);
698
699	switch (state) {
700
701		case MPA_REQ_WAIT:
702			STOP_EP_TIMER(ep);
703			c4iw_put_ep(&ep->parent_ep->com);
704			break;
705
706		case MPA_REQ_SENT:
707			STOP_EP_TIMER(ep);
708			connect_reply_upcall(ep, -ECONNRESET);
709			break;
710
711		case MPA_REP_SENT:
712			ep->com.rpl_err = ECONNRESET;
713			CTR1(KTR_IW_CXGBE, "waking up ep %p", ep);
714			break;
715
716		case MPA_REQ_RCVD:
717			break;
718
719		case MORIBUND:
720		case CLOSING:
721			STOP_EP_TIMER(ep);
722			/*FALLTHROUGH*/
723		case FPDU_MODE:
724
725			if (ep->com.cm_id && ep->com.qp) {
726
727				attrs.next_state = C4IW_QP_STATE_ERROR;
728				ret = c4iw_modify_qp(ep->com.qp->rhp,
729					ep->com.qp, C4IW_QP_ATTR_NEXT_STATE,
730					&attrs, 1);
731				if (ret)
732					log(LOG_ERR,
733							"%s - qp <- error failed!\n",
734							__func__);
735			}
736			peer_abort_upcall(ep);
737			break;
738
739		case ABORTING:
740			break;
741
742		case DEAD:
743			CTR2(KTR_IW_CXGBE, "%s so_error %d IN DEAD STATE!!!!",
744			    __func__, ep->com.so->so_error);
745			return;
746
747		default:
748			panic("%s: ep %p state %d", __func__, ep, state);
749			break;
750	}
751
752	if (state != ABORTING) {
753		close_socket(ep->com.so);
754		ep->com.state = DEAD;
755		c4iw_put_ep(&ep->com);
756	}
757	CTR2(KTR_IW_CXGBE, "%s:pceE %p", __func__, ep);
758	return;
759}
760
761static void
762process_close_complete(struct c4iw_ep *ep)
763{
764	struct c4iw_qp_attributes attrs = {0};
765	int release = 0;
766
767	CTR4(KTR_IW_CXGBE, "%s:pccB ep %p so %p state %s", __func__, ep,
768	    ep->com.so, states[ep->com.state]);
769
770	/* The cm_id may be null if we failed to connect */
771	set_bit(CLOSE_CON_RPL, &ep->com.history);
772
773	switch (ep->com.state) {
774
775		case CLOSING:
776			CTR2(KTR_IW_CXGBE, "%s:pcc1 %p CLOSING MORIBUND",
777			    __func__, ep);
778			ep->com.state = MORIBUND;
779			break;
780
781		case MORIBUND:
782			CTR2(KTR_IW_CXGBE, "%s:pcc1 %p MORIBUND DEAD", __func__,
783			    ep);
784			STOP_EP_TIMER(ep);
785
786			if ((ep->com.cm_id) && (ep->com.qp)) {
787
788				CTR2(KTR_IW_CXGBE, "%s:pcc2 %p QP_STATE_IDLE",
789				    __func__, ep);
790				attrs.next_state = C4IW_QP_STATE_IDLE;
791				c4iw_modify_qp(ep->com.dev,
792						ep->com.qp,
793						C4IW_QP_ATTR_NEXT_STATE,
794						&attrs, 1);
795			}
796
797			close_socket(ep->com.so);
798			close_complete_upcall(ep, 0);
799			ep->com.state = DEAD;
800			release = 1;
801			break;
802
803		case ABORTING:
804			CTR2(KTR_IW_CXGBE, "%s:pcc5 %p ABORTING", __func__, ep);
805			break;
806
807		case DEAD:
808			CTR2(KTR_IW_CXGBE, "%s:pcc6 %p DEAD", __func__, ep);
809			break;
810		default:
811			CTR2(KTR_IW_CXGBE, "%s:pcc7 %p unknown ep state",
812					__func__, ep);
813			panic("%s:pcc6 %p unknown ep state", __func__, ep);
814			break;
815	}
816
817	if (release) {
818
819		CTR2(KTR_IW_CXGBE, "%s:pcc8 %p", __func__, ep);
820		release_ep_resources(ep);
821	}
822	CTR2(KTR_IW_CXGBE, "%s:pccE %p", __func__, ep);
823	return;
824}
825
826static void
827setiwsockopt(struct socket *so)
828{
829	int rc;
830	struct sockopt sopt;
831	int on = 1;
832
833	sopt.sopt_dir = SOPT_SET;
834	sopt.sopt_level = IPPROTO_TCP;
835	sopt.sopt_name = TCP_NODELAY;
836	sopt.sopt_val = (caddr_t)&on;
837	sopt.sopt_valsize = sizeof on;
838	sopt.sopt_td = NULL;
839	rc = sosetopt(so, &sopt);
840	if (rc) {
841		log(LOG_ERR, "%s: can't set TCP_NODELAY on so %p (%d)\n",
842		    __func__, so, rc);
843	}
844}
845
846static void
847init_iwarp_socket(struct socket *so, void *arg)
848{
849
850	SOCKBUF_LOCK(&so->so_rcv);
851	soupcall_set(so, SO_RCV, c4iw_so_upcall, arg);
852	so->so_state |= SS_NBIO;
853	SOCKBUF_UNLOCK(&so->so_rcv);
854}
855
856static void
857uninit_iwarp_socket(struct socket *so)
858{
859
860	SOCKBUF_LOCK(&so->so_rcv);
861	soupcall_clear(so, SO_RCV);
862	SOCKBUF_UNLOCK(&so->so_rcv);
863}
864
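/*
 * New data arrived on the socket.  During MPA negotiation it is fed to the
 * MPA reply/request parsers; in any other state streaming data is unexpected
 * and only logged.
 */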
865static void
866process_data(struct c4iw_ep *ep)
867{
868	int disconnect = 0;
869
870	CTR5(KTR_IW_CXGBE, "%s: so %p, ep %p, state %s, sbused %d", __func__,
871	    ep->com.so, ep, states[ep->com.state], sbused(&ep->com.so->so_rcv));
872
873	switch (ep->com.state) {
874	case MPA_REQ_SENT:
875		disconnect = process_mpa_reply(ep);
876		break;
877	case MPA_REQ_WAIT:
878		disconnect = process_mpa_request(ep);
879		if (disconnect)
880			/* Reference taken in process_newconn() */
881			c4iw_put_ep(&ep->parent_ep->com);
882		break;
883	default:
884		if (sbused(&ep->com.so->so_rcv))
885			log(LOG_ERR, "%s: Unexpected streaming data. ep %p, "
886			    "state %d, so %p, so_state 0x%x, sbused %u\n",
887			    __func__, ep, ep->com.state, ep->com.so,
888			    ep->com.so->so_state, sbused(&ep->com.so->so_rcv));
889		break;
890	}
891	if (disconnect)
892		c4iw_ep_disconnect(ep, disconnect == 2, GFP_KERNEL);
893
894}
895
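/*
 * Active-open completion: if the TCP connection is established, send the MPA
 * request; otherwise report the failure upstream and tear the socket down.
 */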
896static void
897process_connected(struct c4iw_ep *ep)
898{
899	struct socket *so = ep->com.so;
900
901	if ((so->so_state & SS_ISCONNECTED) && !so->so_error) {
902		if (send_mpa_req(ep))
903			goto err;
904	} else {
905		connect_reply_upcall(ep, -so->so_error);
906		goto err;
907	}
908	return;
909err:
910	close_socket(so);
911	ep->com.state = DEAD;
912	c4iw_put_ep(&ep->com);
913	return;
914}
915
916static inline int c4iw_zero_addr(struct sockaddr *addr)
917{
918	struct in6_addr *ip6;
919
920	if (addr->sa_family == AF_INET)
921		return IN_ZERONET(
922			ntohl(((struct sockaddr_in *)addr)->sin_addr.s_addr));
923	else {
924		ip6 = &((struct sockaddr_in6 *) addr)->sin6_addr;
925		return (ip6->s6_addr32[0] | ip6->s6_addr32[1] |
926				ip6->s6_addr32[2] | ip6->s6_addr32[3]) == 0;
927	}
928}
929
930static inline int c4iw_loopback_addr(struct sockaddr *addr)
931{
932	if (addr->sa_family == AF_INET)
933		return IN_LOOPBACK(
934			ntohl(((struct sockaddr_in *) addr)->sin_addr.s_addr));
935	else
936		return IN6_IS_ADDR_LOOPBACK(
937				&((struct sockaddr_in6 *) addr)->sin6_addr);
938}
939
940static inline int c4iw_any_addr(struct sockaddr *addr)
941{
942	return c4iw_zero_addr(addr) || c4iw_loopback_addr(addr);
943}
944
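/*
 * Accept a child connection on a listening endpoint: for wildcard listeners,
 * locate the real lep for the ingress interface, then allocate the child ep,
 * soaccept() the new socket and start processing its MPA request.
 */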
945static void
946process_newconn(struct c4iw_listen_ep *master_lep, struct socket *new_so)
947{
948	struct c4iw_listen_ep *real_lep = NULL;
949	struct c4iw_ep *new_ep = NULL;
950	struct sockaddr_in *remote = NULL;
951	int ret = 0;
952
953	MPASS(new_so != NULL);
954
955	if (c4iw_any_addr((struct sockaddr *)&master_lep->com.local_addr)) {
956		/* Here we need to find the 'real_lep' that belongs to the
957		 * incoming socket's network interface, such that the newly
958		 * created 'ep' can be attached to the real 'lep'.
959		 */
960		real_lep = find_real_listen_ep(master_lep, new_so);
961		if (real_lep == NULL) {
962			CTR2(KTR_IW_CXGBE, "%s: Could not find the real listen "
963					"ep for sock: %p", __func__, new_so);
964			log(LOG_ERR,"%s: Could not find the real listen ep for "
965					"sock: %p\n", __func__, new_so);
966			/* FIXME: properly free the 'new_so' in failure case.
967			 * Use of soabort() and soclose() is not legal
968			 * here (before soaccept()).
969			 */
970			return;
971		}
972	} else /* for a non-wildcard address, master_lep is always the real_lep */
973		real_lep = master_lep;
974
975	new_ep = alloc_ep(sizeof(*new_ep), GFP_KERNEL);
976
977	CTR6(KTR_IW_CXGBE, "%s: master_lep %p, real_lep: %p, new ep %p, "
978	    "listening so %p, new so %p", __func__, master_lep, real_lep,
979	    new_ep, master_lep->com.so, new_so);
980
981	new_ep->com.dev = real_lep->com.dev;
982	new_ep->com.so = new_so;
983	new_ep->com.cm_id = NULL;
984	new_ep->com.thread = real_lep->com.thread;
985	new_ep->parent_ep = real_lep;
986
987	GET_LOCAL_ADDR(&new_ep->com.local_addr, new_so);
988	GET_REMOTE_ADDR(&new_ep->com.remote_addr, new_so);
989	c4iw_get_ep(&real_lep->com);
990	init_timer(&new_ep->timer);
991	new_ep->com.state = MPA_REQ_WAIT;
992	START_EP_TIMER(new_ep);
993
994	setiwsockopt(new_so);
995	ret = soaccept(new_so, (struct sockaddr **)&remote);
996	if (ret != 0) {
997		CTR4(KTR_IW_CXGBE,
998				"%s:listen sock:%p, new sock:%p, ret:%d\n",
999				__func__, master_lep->com.so, new_so, ret);
1000		if (remote != NULL)
1001			free(remote, M_SONAME);
1002		uninit_iwarp_socket(new_so);
1003		soclose(new_so);
1004		c4iw_put_ep(&new_ep->com);
1005		c4iw_put_ep(&real_lep->com);
1006		return;
1007	}
1008	free(remote, M_SONAME);
1009
1010	/* The MPA request might have been queued up on the socket already, so we
1011	 * initialize the socket/upcall handler under the lock to prevent another
1012	 * thread (via process_req()) from processing the MPA request simultaneously.
1013	 */
1014	c4iw_get_ep(&new_ep->com); /* Dereferenced at the end below; this is to
1015				      avoid the ep being freed before it is unlocked. */
1016	mutex_lock(&new_ep->com.mutex);
1017	init_iwarp_socket(new_so, &new_ep->com);
1018
1019	ret = process_mpa_request(new_ep);
1020	if (ret) {
1021		/* ABORT */
1022		c4iw_ep_disconnect(new_ep, 1, GFP_KERNEL);
1023		c4iw_put_ep(&real_lep->com);
1024	}
1025	mutex_unlock(&new_ep->com.mutex);
1026	c4iw_put_ep(&new_ep->com);
1027	return;
1028}
1029
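/*
 * Record the new event bits on the endpoint and, if it is not already queued,
 * take a reference, append it to req_list and kick the taskqueue.
 */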
1030static int
1031add_ep_to_req_list(struct c4iw_ep *ep, int new_ep_event)
1032{
1033	unsigned long flag;
1034
1035	spin_lock_irqsave(&req_lock, flag);
1036	if (ep && ep->com.so) {
1037		ep->com.ep_events |= new_ep_event;
1038		if (!ep->com.entry.tqe_prev) {
1039			c4iw_get_ep(&ep->com);
1040			TAILQ_INSERT_TAIL(&req_list, &ep->com, entry);
1041			queue_work(c4iw_taskq, &c4iw_task);
1042		}
1043	}
1044	spin_unlock_irqrestore(&req_lock, flag);
1045
1046	return (0);
1047}
1048
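/*
 * Socket receive upcall installed by init_iwarp_socket().  It is called from
 * the socket layer with the receive buffer lock held, so it only wakes any
 * rdma_init()/rdma_fini() waiter on error and queues the endpoint for the
 * taskqueue.
 */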
1049static int
1050c4iw_so_upcall(struct socket *so, void *arg, int waitflag)
1051{
1052	struct c4iw_ep *ep = arg;
1053
1054	CTR6(KTR_IW_CXGBE,
1055	    "%s: so %p, so_state 0x%x, ep %p, ep_state %s, tqe_prev %p",
1056	    __func__, so, so->so_state, ep, states[ep->com.state],
1057	    ep->com.entry.tqe_prev);
1058
1059	MPASS(ep->com.so == so);
1060	/*
1061	 * Wake up any threads waiting in rdma_init()/rdma_fini(),
1062	 * with locks held.
1063	 */
1064	if (so->so_error)
1065		c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
1066	add_ep_to_req_list(ep, C4IW_EVENT_SOCKET);
1067
1068	return (SU_OK);
1069}
1070
1071
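/*
 * CPL_RDMA_TERMINATE handler: look up the toepcb by tid, recover the ep from
 * the socket's upcall argument and queue a C4IW_EVENT_TERM for the taskqueue.
 */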
1072static int
1073terminate(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
1074{
1075	struct adapter *sc = iq->adapter;
1076	const struct cpl_rdma_terminate *cpl = mtod(m, const void *);
1077	unsigned int tid = GET_TID(cpl);
1078	struct toepcb *toep = lookup_tid(sc, tid);
1079	struct socket *so;
1080	struct c4iw_ep *ep;
1081
1082	INP_WLOCK(toep->inp);
1083	so = inp_inpcbtosocket(toep->inp);
1084	ep = so->so_rcv.sb_upcallarg;
1085	INP_WUNLOCK(toep->inp);
1086
1087	CTR3(KTR_IW_CXGBE, "%s: so %p, ep %p", __func__, so, ep);
1088	add_ep_to_req_list(ep, C4IW_EVENT_TERM);
1089
1090	return 0;
1091}
1092
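/*
 * Pull the next completed connection off the listening socket's so_comp
 * queue and soaccept() it.  Returns NULL when the queue is empty.
 */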
1093static struct socket *
1094dequeue_socket(struct socket *head)
1095{
1096	struct socket *so;
1097	struct sockaddr_in *remote;
1098
1099	ACCEPT_LOCK();
1100	so = TAILQ_FIRST(&head->so_comp);
1101	if (!so) {
1102		ACCEPT_UNLOCK();
1103		return NULL;
1104	}
1105
1106	SOCK_LOCK(so);
1107	/*
1108	 * Before changing the flags on the socket, we have to bump the
1109	 * reference count.  Otherwise, if the protocol calls sofree(),
1110	 * the socket will be released due to a zero refcount.
1111	 */
1112	soref(so);
1113	TAILQ_REMOVE(&head->so_comp, so, so_list);
1114	head->so_qlen--;
1115	so->so_qstate &= ~SQ_COMP;
1116	so->so_head = NULL;
1117	so->so_state |= SS_NBIO;
1118	SOCK_UNLOCK(so);
1119	ACCEPT_UNLOCK();
1120	remote = NULL;
1121	soaccept(so, (struct sockaddr **)&remote);
1122
1123	free(remote, M_SONAME);
1124	return so;
1125}
1126
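/*
 * Dispatch a queued socket event based on the endpoint state: active-open
 * completion, new connections on a listener, connection errors, peer close,
 * close completion, and finally inbound data.
 */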
1127static void
1128process_socket_event(struct c4iw_ep *ep)
1129{
1130	int state = ep->com.state;
1131	struct socket *so = ep->com.so;
1132
1133	if (ep->com.state == DEAD) {
1134		CTR3(KTR_IW_CXGBE, "%s: Pending socket event discarded "
1135			"ep %p ep_state %s", __func__, ep, states[state]);
1136		return;
1137	}
1138
1139	CTR6(KTR_IW_CXGBE, "process_socket_event: so %p, so_state 0x%x, "
1140	    "so_err %d, sb_state 0x%x, ep %p, ep_state %s", so, so->so_state,
1141	    so->so_error, so->so_rcv.sb_state, ep, states[state]);
1142
1143	if (state == CONNECTING) {
1144		process_connected(ep);
1145		return;
1146	}
1147
1148	if (state == LISTEN) {
1149		struct c4iw_listen_ep *lep = (struct c4iw_listen_ep *)ep;
1150		struct socket *new_so;
1151
1152		while ((new_so = dequeue_socket(so)) != NULL) {
1153			process_newconn(lep, new_so);
1154		}
1155		return;
1156	}
1157
1158	/* connection error */
1159	if (so->so_error) {
1160		process_conn_error(ep);
1161		return;
1162	}
1163
1164	/* peer close */
1165	if ((so->so_rcv.sb_state & SBS_CANTRCVMORE) && state <= CLOSING) {
1166		process_peer_close(ep);
1167		/*
1168		 * check whether socket disconnect event is pending before
1169		 * returning. Fallthrough if yes.
1170		 */
1171		if (!(so->so_state & SS_ISDISCONNECTED))
1172			return;
1173	}
1174
1175	/* close complete */
1176	if (so->so_state & SS_ISDISCONNECTED) {
1177		process_close_complete(ep);
1178		return;
1179	}
1180
1181	/* rx data */
1182	process_data(ep);
1183}
1184
1185SYSCTL_NODE(_hw, OID_AUTO, iw_cxgbe, CTLFLAG_RD, 0, "iw_cxgbe driver parameters");
1186
1187static int dack_mode = 0;
1188SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, dack_mode, CTLFLAG_RWTUN, &dack_mode, 0,
1189		"Delayed ack mode (default = 0)");
1190
1191int c4iw_max_read_depth = 8;
1192SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, c4iw_max_read_depth, CTLFLAG_RWTUN, &c4iw_max_read_depth, 0,
1193		"Per-connection max ORD/IRD (default = 8)");
1194
1195static int enable_tcp_timestamps;
1196SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, enable_tcp_timestamps, CTLFLAG_RWTUN, &enable_tcp_timestamps, 0,
1197		"Enable tcp timestamps (default = 0)");
1198
1199static int enable_tcp_sack;
1200SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, enable_tcp_sack, CTLFLAG_RWTUN, &enable_tcp_sack, 0,
1201		"Enable tcp SACK (default = 0)");
1202
1203static int enable_tcp_window_scaling = 1;
1204SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, enable_tcp_window_scaling, CTLFLAG_RWTUN, &enable_tcp_window_scaling, 0,
1205		"Enable tcp window scaling (default = 1)");
1206
1207int c4iw_debug = 1;
1208SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, c4iw_debug, CTLFLAG_RWTUN, &c4iw_debug, 0,
1209		"Enable debug logging (default = 0)");
1210
1211static int peer2peer = 1;
1212SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, peer2peer, CTLFLAG_RWTUN, &peer2peer, 0,
1213		"Support peer2peer ULPs (default = 1)");
1214
1215static int p2p_type = FW_RI_INIT_P2PTYPE_READ_REQ;
1216SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, p2p_type, CTLFLAG_RWTUN, &p2p_type, 0,
1217		"RDMAP opcode to use for the RTR message: 1 = RDMA_READ 0 = RDMA_WRITE (default 1)");
1218
1219static int ep_timeout_secs = 60;
1220SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, ep_timeout_secs, CTLFLAG_RWTUN, &ep_timeout_secs, 0,
1221		"CM Endpoint operation timeout in seconds (default = 60)");
1222
1223static int mpa_rev = 1;
1224SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, mpa_rev, CTLFLAG_RWTUN, &mpa_rev, 0,
1225		"MPA Revision, 0 supports amso1100, 1 is RFC5044 spec compliant, 2 is IETF MPA Peer Connect Draft compliant (default = 1)");
1226
1227static int markers_enabled;
1228SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, markers_enabled, CTLFLAG_RWTUN, &markers_enabled, 0,
1229		"Enable MPA MARKERS (default(0) = disabled)");
1230
1231static int crc_enabled = 1;
1232SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, crc_enabled, CTLFLAG_RWTUN, &crc_enabled, 0,
1233		"Enable MPA CRC (default(1) = enabled)");
1234
1235static int rcv_win = 256 * 1024;
1236SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, rcv_win, CTLFLAG_RWTUN, &rcv_win, 0,
1237		"TCP receive window in bytes (default = 256KB)");
1238
1239static int snd_win = 128 * 1024;
1240SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, snd_win, CTLFLAG_RWTUN, &snd_win, 0,
1241		"TCP send window in bytes (default = 128KB)");
1242
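/*
 * Arm the endpoint watchdog (ep_timeout_secs) and take a reference that is
 * dropped when the timer is stopped or its timeout is processed.
 */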
1243static void
1244start_ep_timer(struct c4iw_ep *ep)
1245{
1246
1247	if (timer_pending(&ep->timer)) {
1248		CTR2(KTR_IW_CXGBE, "%s: ep %p, already started", __func__, ep);
1249		printk(KERN_ERR "%s timer already started! ep %p\n", __func__,
1250		    ep);
1251		return;
1252	}
1253	clear_bit(TIMEOUT, &ep->com.flags);
1254	c4iw_get_ep(&ep->com);
1255	ep->timer.expires = jiffies + ep_timeout_secs * HZ;
1256	ep->timer.data = (unsigned long)ep;
1257	ep->timer.function = ep_timeout;
1258	add_timer(&ep->timer);
1259}
1260
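/*
 * Cancel the endpoint watchdog.  Returns 0 if the timer was stopped here (the
 * timer's reference is dropped), or 1 if the timeout had already fired.
 */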
1261static int
1262stop_ep_timer(struct c4iw_ep *ep)
1263{
1264
1265	del_timer_sync(&ep->timer);
1266	if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) {
1267		c4iw_put_ep(&ep->com);
1268		return 0;
1269	}
1270	return 1;
1271}
1272
1273static void *
1274alloc_ep(int size, gfp_t gfp)
1275{
1276	struct c4iw_ep_common *epc;
1277
1278	epc = kzalloc(size, gfp);
1279	if (epc == NULL)
1280		return (NULL);
1281
1282	kref_init(&epc->kref);
1283	mutex_init(&epc->mutex);
1284	c4iw_init_wr_wait(&epc->wr_wait);
1285
1286	return (epc);
1287}
1288
1289void _c4iw_free_ep(struct kref *kref)
1290{
1291	struct c4iw_ep *ep;
1292	struct c4iw_ep_common *epc;
1293
1294	ep = container_of(kref, struct c4iw_ep, com.kref);
1295	epc = &ep->com;
1296	KASSERT(!epc->entry.tqe_prev, ("%s epc %p still on req list",
1297	    __func__, epc));
1298	if (test_bit(QP_REFERENCED, &ep->com.flags))
1299		deref_qp(ep);
1300	CTR4(KTR_IW_CXGBE, "%s: ep %p, history 0x%lx, flags 0x%lx",
1301	    __func__, ep, epc->history, epc->flags);
1302	kfree(ep);
1303}
1304
1305static void release_ep_resources(struct c4iw_ep *ep)
1306{
1307	CTR2(KTR_IW_CXGBE, "%s:rerB %p", __func__, ep);
1308	set_bit(RELEASE_RESOURCES, &ep->com.flags);
1309	c4iw_put_ep(&ep->com);
1310	CTR2(KTR_IW_CXGBE, "%s:rerE %p", __func__, ep);
1311}
1312
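/*
 * Build the MPA start request (v1, or v2 with IRD/ORD and RTR negotiation)
 * and send it as streaming data on the socket, then arm the timer and move
 * to MPA_REQ_SENT.
 */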
1313static int
1314send_mpa_req(struct c4iw_ep *ep)
1315{
1316	int mpalen;
1317	struct mpa_message *mpa;
1318	struct mpa_v2_conn_params mpa_v2_params;
1319	struct mbuf *m;
1320	char mpa_rev_to_use = mpa_rev;
1321	int err = 0;
1322
1323	if (ep->retry_with_mpa_v1)
1324		mpa_rev_to_use = 1;
1325	mpalen = sizeof(*mpa) + ep->plen;
1326	if (mpa_rev_to_use == 2)
1327		mpalen += sizeof(struct mpa_v2_conn_params);
1328
1329	mpa = malloc(mpalen, M_CXGBE, M_NOWAIT);
1330	if (mpa == NULL) {
1331		err = -ENOMEM;
1332		CTR3(KTR_IW_CXGBE, "%s:smr1 ep: %p , error: %d",
1333				__func__, ep, err);
1334		goto err;
1335	}
1336
1337	memset(mpa, 0, mpalen);
1338	memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key));
1339	mpa->flags = (crc_enabled ? MPA_CRC : 0) |
1340		(markers_enabled ? MPA_MARKERS : 0) |
1341		(mpa_rev_to_use == 2 ? MPA_ENHANCED_RDMA_CONN : 0);
1342	mpa->private_data_size = htons(ep->plen);
1343	mpa->revision = mpa_rev_to_use;
1344
1345	if (mpa_rev_to_use == 1) {
1346		ep->tried_with_mpa_v1 = 1;
1347		ep->retry_with_mpa_v1 = 0;
1348	}
1349
1350	if (mpa_rev_to_use == 2) {
1351		mpa->private_data_size = htons(ntohs(mpa->private_data_size) +
1352					    sizeof(struct mpa_v2_conn_params));
1353		mpa_v2_params.ird = htons((u16)ep->ird);
1354		mpa_v2_params.ord = htons((u16)ep->ord);
1355
1356		if (peer2peer) {
1357			mpa_v2_params.ird |= htons(MPA_V2_PEER2PEER_MODEL);
1358
1359			if (p2p_type == FW_RI_INIT_P2PTYPE_RDMA_WRITE) {
1360				mpa_v2_params.ord |=
1361				    htons(MPA_V2_RDMA_WRITE_RTR);
1362			} else if (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ) {
1363				mpa_v2_params.ord |=
1364					htons(MPA_V2_RDMA_READ_RTR);
1365			}
1366		}
1367		memcpy(mpa->private_data, &mpa_v2_params,
1368			sizeof(struct mpa_v2_conn_params));
1369
1370		if (ep->plen) {
1371
1372			memcpy(mpa->private_data +
1373				sizeof(struct mpa_v2_conn_params),
1374				ep->mpa_pkt + sizeof(*mpa), ep->plen);
1375		}
1376	} else {
1377
1378		if (ep->plen)
1379			memcpy(mpa->private_data,
1380					ep->mpa_pkt + sizeof(*mpa), ep->plen);
1381		CTR2(KTR_IW_CXGBE, "%s:smr7 %p", __func__, ep);
1382	}
1383
1384	m = m_getm(NULL, mpalen, M_NOWAIT, MT_DATA);
1385	if (m == NULL) {
1386		err = -ENOMEM;
1387		CTR3(KTR_IW_CXGBE, "%s:smr2 ep: %p , error: %d",
1388				__func__, ep, err);
1389		free(mpa, M_CXGBE);
1390		goto err;
1391	}
1392	m_copyback(m, 0, mpalen, (void *)mpa);
1393	free(mpa, M_CXGBE);
1394
1395	err = -sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT,
1396			ep->com.thread);
1397	if (err) {
1398		CTR3(KTR_IW_CXGBE, "%s:smr3 ep: %p , error: %d",
1399				__func__, ep, err);
1400		goto err;
1401	}
1402
1403	START_EP_TIMER(ep);
1404	ep->com.state = MPA_REQ_SENT;
1405	ep->mpa_attr.initiator = 1;
1406	CTR3(KTR_IW_CXGBE, "%s:smrE %p, error: %d", __func__, ep, err);
1407	return 0;
1408err:
1409	connect_reply_upcall(ep, err);
1410	CTR3(KTR_IW_CXGBE, "%s:smrE %p, error: %d", __func__, ep, err);
1411	return err;
1412}
1413
1414static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen)
1415{
1416	int mpalen;
1417	struct mpa_message *mpa;
1418	struct mpa_v2_conn_params mpa_v2_params;
1419	struct mbuf *m;
1420	int err;
1421
1422	CTR4(KTR_IW_CXGBE, "%s:smrejB %p %u %d", __func__, ep, ep->hwtid,
1423	    ep->plen);
1424
1425	mpalen = sizeof(*mpa) + plen;
1426
1427	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
1428
1429		mpalen += sizeof(struct mpa_v2_conn_params);
1430		CTR4(KTR_IW_CXGBE, "%s:smrej1 %p %u %d", __func__, ep,
1431		    ep->mpa_attr.version, mpalen);
1432	}
1433
1434	mpa = malloc(mpalen, M_CXGBE, M_NOWAIT);
1435	if (mpa == NULL)
1436		return (-ENOMEM);
1437
1438	memset(mpa, 0, mpalen);
1439	memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
1440	mpa->flags = MPA_REJECT;
1441	mpa->revision = mpa_rev;
1442	mpa->private_data_size = htons(plen);
1443
1444	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
1445
1446		mpa->flags |= MPA_ENHANCED_RDMA_CONN;
1447		mpa->private_data_size = htons(ntohs(mpa->private_data_size) +
1448					    sizeof(struct mpa_v2_conn_params));
1449		mpa_v2_params.ird = htons(((u16)ep->ird) |
1450				(peer2peer ? MPA_V2_PEER2PEER_MODEL :
1451				 0));
1452		mpa_v2_params.ord = htons(((u16)ep->ord) | (peer2peer ?
1453					(p2p_type ==
1454					 FW_RI_INIT_P2PTYPE_RDMA_WRITE ?
1455					 MPA_V2_RDMA_WRITE_RTR : p2p_type ==
1456					 FW_RI_INIT_P2PTYPE_READ_REQ ?
1457					 MPA_V2_RDMA_READ_RTR : 0) : 0));
1458		memcpy(mpa->private_data, &mpa_v2_params,
1459				sizeof(struct mpa_v2_conn_params));
1460
1461		if (ep->plen)
1462			memcpy(mpa->private_data +
1463				sizeof(struct mpa_v2_conn_params), pdata, plen);
1464		CTR5(KTR_IW_CXGBE, "%s:smrej3 %p %d %d %d", __func__, ep,
1465		    mpa_v2_params.ird, mpa_v2_params.ord, ep->plen);
1466	} else
1467		if (plen)
1468			memcpy(mpa->private_data, pdata, plen);
1469
1470	m = m_getm(NULL, mpalen, M_NOWAIT, MT_DATA);
1471	if (m == NULL) {
1472		free(mpa, M_CXGBE);
1473		return (-ENOMEM);
1474	}
1475	m_copyback(m, 0, mpalen, (void *)mpa);
1476	free(mpa, M_CXGBE);
1477
1478	err = -sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT, ep->com.thread);
1479	if (!err)
1480		ep->snd_seq += mpalen;
1481	CTR4(KTR_IW_CXGBE, "%s:smrejE %p %u %d", __func__, ep, ep->hwtid, err);
1482	return err;
1483}
1484
1485static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen)
1486{
1487	int mpalen;
1488	struct mpa_message *mpa;
1489	struct mbuf *m;
1490	struct mpa_v2_conn_params mpa_v2_params;
1491	int err;
1492
1493	CTR2(KTR_IW_CXGBE, "%s:smrepB %p", __func__, ep);
1494
1495	mpalen = sizeof(*mpa) + plen;
1496
1497	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
1498
1499		CTR3(KTR_IW_CXGBE, "%s:smrep1 %p %d", __func__, ep,
1500		    ep->mpa_attr.version);
1501		mpalen += sizeof(struct mpa_v2_conn_params);
1502	}
1503
1504	mpa = malloc(mpalen, M_CXGBE, M_NOWAIT);
1505	if (mpa == NULL)
1506		return (-ENOMEM);
1507
1508	memset(mpa, 0, sizeof(*mpa));
1509	memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
1510	mpa->flags = (ep->mpa_attr.crc_enabled ? MPA_CRC : 0) |
1511		(markers_enabled ? MPA_MARKERS : 0);
1512	mpa->revision = ep->mpa_attr.version;
1513	mpa->private_data_size = htons(plen);
1514
1515	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
1516
1517		mpa->flags |= MPA_ENHANCED_RDMA_CONN;
1518		mpa->private_data_size = htons(ntohs(mpa->private_data_size) +
1519			sizeof(struct mpa_v2_conn_params));
1520		mpa_v2_params.ird = htons((u16)ep->ird);
1521		mpa_v2_params.ord = htons((u16)ep->ord);
1522		CTR5(KTR_IW_CXGBE, "%s:smrep3 %p %d %d %d", __func__, ep,
1523		    ep->mpa_attr.version, mpa_v2_params.ird, mpa_v2_params.ord);
1524
1525		if (peer2peer && (ep->mpa_attr.p2p_type !=
1526			FW_RI_INIT_P2PTYPE_DISABLED)) {
1527
1528			mpa_v2_params.ird |= htons(MPA_V2_PEER2PEER_MODEL);
1529
1530			if (p2p_type == FW_RI_INIT_P2PTYPE_RDMA_WRITE) {
1531
1532				mpa_v2_params.ord |=
1533					htons(MPA_V2_RDMA_WRITE_RTR);
1534				CTR5(KTR_IW_CXGBE, "%s:smrep4 %p %d %d %d",
1535				    __func__, ep, p2p_type, mpa_v2_params.ird,
1536				    mpa_v2_params.ord);
1537			}
1538			else if (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ) {
1539
1540				mpa_v2_params.ord |=
1541					htons(MPA_V2_RDMA_READ_RTR);
1542				CTR5(KTR_IW_CXGBE, "%s:smrep5 %p %d %d %d",
1543				    __func__, ep, p2p_type, mpa_v2_params.ird,
1544				    mpa_v2_params.ord);
1545			}
1546		}
1547
1548		memcpy(mpa->private_data, &mpa_v2_params,
1549			sizeof(struct mpa_v2_conn_params));
1550
1551		if (ep->plen)
1552			memcpy(mpa->private_data +
1553				sizeof(struct mpa_v2_conn_params), pdata, plen);
1554	} else
1555		if (plen)
1556			memcpy(mpa->private_data, pdata, plen);
1557
1558	m = m_getm(NULL, mpalen, M_NOWAIT, MT_DATA);
1559	if (m == NULL) {
1560		free(mpa, M_CXGBE);
1561		return (-ENOMEM);
1562	}
1563	m_copyback(m, 0, mpalen, (void *)mpa);
1564	free(mpa, M_CXGBE);
1565
1566
1567	ep->com.state = MPA_REP_SENT;
1568	ep->snd_seq += mpalen;
1569	err = -sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT,
1570			ep->com.thread);
1571	CTR3(KTR_IW_CXGBE, "%s:smrepE %p %d", __func__, ep, err);
1572	return err;
1573}
1574
1575
1576
1577static void close_complete_upcall(struct c4iw_ep *ep, int status)
1578{
1579	struct iw_cm_event event;
1580
1581	CTR2(KTR_IW_CXGBE, "%s:ccuB %p", __func__, ep);
1582	memset(&event, 0, sizeof(event));
1583	event.event = IW_CM_EVENT_CLOSE;
1584	event.status = status;
1585
1586	if (ep->com.cm_id) {
1587
1588		CTR2(KTR_IW_CXGBE, "%s:ccu1 %1", __func__, ep);
1589		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
1590		deref_cm_id(&ep->com);
1591		set_bit(CLOSE_UPCALL, &ep->com.history);
1592	}
1593	CTR2(KTR_IW_CXGBE, "%s:ccuE %p", __func__, ep);
1594}
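/*
 * Abort the connection: set SO_LINGER with a zero linger time so soclose()
 * sends an RST, then release the endpoint resources (see the TBD note below
 * about the missing abort_rpl handling).
 */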
1595
1596static int
1597send_abort(struct c4iw_ep *ep)
1598{
1599	struct socket *so = ep->com.so;
1600	struct sockopt sopt;
1601	int rc;
1602	struct linger l;
1603
1604	CTR5(KTR_IW_CXGBE, "%s ep %p so %p state %s tid %d", __func__, ep, so,
1605	    states[ep->com.state], ep->hwtid);
1606
1607	l.l_onoff = 1;
1608	l.l_linger = 0;
1609
1610	/* linger_time of 0 forces RST to be sent */
1611	sopt.sopt_dir = SOPT_SET;
1612	sopt.sopt_level = SOL_SOCKET;
1613	sopt.sopt_name = SO_LINGER;
1614	sopt.sopt_val = (caddr_t)&l;
1615	sopt.sopt_valsize = sizeof l;
1616	sopt.sopt_td = NULL;
1617	rc = sosetopt(so, &sopt);
1618	if (rc != 0) {
1619		log(LOG_ERR, "%s: sosetopt(%p, linger = 0) failed with %d.\n",
1620		    __func__, so, rc);
1621	}
1622
1623	uninit_iwarp_socket(so);
1624	soclose(so);
1625	set_bit(ABORT_CONN, &ep->com.history);
1626
1627	/*
1628	 * TBD: iw_cxgbe driver should receive ABORT reply for every ABORT
1629	 * request it has sent. But the current TOE driver is not propagating
1630	 * this ABORT reply event (via do_abort_rpl) to iw_cxgbe. So as a work-
1631	 * around, de-reference 'ep' here instead of doing it in the abort_rpl()
1632	 * handler (not yet implemented) of the iw_cxgbe driver.
1633	 */
1634	release_ep_resources(ep);
1635
1636	return (0);
1637}
1638
1639static void peer_close_upcall(struct c4iw_ep *ep)
1640{
1641	struct iw_cm_event event;
1642
1643	CTR2(KTR_IW_CXGBE, "%s:pcuB %p", __func__, ep);
1644	memset(&event, 0, sizeof(event));
1645	event.event = IW_CM_EVENT_DISCONNECT;
1646
1647	if (ep->com.cm_id) {
1648
1649		CTR2(KTR_IW_CXGBE, "%s:pcu1 %p", __func__, ep);
1650		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
1651		set_bit(DISCONN_UPCALL, &ep->com.history);
1652	}
1653	CTR2(KTR_IW_CXGBE, "%s:pcuE %p", __func__, ep);
1654}
1655
1656static void peer_abort_upcall(struct c4iw_ep *ep)
1657{
1658	struct iw_cm_event event;
1659
1660	CTR2(KTR_IW_CXGBE, "%s:pauB %p", __func__, ep);
1661	memset(&event, 0, sizeof(event));
1662	event.event = IW_CM_EVENT_CLOSE;
1663	event.status = -ECONNRESET;
1664
1665	if (ep->com.cm_id) {
1666
1667		CTR2(KTR_IW_CXGBE, "%s:pau1 %p", __func__, ep);
1668		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
1669		deref_cm_id(&ep->com);
1670		set_bit(ABORT_UPCALL, &ep->com.history);
1671	}
1672	CTR2(KTR_IW_CXGBE, "%s:pauE %p", __func__, ep);
1673}
1674
1675static void connect_reply_upcall(struct c4iw_ep *ep, int status)
1676{
1677	struct iw_cm_event event;
1678
1679	CTR3(KTR_IW_CXGBE, "%s:cruB %p, status: %d", __func__, ep, status);
1680	memset(&event, 0, sizeof(event));
1681	event.event = IW_CM_EVENT_CONNECT_REPLY;
1682	event.status = ((status == -ECONNABORTED) || (status == -EPIPE)) ?
1683					-ECONNRESET : status;
1684	event.local_addr = ep->com.local_addr;
1685	event.remote_addr = ep->com.remote_addr;
1686
1687	if ((status == 0) || (status == -ECONNREFUSED)) {
1688
1689		if (!ep->tried_with_mpa_v1) {
1690
1691			CTR2(KTR_IW_CXGBE, "%s:cru1 %p", __func__, ep);
1692			/* this means MPA_v2 is used */
1693			event.ord = ep->ird;
1694			event.ird = ep->ord;
1695			event.private_data_len = ep->plen -
1696				sizeof(struct mpa_v2_conn_params);
1697			event.private_data = ep->mpa_pkt +
1698				sizeof(struct mpa_message) +
1699				sizeof(struct mpa_v2_conn_params);
1700		} else {
1701
1702			CTR2(KTR_IW_CXGBE, "%s:cru2 %p", __func__, ep);
1703			/* this means MPA_v1 is used */
1704			event.ord = c4iw_max_read_depth;
1705			event.ird = c4iw_max_read_depth;
1706			event.private_data_len = ep->plen;
1707			event.private_data = ep->mpa_pkt +
1708				sizeof(struct mpa_message);
1709		}
1710	}
1711
1712	if (ep->com.cm_id) {
1713
1714		CTR2(KTR_IW_CXGBE, "%s:cru3 %p", __func__, ep);
1715		set_bit(CONN_RPL_UPCALL, &ep->com.history);
1716		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
1717	}
1718
1719	if(status == -ECONNABORTED) {
1720
1721		CTR3(KTR_IW_CXGBE, "%s:cruE %p %d", __func__, ep, status);
1722		return;
1723	}
1724
1725	if (status < 0) {
1726
1727		CTR3(KTR_IW_CXGBE, "%s:cru4 %p %d", __func__, ep, status);
1728		deref_cm_id(&ep->com);
1729	}
1730
1731	CTR2(KTR_IW_CXGBE, "%s:cruE %p", __func__, ep);
1732}
1733
1734static int connect_request_upcall(struct c4iw_ep *ep)
1735{
1736	struct iw_cm_event event;
1737	int ret;
1738
1739	CTR3(KTR_IW_CXGBE, "%s: ep %p, mpa_v1 %d", __func__, ep,
1740	    ep->tried_with_mpa_v1);
1741
1742	memset(&event, 0, sizeof(event));
1743	event.event = IW_CM_EVENT_CONNECT_REQUEST;
1744	event.local_addr = ep->com.local_addr;
1745	event.remote_addr = ep->com.remote_addr;
1746	event.provider_data = ep;
1747
1748	if (!ep->tried_with_mpa_v1) {
1749		/* this means MPA_v2 is used */
1750		event.ord = ep->ord;
1751		event.ird = ep->ird;
1752		event.private_data_len = ep->plen -
1753			sizeof(struct mpa_v2_conn_params);
1754		event.private_data = ep->mpa_pkt + sizeof(struct mpa_message) +
1755			sizeof(struct mpa_v2_conn_params);
1756	} else {
1757
1758		/* this means MPA_v1 is used. Send max supported */
1759		event.ord = c4iw_max_read_depth;
1760		event.ird = c4iw_max_read_depth;
1761		event.private_data_len = ep->plen;
1762		event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
1763	}
1764
1765	c4iw_get_ep(&ep->com);
1766	ret = ep->parent_ep->com.cm_id->event_handler(ep->parent_ep->com.cm_id,
1767	    &event);
1768	if(ret) {
1769		CTR3(KTR_IW_CXGBE, "%s: ep %p, Failure while notifying event to"
1770			" IWCM, err:%d", __func__, ep, ret);
1771		c4iw_put_ep(&ep->com);
1772	} else
1773		/* Dereference parent_ep only in success case.
1774		 * In case of failure, parent_ep is dereferenced by the caller
1775		 * of process_mpa_request().
1776		 */
1777		c4iw_put_ep(&ep->parent_ep->com);
1778
1779	set_bit(CONNREQ_UPCALL, &ep->com.history);
1780	return ret;
1781}
1782
1783static void established_upcall(struct c4iw_ep *ep)
1784{
1785	struct iw_cm_event event;
1786
1787	CTR2(KTR_IW_CXGBE, "%s:euB %p", __func__, ep);
1788	memset(&event, 0, sizeof(event));
1789	event.event = IW_CM_EVENT_ESTABLISHED;
1790	event.ird = ep->ord;
1791	event.ord = ep->ird;
1792
1793	if (ep->com.cm_id) {
1794
1795		CTR2(KTR_IW_CXGBE, "%s:eu1 %p", __func__, ep);
1796		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
1797		set_bit(ESTAB_UPCALL, &ep->com.history);
1798	}
1799	CTR2(KTR_IW_CXGBE, "%s:euE %p", __func__, ep);
1800}
1801
1802
1803#define RELAXED_IRD_NEGOTIATION 1
1804
1805/*
1806 * process_mpa_reply - process streaming mode MPA reply
1807 *
1808 * Returns:
1809 *
1810 * 0 upon success indicating a connect request was delivered to the ULP
1811 * or the mpa request is incomplete but valid so far.
1812 *
1813 * 1 if a failure requires the caller to close the connection.
1814 *
1815 * 2 if a failure requires the caller to abort the connection.
1816 */
1817static int process_mpa_reply(struct c4iw_ep *ep)
1818{
1819	struct mpa_message *mpa;
1820	struct mpa_v2_conn_params *mpa_v2_params;
1821	u16 plen;
1822	u16 resp_ird, resp_ord;
1823	u8 rtr_mismatch = 0, insuff_ird = 0;
1824	struct c4iw_qp_attributes attrs = {0};
1825	enum c4iw_qp_attr_mask mask;
1826	int err;
1827	struct mbuf *top, *m;
1828	int flags = MSG_DONTWAIT;
1829	struct uio uio;
1830	int disconnect = 0;
1831
1832	CTR2(KTR_IW_CXGBE, "%s:pmrB %p", __func__, ep);
1833
1834	/*
1835	 * Stop mpa timer.  If it expired, then
1836	 * we ignore the MPA reply.  process_timeout()
1837	 * will abort the connection.
1838	 */
1839	if (STOP_EP_TIMER(ep))
1840		return 0;
1841
1842	uio.uio_resid = 1000000;
1843	uio.uio_td = ep->com.thread;
1844	err = soreceive(ep->com.so, NULL, &uio, &top, NULL, &flags);
1845
1846	if (err) {
1847
1848		if (err == EWOULDBLOCK) {
1849
1850			CTR2(KTR_IW_CXGBE, "%s:pmr1 %p", __func__, ep);
1851			START_EP_TIMER(ep);
1852			return 0;
1853		}
1854		err = -err;
1855		CTR2(KTR_IW_CXGBE, "%s:pmr2 %p", __func__, ep);
1856		goto err;
1857	}
1858
1859	if (ep->com.so->so_rcv.sb_mb) {
1860
1861		CTR2(KTR_IW_CXGBE, "%s:pmr3 %p", __func__, ep);
1862		printf("%s data after soreceive called! so %p sb_mb %p top %p\n",
1863		       __func__, ep->com.so, ep->com.so->so_rcv.sb_mb, top);
1864	}
1865
1866	m = top;
1867
1868	do {
1869
1870		CTR2(KTR_IW_CXGBE, "%s:pmr4 %p", __func__, ep);
1871		/*
1872		 * If we get more than the supported amount of private data
1873		 * then we must fail this connection.
1874		 */
1875		if (ep->mpa_pkt_len + m->m_len > sizeof(ep->mpa_pkt)) {
1876
1877			CTR3(KTR_IW_CXGBE, "%s:pmr5 %p %d", __func__, ep,
1878			    ep->mpa_pkt_len + m->m_len);
1879			err = (-EINVAL);
1880			goto err_stop_timer;
1881		}
1882
1883		/*
1884		 * copy the new data into our accumulation buffer.
1885		 */
1886		m_copydata(m, 0, m->m_len, &(ep->mpa_pkt[ep->mpa_pkt_len]));
1887		ep->mpa_pkt_len += m->m_len;
1888		if (!m->m_next)
1889			m = m->m_nextpkt;
1890		else
1891			m = m->m_next;
1892	} while (m);
1893
1894	m_freem(top);
1895	/*
1896	 * if we don't even have the mpa message, then bail.
1897	 */
1898	if (ep->mpa_pkt_len < sizeof(*mpa)) {
1899		return 0;
1900	}
1901	mpa = (struct mpa_message *) ep->mpa_pkt;
1902
1903	/* Validate MPA header. */
1904	if (mpa->revision > mpa_rev) {
1905
1906		CTR4(KTR_IW_CXGBE, "%s:pmr6 %p %d %d", __func__, ep,
1907		    mpa->revision, mpa_rev);
1908		printk(KERN_ERR MOD "%s MPA version mismatch. Local = %d, "
1909				" Received = %d\n", __func__, mpa_rev, mpa->revision);
1910		err = -EPROTO;
1911		goto err_stop_timer;
1912	}
1913
1914	if (memcmp(mpa->key, MPA_KEY_REP, sizeof(mpa->key))) {
1915
1916		CTR2(KTR_IW_CXGBE, "%s:pmr7 %p", __func__, ep);
1917		err = -EPROTO;
1918		goto err_stop_timer;
1919	}
1920
1921	plen = ntohs(mpa->private_data_size);
1922
1923	/*
1924	 * Fail if there's too much private data.
1925	 */
1926	if (plen > MPA_MAX_PRIVATE_DATA) {
1927
1928		CTR2(KTR_IW_CXGBE, "%s:pmr8 %p", __func__, ep);
1929		err = -EPROTO;
1930		goto err_stop_timer;
1931	}
1932
1933	/*
1934	 * If plen does not account for pkt size
1935	 */
1936	if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
1937
1938		CTR2(KTR_IW_CXGBE, "%s:pmr9 %p", __func__, ep);
1940		err = -EPROTO;
1941		goto err_stop_timer;
1942	}
1943
1944	ep->plen = (u8) plen;
1945
1946	/*
1947	 * If we don't have all the pdata yet, then bail.
1948	 * We'll continue process when more data arrives.
1949	 */
1950	if (ep->mpa_pkt_len < (sizeof(*mpa) + plen)) {
1951
1952		CTR2(KTR_IW_CXGBE, "%s:pmra %p", __func__, ep);
1953		return 0;
1954	}
1955
1956	if (mpa->flags & MPA_REJECT) {
1957
1958		CTR2(KTR_IW_CXGBE, "%s:pmrb %p", __func__, ep);
1959		err = -ECONNREFUSED;
1960		goto err_stop_timer;
1961	}
1962
1963	/*
1964	 * If we get here we have accumulated the entire mpa
1965	 * start reply message including private data. And
1966	 * the MPA header is valid.
1967	 */
1968	ep->com.state = FPDU_MODE;
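	/*
	 * '|' binds tighter than '?:', so CRC ends up enabled when either the
	 * peer requested it or the local crc_enabled knob is set.
	 */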
1969	ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
1970	ep->mpa_attr.recv_marker_enabled = markers_enabled;
1971	ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
1972	ep->mpa_attr.version = mpa->revision;
1973	ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
1974
1975	if (mpa->revision == 2) {
1976
1977		CTR2(KTR_IW_CXGBE, "%s:pmrc %p", __func__, ep);
1978		ep->mpa_attr.enhanced_rdma_conn =
1979			mpa->flags & MPA_ENHANCED_RDMA_CONN ? 1 : 0;
1980
1981		if (ep->mpa_attr.enhanced_rdma_conn) {
1982
1983			CTR2(KTR_IW_CXGBE, "%s:pmrd %p", __func__, ep);
1984			mpa_v2_params = (struct mpa_v2_conn_params *)
1985				(ep->mpa_pkt + sizeof(*mpa));
1986			resp_ird = ntohs(mpa_v2_params->ird) &
1987				MPA_V2_IRD_ORD_MASK;
1988			resp_ord = ntohs(mpa_v2_params->ord) &
1989				MPA_V2_IRD_ORD_MASK;
1990
1991			/*
1992			 * This is a double-check. Ideally, below checks are
1993			 * not required since ird/ord stuff has been taken
1994			 * care of in c4iw_accept_cr
1995			 */
1996			if (ep->ird < resp_ord) {
1997				if (RELAXED_IRD_NEGOTIATION && resp_ord <=
1998				   ep->com.dev->rdev.adap->params.max_ordird_qp)
1999					ep->ird = resp_ord;
2000				else
2001					insuff_ird = 1;
2002			} else if (ep->ird > resp_ord) {
2003				ep->ird = resp_ord;
2004			}
2005			if (ep->ord > resp_ird) {
2006				if (RELAXED_IRD_NEGOTIATION)
2007					ep->ord = resp_ird;
2008				else
2009					insuff_ird = 1;
2010			}
2011			if (insuff_ird) {
2012				err = -ENOMEM;
2013				ep->ird = resp_ord;
2014				ep->ord = resp_ird;
2015			}
2016
2017			if (ntohs(mpa_v2_params->ird) &
2018				MPA_V2_PEER2PEER_MODEL) {
2019
2020				CTR2(KTR_IW_CXGBE, "%s:pmrf %p", __func__, ep);
2021				if (ntohs(mpa_v2_params->ord) &
2022					MPA_V2_RDMA_WRITE_RTR) {
2023
2024					CTR2(KTR_IW_CXGBE, "%s:pmrg %p", __func__, ep);
2025					ep->mpa_attr.p2p_type =
2026						FW_RI_INIT_P2PTYPE_RDMA_WRITE;
2027				}
2028				else if (ntohs(mpa_v2_params->ord) &
2029					MPA_V2_RDMA_READ_RTR) {
2030
2031					CTR2(KTR_IW_CXGBE, "%s:pmrh %p", __func__, ep);
2032					ep->mpa_attr.p2p_type =
2033						FW_RI_INIT_P2PTYPE_READ_REQ;
2034				}
2035			}
2036		}
2037	} else {
2038
2039		CTR2(KTR_IW_CXGBE, "%s:pmri %p", __func__, ep);
2040
2041		if (mpa->revision == 1) {
2042
2043			CTR2(KTR_IW_CXGBE, "%s:pmrj %p", __func__, ep);
2044
2045			if (peer2peer) {
2046
2047				CTR2(KTR_IW_CXGBE, "%s:pmrk %p", __func__, ep);
2048				ep->mpa_attr.p2p_type = p2p_type;
2049			}
2050		}
2051	}
2052
2053	if (set_tcpinfo(ep)) {
2054
2055		CTR2(KTR_IW_CXGBE, "%s:pmrl %p", __func__, ep);
2056		printf("%s set_tcpinfo error\n", __func__);
2057		err = -ECONNRESET;
2058		goto err;
2059	}
2060
2061	CTR6(KTR_IW_CXGBE, "%s - crc_enabled = %d, recv_marker_enabled = %d, "
2062	    "xmit_marker_enabled = %d, version = %d p2p_type = %d", __func__,
2063	    ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
2064	    ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version,
2065	    ep->mpa_attr.p2p_type);
2066
2067	/*
2068	 * If responder's RTR does not match with that of initiator, assign
2069	 * FW_RI_INIT_P2PTYPE_DISABLED in mpa attributes so that RTR is not
2070	 * generated when moving QP to RTS state.
2071	 * A TERM message will be sent after QP has moved to RTS state
2072	 */
2073	if ((ep->mpa_attr.version == 2) && peer2peer &&
2074		(ep->mpa_attr.p2p_type != p2p_type)) {
2075
2076		CTR2(KTR_IW_CXGBE, "%s:pmrm %p", __func__, ep);
2077		ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
2078		rtr_mismatch = 1;
2079	}
2080
2081
2082	//ep->ofld_txq = TOEPCB(ep->com.so)->ofld_txq;
2083	attrs.mpa_attr = ep->mpa_attr;
2084	attrs.max_ird = ep->ird;
2085	attrs.max_ord = ep->ord;
2086	attrs.llp_stream_handle = ep;
2087	attrs.next_state = C4IW_QP_STATE_RTS;
2088
2089	mask = C4IW_QP_ATTR_NEXT_STATE |
2090		C4IW_QP_ATTR_LLP_STREAM_HANDLE | C4IW_QP_ATTR_MPA_ATTR |
2091		C4IW_QP_ATTR_MAX_IRD | C4IW_QP_ATTR_MAX_ORD;
2092
2093	/* bind QP and TID with INIT_WR */
2094	err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, mask, &attrs, 1);
2095
2096	if (err) {
2097
2098		CTR2(KTR_IW_CXGBE, "%s:pmrn %p", __func__, ep);
2099		goto err;
2100	}
2101
2102	/*
2103	 * If responder's RTR requirement did not match with what initiator
2104	 * supports, generate TERM message
2105	 */
2106	if (rtr_mismatch) {
2107
2108		CTR2(KTR_IW_CXGBE, "%s:pmro %p", __func__, ep);
2109		printk(KERN_ERR "%s: RTR mismatch, sending TERM\n", __func__);
2110		attrs.layer_etype = LAYER_MPA | DDP_LLP;
2111		attrs.ecode = MPA_NOMATCH_RTR;
2112		attrs.next_state = C4IW_QP_STATE_TERMINATE;
2113		attrs.send_term = 1;
2114		err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
2115			C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
2116		err = -ENOMEM;
2117		disconnect = 1;
2118		goto out;
2119	}
2120
2121	/*
2122	 * Generate TERM if initiator IRD is not sufficient for responder
2123	 * provided ORD. Currently, we do the same behaviour even when
2124	 * responder provided IRD is also not sufficient as regards to
2125	 * initiator ORD.
2126	 */
2127	if (insuff_ird) {
2128
2129		CTR2(KTR_IW_CXGBE, "%s:pmrp %p", __func__, ep);
2130		printk(KERN_ERR "%s: Insufficient IRD, sending TERM\n",
2131				__func__);
2132		attrs.layer_etype = LAYER_MPA | DDP_LLP;
2133		attrs.ecode = MPA_INSUFF_IRD;
2134		attrs.next_state = C4IW_QP_STATE_TERMINATE;
2135		attrs.send_term = 1;
2136		err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
2137			C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
2138		err = -ENOMEM;
2139		disconnect = 1;
2140		goto out;
2141	}
2142	goto out;
2143err_stop_timer:
2144	STOP_EP_TIMER(ep);
2145err:
2146	disconnect = 2;
2147out:
2148	connect_reply_upcall(ep, err);
2149	CTR2(KTR_IW_CXGBE, "%s:pmrE %p", __func__, ep);
2150	return disconnect;
2151}
2152
2153/*
2154 * process_mpa_request - process streaming mode MPA request
2155 *
2156 * Returns:
2157 *
2158 * 0 upon success indicating a connect request was delivered to the ULP
2159 * or the mpa request is incomplete but valid so far.
2160 *
2161 * 1 if a failure requires the caller to close the connection.
2162 *
2163 * 2 if a failure requires the caller to abort the connection.
2164 */
2165static int
2166process_mpa_request(struct c4iw_ep *ep)
2167{
2168	struct mpa_message *mpa;
2169	struct mpa_v2_conn_params *mpa_v2_params;
2170	u16 plen;
2171	int flags = MSG_DONTWAIT;
2172	int rc;
2173	struct iovec iov;
2174	struct uio uio;
2175	enum c4iw_ep_state state = ep->com.state;
2176
2177	CTR3(KTR_IW_CXGBE, "%s: ep %p, state %s", __func__, ep, states[state]);
2178
2179	if (state != MPA_REQ_WAIT)
2180		return 0;
2181
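	/*
	 * Read directly into the free space of the ep->mpa_pkt accumulation
	 * buffer; uio_offset ends up holding the number of bytes received.
	 */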
2182	iov.iov_base = &ep->mpa_pkt[ep->mpa_pkt_len];
2183	iov.iov_len = sizeof(ep->mpa_pkt) - ep->mpa_pkt_len;
2184	uio.uio_iov = &iov;
2185	uio.uio_iovcnt = 1;
2186	uio.uio_offset = 0;
2187	uio.uio_resid = sizeof(ep->mpa_pkt) - ep->mpa_pkt_len;
2188	uio.uio_segflg = UIO_SYSSPACE;
2189	uio.uio_rw = UIO_READ;
2190	uio.uio_td = NULL; /* uio.uio_td = ep->com.thread; */
2191
2192	rc = soreceive(ep->com.so, NULL, &uio, NULL, NULL, &flags);
2193	if (rc == EAGAIN)
2194		return 0;
2195	else if (rc)
2196		goto err_stop_timer;
2197
2198	KASSERT(uio.uio_offset > 0, ("%s: soreceive on so %p read no data",
2199	    __func__, ep->com.so));
2200	ep->mpa_pkt_len += uio.uio_offset;
2201
2202	/*
2203	 * If we get more than the supported amount of private data then we must
2204	 * fail this connection.  XXX: check so_rcv->sb_cc, or peek with another
2205	 * soreceive, or increase the size of mpa_pkt by 1 and abort if the last
2206	 * byte is filled by the soreceive above.
2207	 */
2208
2209	/* Don't even have the MPA message.  Wait for more data to arrive. */
2210	if (ep->mpa_pkt_len < sizeof(*mpa))
2211		return 0;
2212	mpa = (struct mpa_message *) ep->mpa_pkt;
2213
2214	/*
2215	 * Validate MPA Header.
2216	 */
2217	if (mpa->revision > mpa_rev) {
2218		log(LOG_ERR, "%s: MPA version mismatch. Local = %d,"
2219		    " Received = %d\n", __func__, mpa_rev, mpa->revision);
2220		goto err_stop_timer;
2221	}
2222
2223	if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key)))
2224		goto err_stop_timer;
2225
2226	/*
2227	 * Fail if there's too much private data.
2228	 */
2229	plen = ntohs(mpa->private_data_size);
2230	if (plen > MPA_MAX_PRIVATE_DATA)
2231		goto err_stop_timer;
2232
2233	/*
2234	 * If plen does not account for pkt size
2235	 */
2236	if (ep->mpa_pkt_len > (sizeof(*mpa) + plen))
2237		goto err_stop_timer;
2238
2239	ep->plen = (u8) plen;
2240
2241	/*
2242	 * If we don't have all the pdata yet, then bail.
2243	 */
2244	if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
2245		return 0;
2246
2247	/*
2248	 * If we get here we have accumulated the entire mpa
2249	 * start reply message including private data.
2250	 */
2251	ep->mpa_attr.initiator = 0;
2252	ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
2253	ep->mpa_attr.recv_marker_enabled = markers_enabled;
2254	ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
2255	ep->mpa_attr.version = mpa->revision;
2256	if (mpa->revision == 1)
2257		ep->tried_with_mpa_v1 = 1;
2258	ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
2259
2260	if (mpa->revision == 2) {
2261		ep->mpa_attr.enhanced_rdma_conn =
2262		    mpa->flags & MPA_ENHANCED_RDMA_CONN ? 1 : 0;
2263		if (ep->mpa_attr.enhanced_rdma_conn) {
2264			mpa_v2_params = (struct mpa_v2_conn_params *)
2265				(ep->mpa_pkt + sizeof(*mpa));
2266			ep->ird = ntohs(mpa_v2_params->ird) &
2267				MPA_V2_IRD_ORD_MASK;
2268			ep->ird = min_t(u32, ep->ird,
2269					cur_max_read_depth(ep->com.dev));
2270			ep->ord = ntohs(mpa_v2_params->ord) &
2271				MPA_V2_IRD_ORD_MASK;
2272			ep->ord = min_t(u32, ep->ord,
2273					cur_max_read_depth(ep->com.dev));
2274			CTR3(KTR_IW_CXGBE, "%s initiator ird %u ord %u\n",
2275				 __func__, ep->ird, ep->ord);
2276			if (ntohs(mpa_v2_params->ird) & MPA_V2_PEER2PEER_MODEL)
2277				if (peer2peer) {
2278					if (ntohs(mpa_v2_params->ord) &
2279							MPA_V2_RDMA_WRITE_RTR)
2280						ep->mpa_attr.p2p_type =
2281						FW_RI_INIT_P2PTYPE_RDMA_WRITE;
2282					else if (ntohs(mpa_v2_params->ord) &
2283							MPA_V2_RDMA_READ_RTR)
2284						ep->mpa_attr.p2p_type =
2285						FW_RI_INIT_P2PTYPE_READ_REQ;
2286				}
2287		}
2288	} else if (mpa->revision == 1 && peer2peer)
2289		ep->mpa_attr.p2p_type = p2p_type;
2290
2291	if (set_tcpinfo(ep))
2292		goto err_stop_timer;
2293
2294	CTR5(KTR_IW_CXGBE, "%s: crc_enabled = %d, recv_marker_enabled = %d, "
2295	    "xmit_marker_enabled = %d, version = %d", __func__,
2296	    ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
2297	    ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version);
2298
2299	ep->com.state = MPA_REQ_RCVD;
2300	STOP_EP_TIMER(ep);
2301
2302	/* drive upcall */
2303	if (ep->parent_ep->com.state != DEAD)
2304		if (connect_request_upcall(ep))
2305			goto err_out;
2306	return 0;
2307
2308err_stop_timer:
2309	STOP_EP_TIMER(ep);
2310err_out:
2311	return 2;
2312}
2313
2314/*
2315 * iwcm->reject.  Reject an incoming MPA connection request: send an MPA
2316 * reject message when MPA is enabled (mpa_rev != 0), otherwise abort, and
2317 * then tear down the connection via c4iw_ep_disconnect().
2318 */
2319int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
2320{
2321	int err;
2322	struct c4iw_ep *ep = to_ep(cm_id);
2323	int abort = 0;
2324
2325	mutex_lock(&ep->com.mutex);
2326	CTR2(KTR_IW_CXGBE, "%s:crcB %p", __func__, ep);
2327
2328	if ((ep->com.state == DEAD) ||
2329			(ep->com.state != MPA_REQ_RCVD)) {
2330
2331		CTR2(KTR_IW_CXGBE, "%s:crc1 %p", __func__, ep);
2332		mutex_unlock(&ep->com.mutex);
2333		c4iw_put_ep(&ep->com);
2334		return -ECONNRESET;
2335	}
2336	set_bit(ULP_REJECT, &ep->com.history);
2337
2338	if (mpa_rev == 0) {
2339
2340		CTR2(KTR_IW_CXGBE, "%s:crc2 %p", __func__, ep);
2341		abort = 1;
2342	}
2343	else {
2344
2345		CTR2(KTR_IW_CXGBE, "%s:crc3 %p", __func__, ep);
2346		abort = send_mpa_reject(ep, pdata, pdata_len);
2347	}
2348	STOP_EP_TIMER(ep);
2349	err = c4iw_ep_disconnect(ep, abort != 0, GFP_KERNEL);
2350	mutex_unlock(&ep->com.mutex);
2351	c4iw_put_ep(&ep->com);
2352	CTR3(KTR_IW_CXGBE, "%s:crc4 %p, err: %d", __func__, ep, err);
2353	return 0;
2354}
2355
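/*
 * iwcm->accept.  Accept an MPA connection request: validate and finalize the
 * ird/ord negotiation, bind the QP to the endpoint, move the QP to RTS and
 * send the MPA reply carrying the ULP's private data.
 */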
2356int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
2357{
2358	int err;
2359	struct c4iw_qp_attributes attrs = {0};
2360	enum c4iw_qp_attr_mask mask;
2361	struct c4iw_ep *ep = to_ep(cm_id);
2362	struct c4iw_dev *h = to_c4iw_dev(cm_id->device);
2363	struct c4iw_qp *qp = get_qhp(h, conn_param->qpn);
2364	int abort = 0;
2365
2366	mutex_lock(&ep->com.mutex);
2367	CTR2(KTR_IW_CXGBE, "%s:cacB %p", __func__, ep);
2368
2369	if ((ep->com.state == DEAD) ||
2370			(ep->com.state != MPA_REQ_RCVD)) {
2371
2372		CTR2(KTR_IW_CXGBE, "%s:cac1 %p", __func__, ep);
2373		err = -ECONNRESET;
2374		goto err_out;
2375	}
2376
2377	BUG_ON(!qp);
2378
2379	set_bit(ULP_ACCEPT, &ep->com.history);
2380
2381	if ((conn_param->ord > c4iw_max_read_depth) ||
2382		(conn_param->ird > c4iw_max_read_depth)) {
2383
2384		CTR2(KTR_IW_CXGBE, "%s:cac2 %p", __func__, ep);
2385		err = -EINVAL;
2386		goto err_abort;
2387	}
2388
2389	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
2390
2391		CTR2(KTR_IW_CXGBE, "%s:cac3 %p", __func__, ep);
2392
2393		if (conn_param->ord > ep->ird) {
2394			if (RELAXED_IRD_NEGOTIATION) {
2395				conn_param->ord = ep->ird;
2396			} else {
2397				ep->ird = conn_param->ird;
2398				ep->ord = conn_param->ord;
2399				send_mpa_reject(ep, conn_param->private_data,
2400						conn_param->private_data_len);
2401				err = -ENOMEM;
2402				goto err_abort;
2403			}
2404		}
2405		if (conn_param->ird < ep->ord) {
2406			if (RELAXED_IRD_NEGOTIATION &&
2407			    ep->ord <= h->rdev.adap->params.max_ordird_qp) {
2408				conn_param->ird = ep->ord;
2409			} else {
2410				err = -ENOMEM;
2411				goto err_abort;
2412			}
2413		}
2414	}
2415	ep->ird = conn_param->ird;
2416	ep->ord = conn_param->ord;
2417
2418	if (ep->mpa_attr.version == 1) {
2419		if (peer2peer && ep->ird == 0)
2420			ep->ird = 1;
2421	} else {
2422		if (peer2peer &&
2423		    (ep->mpa_attr.p2p_type != FW_RI_INIT_P2PTYPE_DISABLED) &&
2424		    (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ) && ep->ird == 0)
2425			ep->ird = 1;
2426	}
2427
2428	CTR4(KTR_IW_CXGBE, "%s %d ird %d ord %d\n", __func__, __LINE__,
2429			ep->ird, ep->ord);
2430
2431	ep->com.cm_id = cm_id;
2432	ref_cm_id(&ep->com);
2433	ep->com.qp = qp;
2434	ref_qp(ep);
2435	//ep->ofld_txq = TOEPCB(ep->com.so)->ofld_txq;
2436
2437	/* bind QP to EP and move to RTS */
2438	attrs.mpa_attr = ep->mpa_attr;
2439	attrs.max_ird = ep->ird;
2440	attrs.max_ord = ep->ord;
2441	attrs.llp_stream_handle = ep;
2442	attrs.next_state = C4IW_QP_STATE_RTS;
2443
2444	/* bind QP and TID with INIT_WR */
2445	mask = C4IW_QP_ATTR_NEXT_STATE |
2446		C4IW_QP_ATTR_LLP_STREAM_HANDLE |
2447		C4IW_QP_ATTR_MPA_ATTR |
2448		C4IW_QP_ATTR_MAX_IRD |
2449		C4IW_QP_ATTR_MAX_ORD;
2450
2451	err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, mask, &attrs, 1);
2452	if (err) {
2453		CTR3(KTR_IW_CXGBE, "%s:caca %p, err: %d", __func__, ep, err);
2454		goto err_deref_cm_id;
2455	}
2456
2457	err = send_mpa_reply(ep, conn_param->private_data,
2458			conn_param->private_data_len);
2459	if (err) {
2460		CTR3(KTR_IW_CXGBE, "%s:cacb %p, err: %d", __func__, ep, err);
2461		goto err_deref_cm_id;
2462	}
2463
2464	ep->com.state = FPDU_MODE;
2465	established_upcall(ep);
2466	mutex_unlock(&ep->com.mutex);
2467	c4iw_put_ep(&ep->com);
2468	CTR2(KTR_IW_CXGBE, "%s:cacE %p", __func__, ep);
2469	return 0;
2470err_deref_cm_id:
2471	deref_cm_id(&ep->com);
2472err_abort:
2473	abort = 1;
2474err_out:
2475	if (abort)
2476		c4iw_ep_disconnect(ep, 1, GFP_KERNEL);
2477	mutex_unlock(&ep->com.mutex);
2478	c4iw_put_ep(&ep->com);
2479	CTR2(KTR_IW_CXGBE, "%s:cacE err %p", __func__, ep);
2480	return err;
2481}
2482
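/*
 * Create a TCP socket and bind it to the given local address; the address
 * actually bound (e.g. with an ephemeral port) is copied back into *laddr
 * via sock_getname().
 */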
2483static int
2484c4iw_sock_create(struct sockaddr_storage *laddr, struct socket **so)
2485{
2486	int ret;
2487	int size;
2488	struct socket *sock = NULL;
2489
2490	ret = sock_create_kern(laddr->ss_family,
2491			SOCK_STREAM, IPPROTO_TCP, &sock);
2492	if (ret) {
2493		CTR2(KTR_IW_CXGBE, "%s:Failed to create TCP socket. err %d",
2494				__func__, ret);
2495		return ret;
2496	}
2497
2498	ret = sobind(sock, (struct sockaddr *)laddr, curthread);
2499	if (ret) {
2500		CTR2(KTR_IW_CXGBE, "%s:Failed to bind socket. err %p",
2501				__func__, ret);
2502		sock_release(sock);
2503		return ret;
2504	}
2505
2506	size = laddr->ss_family == AF_INET6 ?
2507		sizeof(struct sockaddr_in6) : sizeof(struct sockaddr_in);
2508	ret = sock_getname(sock, (struct sockaddr *)laddr, &size, 0);
2509	if (ret) {
2510		CTR2(KTR_IW_CXGBE, "%s:sock_getname failed. err %p",
2511				__func__, ret);
2512		sock_release(sock);
2513		return ret;
2514	}
2515
2516	*so = sock;
2517	return 0;
2518}
2519
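/*
 * iwcm->connect.  Start an active open: resolve the egress interface for the
 * remote address, check that it is TOE capable, create and bind a socket and
 * issue a non-blocking soconnect().  The MPA exchange is driven later from
 * the socket upcall path.
 */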
2520int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
2521{
2522	int err = 0;
2523	struct c4iw_dev *dev = to_c4iw_dev(cm_id->device);
2524	struct c4iw_ep *ep = NULL;
2525	struct ifnet    *nh_ifp;        /* Logical egress interface */
2526
2527	CTR2(KTR_IW_CXGBE, "%s:ccB %p", __func__, cm_id);
2528
2529
2530	if ((conn_param->ord > c4iw_max_read_depth) ||
2531		(conn_param->ird > c4iw_max_read_depth)) {
2532
2533		CTR2(KTR_IW_CXGBE, "%s:cc1 %p", __func__, cm_id);
2534		err = -EINVAL;
2535		goto out;
2536	}
2537	ep = alloc_ep(sizeof(*ep), GFP_KERNEL);
2538
2539	init_timer(&ep->timer);
2540	ep->plen = conn_param->private_data_len;
2541
2542	if (ep->plen) {
2543
2544		CTR2(KTR_IW_CXGBE, "%s:cc3 %p", __func__, ep);
2545		memcpy(ep->mpa_pkt + sizeof(struct mpa_message),
2546				conn_param->private_data, ep->plen);
2547	}
2548	ep->ird = conn_param->ird;
2549	ep->ord = conn_param->ord;
2550
2551	if (peer2peer && ep->ord == 0) {
2552
2553		CTR2(KTR_IW_CXGBE, "%s:cc4 %p", __func__, ep);
2554		ep->ord = 1;
2555	}
2556
2557	ep->com.dev = dev;
2558	ep->com.cm_id = cm_id;
2559	ref_cm_id(&ep->com);
2560	ep->com.qp = get_qhp(dev, conn_param->qpn);
2561
2562	if (!ep->com.qp) {
2563
2564		CTR2(KTR_IW_CXGBE, "%s:cc5 %p", __func__, ep);
2565		err = -EINVAL;
2566		goto fail;
2567	}
2568	ref_qp(ep);
2569	ep->com.thread = curthread;
2570
2571	err = get_ifnet_from_raddr(&cm_id->remote_addr, &nh_ifp);
2572	if (err) {
2573
2574		CTR2(KTR_IW_CXGBE, "%s:cc7 %p", __func__, ep);
2575		printk(KERN_ERR MOD "%s - cannot find route.\n", __func__);
2576		err = -EHOSTUNREACH;
2577		goto fail;
2578	}
2579
2580	if (!(nh_ifp->if_capenable & IFCAP_TOE) ||
2581	    TOEDEV(nh_ifp) == NULL) {
2582		err = -ENOPROTOOPT;
2583		goto fail;
2584	}
2585	ep->com.state = CONNECTING;
2586	ep->tos = 0;
2587	ep->com.local_addr = cm_id->local_addr;
2588	ep->com.remote_addr = cm_id->remote_addr;
2589
2590	err = c4iw_sock_create(&cm_id->local_addr, &ep->com.so);
2591	if (err)
2592		goto fail;
2593
2594	setiwsockopt(ep->com.so);
2595	err = -soconnect(ep->com.so, (struct sockaddr *)&ep->com.remote_addr,
2596		ep->com.thread);
2597	if (!err) {
2598		init_iwarp_socket(ep->com.so, &ep->com);
2599		goto out;
2600	} else
2601		goto fail_free_so;
2602
2603fail_free_so:
2604	sock_release(ep->com.so);
2605fail:
2606	deref_cm_id(&ep->com);
2607	c4iw_put_ep(&ep->com);
2608	ep = NULL;
2609out:
2610	CTR2(KTR_IW_CXGBE, "%s:ccE ret:%d", __func__, err);
2611	return err;
2612}
2613
2614/*
2615 * iwcm->create_listen.  Returns -errno on failure.
2616 */
2617int
2618c4iw_create_listen(struct iw_cm_id *cm_id, int backlog)
2619{
2620	struct c4iw_dev *dev = to_c4iw_dev(cm_id->device);
2621	struct c4iw_listen_ep *lep = NULL;
2622	struct listen_port_info *port_info = NULL;
2623	int rc = 0;
2624
2625	CTR3(KTR_IW_CXGBE, "%s: cm_id %p, backlog %d", __func__, cm_id,
2626			backlog);
2627	lep = alloc_ep(sizeof(*lep), GFP_KERNEL);
2628	lep->com.cm_id = cm_id;
2629	ref_cm_id(&lep->com);
2630	lep->com.dev = dev;
2631	lep->backlog = backlog;
2632	lep->com.local_addr = cm_id->local_addr;
2633	lep->com.thread = curthread;
2634	cm_id->provider_data = lep;
2635	lep->com.state = LISTEN;
2636
2637	/* In case of INADDR_ANY, ibcore creates a cm_id for each device and
2638	 * invokes the iw_cxgbe listener callbacks, assuming that iw_cxgbe
2639	 * creates HW listeners for each device separately.  But toecore
2640	 * expects a single solisten() call with the INADDR_ANY address to
2641	 * create HW listeners on all devices for a given port number.  So the
2642	 * iw_cxgbe driver calls solisten() only once for INADDR_ANY (usually
2643	 * at the first listener callback from ibcore).  All subsequent
2644	 * INADDR_ANY listener callbacks from ibcore (for the same port
2645	 * address) skip solisten(), as the first callback has already created
2646	 * listeners for all devices.
2647	 */
2648	if (c4iw_any_addr((struct sockaddr *)&lep->com.local_addr)) {
2649		port_info = add_ep_to_listenlist(lep);
2650		/* Skip solisten() if refcnt > 1, as the listeners were
2651		 * already created by the 'Master lep'.
2652		 */
2653		if (port_info->refcnt > 1) {
2654			/* As there is only one listener socket for a TCP
2655			 * port, copy the Master lep's socket pointer to the
2656			 * other leps belonging to the same TCP port.
2657			 */
2658			struct c4iw_listen_ep *head_lep =
2659					container_of(port_info->lep_list.next,
2660					struct c4iw_listen_ep, listen_ep_list);
2661			lep->com.so =  head_lep->com.so;
2662			goto out;
2663		}
2664	}
2665	rc = c4iw_sock_create(&cm_id->local_addr, &lep->com.so);
2666	if (rc) {
2667		CTR2(KTR_IW_CXGBE, "%s:Failed to create socket. err %d",
2668				__func__, rc);
2669		goto fail;
2670	}
2671
2672	rc = solisten(lep->com.so, backlog, curthread);
2673	if (rc) {
2674		CTR3(KTR_IW_CXGBE, "%s:Failed to listen on sock:%p. err %d",
2675				__func__, lep->com.so, rc);
2676		goto fail_free_so;
2677	}
2678	init_iwarp_socket(lep->com.so, &lep->com);
2679out:
2680	return 0;
2681
2682fail_free_so:
2683	sock_release(lep->com.so);
2684fail:
2685	if (port_info)
2686		rem_ep_from_listenlist(lep);
2687	deref_cm_id(&lep->com);
2688	c4iw_put_ep(&lep->com);
2689	return rc;
2690}
2691
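/*
 * iwcm->destroy_listen.  Tear down a listening endpoint.  For wildcard
 * (INADDR_ANY) listeners the shared listen socket is closed only when the
 * last lep for that port is removed from the listen list.
 */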
2692int
2693c4iw_destroy_listen(struct iw_cm_id *cm_id)
2694{
2695	struct c4iw_listen_ep *lep = to_listen_ep(cm_id);
2696
2697	mutex_lock(&lep->com.mutex);
2698	CTR3(KTR_IW_CXGBE, "%s: cm_id %p, state %s", __func__, cm_id,
2699	    states[lep->com.state]);
2700
2701	lep->com.state = DEAD;
2702	if (c4iw_any_addr((struct sockaddr *)&lep->com.local_addr)) {
2703		/* if no refcount then close listen socket */
2704		if (!rem_ep_from_listenlist(lep))
2705			close_socket(lep->com.so);
2706	} else
2707		close_socket(lep->com.so);
2708	deref_cm_id(&lep->com);
2709	mutex_unlock(&lep->com.mutex);
2710	c4iw_put_ep(&lep->com);
2711	return 0;
2712}
2713
2714int __c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp)
2715{
2716	int ret;
2717	mutex_lock(&ep->com.mutex);
2718	ret = c4iw_ep_disconnect(ep, abrupt, gfp);
2719	mutex_unlock(&ep->com.mutex);
2720	return ret;
2721}
2722
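/*
 * Start closing the connection: abortive (send_abort) when 'abrupt' is set,
 * otherwise a graceful close via sodisconnect(), depending on the current
 * endpoint state.  Expects ep->com.mutex to be held; __c4iw_ep_disconnect()
 * is the locking wrapper.  A fatal adapter error forces the endpoint to DEAD
 * and releases its resources here.
 */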
2723int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp)
2724{
2725	int ret = 0;
2726	int close = 0;
2727	int fatal = 0;
2728	struct c4iw_rdev *rdev;
2729
2730
2731	CTR2(KTR_IW_CXGBE, "%s:cedB %p", __func__, ep);
2732
2733	rdev = &ep->com.dev->rdev;
2734
2735	if (c4iw_fatal_error(rdev)) {
2736
2737		CTR2(KTR_IW_CXGBE, "%s:ced1 %p", __func__, ep);
2738		fatal = 1;
2739		close_complete_upcall(ep, -ECONNRESET);
2740		send_abort(ep);
2741		ep->com.state = DEAD;
2742	}
2743	CTR3(KTR_IW_CXGBE, "%s:ced2 %p %s", __func__, ep,
2744	    states[ep->com.state]);
2745
2746	/*
2747	 * Ref the ep here in case we have fatal errors causing the
2748	 * ep to be released and freed.
2749	 */
2750	c4iw_get_ep(&ep->com);
2751	switch (ep->com.state) {
2752
2753		case MPA_REQ_WAIT:
2754		case MPA_REQ_SENT:
2755		case MPA_REQ_RCVD:
2756		case MPA_REP_SENT:
2757		case FPDU_MODE:
2758			close = 1;
2759			if (abrupt)
2760				ep->com.state = ABORTING;
2761			else {
2762				ep->com.state = CLOSING;
2763				START_EP_TIMER(ep);
2764			}
2765			set_bit(CLOSE_SENT, &ep->com.flags);
2766			break;
2767
2768		case CLOSING:
2769
2770			if (!test_and_set_bit(CLOSE_SENT, &ep->com.flags)) {
2771
2772				close = 1;
2773				if (abrupt) {
2774					STOP_EP_TIMER(ep);
2775					ep->com.state = ABORTING;
2776				} else
2777					ep->com.state = MORIBUND;
2778			}
2779			break;
2780
2781		case MORIBUND:
2782		case ABORTING:
2783		case DEAD:
2784			CTR3(KTR_IW_CXGBE,
2785			    "%s ignoring disconnect ep %p state %u", __func__,
2786			    ep, ep->com.state);
2787			break;
2788
2789		default:
2790			BUG();
2791			break;
2792	}
2793
2794
2795	if (close) {
2796
2797		CTR2(KTR_IW_CXGBE, "%s:ced3 %p", __func__, ep);
2798
2799		if (abrupt) {
2800
2801			CTR2(KTR_IW_CXGBE, "%s:ced4 %p", __func__, ep);
2802			set_bit(EP_DISC_ABORT, &ep->com.history);
2803			close_complete_upcall(ep, -ECONNRESET);
2804			ret = send_abort(ep);
2805			if (ret)
2806				fatal = 1;
2807		} else {
2808
2809			CTR2(KTR_IW_CXGBE, "%s:ced5 %p", __func__, ep);
2810			set_bit(EP_DISC_CLOSE, &ep->com.history);
2811
2812			if (!ep->parent_ep)
2813				ep->com.state = MORIBUND;
2814			sodisconnect(ep->com.so);
2815		}
2816
2817	}
2818
2819	if (fatal) {
2820		set_bit(EP_DISC_FAIL, &ep->com.history);
2821		if (!abrupt) {
2822			STOP_EP_TIMER(ep);
2823			close_complete_upcall(ep, -EIO);
2824		}
2825		if (ep->com.qp) {
2826			struct c4iw_qp_attributes attrs = {0};
2827
2828			attrs.next_state = C4IW_QP_STATE_ERROR;
2829			ret = c4iw_modify_qp(ep->com.dev, ep->com.qp,
2830						C4IW_QP_ATTR_NEXT_STATE,
2831						&attrs, 1);
2832			if (ret) {
2833				CTR2(KTR_IW_CXGBE, "%s:ced7 %p", __func__, ep);
2834				printf("%s - qp <- error failed!\n", __func__);
2835			}
2836		}
2837		release_ep_resources(ep);
2838		ep->com.state = DEAD;
2839		CTR2(KTR_IW_CXGBE, "%s:ced6 %p", __func__, ep);
2840	}
2841	c4iw_put_ep(&ep->com);
2842	CTR2(KTR_IW_CXGBE, "%s:cedE %p", __func__, ep);
2843	return ret;
2844}
2845
2846#ifdef C4IW_EP_REDIRECT
2847int c4iw_ep_redirect(void *ctx, struct dst_entry *old, struct dst_entry *new,
2848		struct l2t_entry *l2t)
2849{
2850	struct c4iw_ep *ep = ctx;
2851
2852	if (ep->dst != old)
2853		return 0;
2854
2855	PDBG("%s ep %p redirect to dst %p l2t %p\n", __func__, ep, new,
2856			l2t);
2857	dst_hold(new);
2858	cxgb4_l2t_release(ep->l2t);
2859	ep->l2t = l2t;
2860	dst_release(old);
2861	ep->dst = new;
2862	return 1;
2863}
2864#endif
2865
2866
2867
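/*
 * Endpoint timer callback.  Runs in timer context, so it only marks the
 * endpoint as timed out and queues it on the request list for the c4iw
 * taskqueue to handle.
 */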
2868static void ep_timeout(unsigned long arg)
2869{
2870	struct c4iw_ep *ep = (struct c4iw_ep *)arg;
2871
2872	if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) {
2873
2874		/*
2875		 * Only insert if it is not already on the list.
2876		 */
2877		if (!(ep->com.ep_events & C4IW_EVENT_TIMEOUT)) {
2878			CTR2(KTR_IW_CXGBE, "%s:et1 %p", __func__, ep);
2879			add_ep_to_req_list(ep, C4IW_EVENT_TIMEOUT);
2880		}
2881	}
2882}
2883
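/*
 * FW6 work-request reply handler: the completion status is carried in bits
 * 8-15 of the first 64-bit word, and the second word holds the c4iw_wr_wait
 * cookie that gets woken up.
 */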
2884static int fw6_wr_rpl(struct adapter *sc, const __be64 *rpl)
2885{
2886	uint64_t val = be64toh(*rpl);
2887	int ret;
2888	struct c4iw_wr_wait *wr_waitp;
2889
2890	ret = (int)((val >> 8) & 0xff);
2891	wr_waitp = (struct c4iw_wr_wait *)rpl[1];
2892	CTR3(KTR_IW_CXGBE, "%s wr_waitp %p ret %u", __func__, wr_waitp, ret);
2893	if (wr_waitp)
2894		c4iw_wake_up(wr_waitp, ret ? -ret : 0);
2895
2896	return (0);
2897}
2898
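/*
 * FW6 error-CQE handler: copy the CQE onto err_cqe_list and defer the actual
 * processing to the c4iw taskqueue.
 */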
2899static int fw6_cqe_handler(struct adapter *sc, const __be64 *rpl)
2900{
2901	struct cqe_list_entry *cle;
2902	unsigned long flag;
2903
2904	cle = malloc(sizeof(*cle), M_CXGBE, M_NOWAIT);
	if (cle == NULL)
		return (0);	/* drop the error CQE if allocation fails */
2905	cle->rhp = sc->iwarp_softc;
2906	cle->err_cqe = *(const struct t4_cqe *)(&rpl[0]);
2907
2908	spin_lock_irqsave(&err_cqe_lock, flag);
2909	list_add_tail(&cle->entry, &err_cqe_list);
2910	queue_work(c4iw_taskq, &c4iw_task);
2911	spin_unlock_irqrestore(&err_cqe_lock, flag);
2912
2913	return (0);
2914}
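/*
 * Handle an incoming RDMA TERMINATE for an endpoint by moving its QP to the
 * TERMINATE state.
 */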
2915
2916static int
2917process_terminate(struct c4iw_ep *ep)
2918{
2919	struct c4iw_qp_attributes attrs = {0};
2920
2921	CTR2(KTR_IW_CXGBE, "%s:tB %p %d", __func__, ep);
2922
2923	if (ep && ep->com.qp) {
2924
2925		printk(KERN_WARNING MOD "TERM received tid %u qpid %u\n",
2926				ep->hwtid, ep->com.qp->wq.sq.qid);
2927		attrs.next_state = C4IW_QP_STATE_TERMINATE;
2928		c4iw_modify_qp(ep->com.dev, ep->com.qp, C4IW_QP_ATTR_NEXT_STATE, &attrs,
2929				1);
2930	} else
2931		printk(KERN_WARNING MOD "TERM received tid %u no ep/qp\n",
2932								ep->hwtid);
2933	CTR2(KTR_IW_CXGBE, "%s:tE %p %d", __func__, ep);
2934
2935	return 0;
2936}
2937
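/*
 * Connection manager module init: register the CPL/FW6 handlers, initialize
 * the deferred-work lists and locks, and create the iw_cxgbe taskqueue.
 */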
2938int __init c4iw_cm_init(void)
2939{
2940
2941	t4_register_cpl_handler(CPL_RDMA_TERMINATE, terminate);
2942	t4_register_fw_msg_handler(FW6_TYPE_WR_RPL, fw6_wr_rpl);
2943	t4_register_fw_msg_handler(FW6_TYPE_CQE, fw6_cqe_handler);
2944	t4_register_an_handler(c4iw_ev_handler);
2945
2946	TAILQ_INIT(&req_list);
2947	spin_lock_init(&req_lock);
2948	INIT_LIST_HEAD(&err_cqe_list);
2949	spin_lock_init(&err_cqe_lock);
2950
2951	INIT_WORK(&c4iw_task, process_req);
2952
2953	c4iw_taskq = create_singlethread_workqueue("iw_cxgbe");
2954	if (!c4iw_taskq)
2955		return -ENOMEM;
2956
2957	return 0;
2958}
2959
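/*
 * Connection manager module teardown: flush and destroy the taskqueue and
 * unregister the handlers installed by c4iw_cm_init().
 */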
2960void __exit c4iw_cm_term(void)
2961{
2962	WARN_ON(!TAILQ_EMPTY(&req_list));
2963	WARN_ON(!list_empty(&err_cqe_list));
2964	flush_workqueue(c4iw_taskq);
2965	destroy_workqueue(c4iw_taskq);
2966
2967	t4_register_cpl_handler(CPL_RDMA_TERMINATE, NULL);
2968	t4_register_fw_msg_handler(FW6_TYPE_WR_RPL, NULL);
2969	t4_register_fw_msg_handler(FW6_TYPE_CQE, NULL);
2970	t4_register_an_handler(NULL);
2971}
2972#endif
2973