cm.c revision 336667
1/*
2 * Copyright (c) 2009-2013, 2016 Chelsio, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses.  You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 *     Redistribution and use in source and binary forms, with or
11 *     without modification, are permitted provided that the following
12 *     conditions are met:
13 *
14 *      - Redistributions of source code must retain the above
15 *	  copyright notice, this list of conditions and the following
16 *	  disclaimer.
17 *
18 *      - Redistributions in binary form must reproduce the above
19 *	  copyright notice, this list of conditions and the following
20 *	  disclaimer in the documentation and/or other materials
21 *	  provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32#include <sys/cdefs.h>
33__FBSDID("$FreeBSD: stable/11/sys/dev/cxgbe/iw_cxgbe/cm.c 336667 2018-07-24 13:13:30Z np $");
34
35#include "opt_inet.h"
36
37#ifdef TCP_OFFLOAD
38#include <sys/types.h>
39#include <sys/malloc.h>
40#include <sys/socket.h>
41#include <sys/socketvar.h>
42#include <sys/sockio.h>
43#include <sys/taskqueue.h>
44#include <netinet/in.h>
45#include <net/route.h>
46
47#include <netinet/in_systm.h>
48#include <netinet/in_pcb.h>
49#include <netinet6/in6_pcb.h>
50#include <netinet/ip.h>
51#include <netinet/in_fib.h>
52#include <netinet6/in6_fib.h>
53#include <netinet6/scope6_var.h>
54#include <netinet/ip_var.h>
55#include <netinet/tcp_var.h>
56#include <netinet/tcp.h>
57#include <netinet/tcpip.h>
58
59#include <netinet/toecore.h>
60
61struct sge_iq;
62struct rss_header;
63struct cpl_set_tcb_rpl;
64#include <linux/types.h>
65#include "offload.h"
66#include "tom/t4_tom.h"
67
68#define TOEPCB(so)  ((struct toepcb *)(so_sototcpcb((so))->t_toe))
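/*
 * Note: TOEPCB() reaches the TOE PCB through the tcpcb's t_toe pointer, so it
 * is only meaningful for offloaded connections; set_tcpinfo() checks TF_TOE
 * before using it.
 */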
69
70#include "iw_cxgbe.h"
71#include <linux/module.h>
72#include <linux/workqueue.h>
73#include <linux/notifier.h>
74#include <linux/inetdevice.h>
75#include <linux/if_vlan.h>
76#include <net/netevent.h>
77
78static spinlock_t req_lock;
79static TAILQ_HEAD(c4iw_ep_list, c4iw_ep_common) req_list;
80static struct work_struct c4iw_task;
81static struct workqueue_struct *c4iw_taskq;
82static LIST_HEAD(err_cqe_list);
83static spinlock_t err_cqe_lock;
84static LIST_HEAD(listen_port_list);
85static DEFINE_MUTEX(listen_port_mutex);
86
87static void process_req(struct work_struct *ctx);
88static void start_ep_timer(struct c4iw_ep *ep);
89static int stop_ep_timer(struct c4iw_ep *ep);
90static int set_tcpinfo(struct c4iw_ep *ep);
91static void process_timeout(struct c4iw_ep *ep);
92static void process_err_cqes(void);
93static void *alloc_ep(int size, gfp_t flags);
94static void close_socket(struct socket *so);
95static int send_mpa_req(struct c4iw_ep *ep);
96static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen);
97static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen);
98static void close_complete_upcall(struct c4iw_ep *ep, int status);
99static int send_abort(struct c4iw_ep *ep);
100static void peer_close_upcall(struct c4iw_ep *ep);
101static void peer_abort_upcall(struct c4iw_ep *ep);
102static void connect_reply_upcall(struct c4iw_ep *ep, int status);
103static int connect_request_upcall(struct c4iw_ep *ep);
104static void established_upcall(struct c4iw_ep *ep);
105static int process_mpa_reply(struct c4iw_ep *ep);
106static int process_mpa_request(struct c4iw_ep *ep);
107static void process_peer_close(struct c4iw_ep *ep);
108static void process_conn_error(struct c4iw_ep *ep);
109static void process_close_complete(struct c4iw_ep *ep);
110static void ep_timeout(unsigned long arg);
111static void setiwsockopt(struct socket *so);
112static void init_iwarp_socket(struct socket *so, void *arg);
113static void uninit_iwarp_socket(struct socket *so);
114static void process_data(struct c4iw_ep *ep);
115static void process_connected(struct c4iw_ep *ep);
116static int c4iw_so_upcall(struct socket *so, void *arg, int waitflag);
117static void process_socket_event(struct c4iw_ep *ep);
118static void release_ep_resources(struct c4iw_ep *ep);
119static int process_terminate(struct c4iw_ep *ep);
120static int terminate(struct sge_iq *iq, const struct rss_header *rss,
121    struct mbuf *m);
122static int add_ep_to_req_list(struct c4iw_ep *ep, int ep_events);
123static struct listen_port_info *
124add_ep_to_listenlist(struct c4iw_listen_ep *lep);
125static int rem_ep_from_listenlist(struct c4iw_listen_ep *lep);
126static struct c4iw_listen_ep *
127find_real_listen_ep(struct c4iw_listen_ep *master_lep, struct socket *so);
128static int get_ifnet_from_raddr(struct sockaddr_storage *raddr,
129		struct ifnet **ifp);
130static void process_newconn(struct c4iw_listen_ep *master_lep,
131		struct socket *new_so);
132#define START_EP_TIMER(ep) \
133    do { \
134	    CTR3(KTR_IW_CXGBE, "start_ep_timer (%s:%d) ep %p", \
135		__func__, __LINE__, (ep)); \
136	    start_ep_timer(ep); \
137    } while (0)
138
139#define STOP_EP_TIMER(ep) \
140    ({ \
141	    CTR3(KTR_IW_CXGBE, "stop_ep_timer (%s:%d) ep %p", \
142		__func__, __LINE__, (ep)); \
143	    stop_ep_timer(ep); \
144    })
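/*
 * Note: STOP_EP_TIMER is a statement expression so that it evaluates to
 * stop_ep_timer()'s return value (non-zero if the timeout had already fired),
 * which callers test, e.g. in process_mpa_reply():
 *
 *	if (STOP_EP_TIMER(ep))
 *		return 0;
 *
 * START_EP_TIMER has no useful return value and is a plain do/while(0).
 */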
145
146#define GET_LOCAL_ADDR(pladdr, so) \
147	do { \
148		struct sockaddr_storage *__a = NULL; \
149		struct  inpcb *__inp = sotoinpcb(so); \
150		KASSERT(__inp != NULL, \
151		   ("GET_LOCAL_ADDR(%s):so:%p, inp = NULL", __func__, so)); \
152		if (__inp->inp_vflag & INP_IPV4) \
153			in_getsockaddr(so, (struct sockaddr **)&__a); \
154		else \
155			in6_getsockaddr(so, (struct sockaddr **)&__a); \
156		*(pladdr) = *__a; \
157		free(__a, M_SONAME); \
158	} while (0)
159
160#define GET_REMOTE_ADDR(praddr, so) \
161	do { \
162		struct sockaddr_storage *__a = NULL; \
163		struct  inpcb *__inp = sotoinpcb(so); \
164		KASSERT(__inp != NULL, \
165		   ("GET_REMOTE_ADDR(%s):so:%p, inp = NULL", __func__, so)); \
166		if (__inp->inp_vflag & INP_IPV4) \
167			in_getpeeraddr(so, (struct sockaddr **)&__a); \
168		else \
169			in6_getpeeraddr(so, (struct sockaddr **)&__a); \
170		*(praddr) = *__a; \
171		free(__a, M_SONAME); \
172	} while (0)
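/*
 * Both macros above rely on in_getsockaddr()/in6_getsockaddr() (and the
 * peer-address variants) allocating the returned sockaddr with M_SONAME,
 * hence the copy followed by free(__a, M_SONAME).  Typical use, as in
 * process_newconn():
 *
 *	GET_LOCAL_ADDR(&new_ep->com.local_addr, new_so);
 *	GET_REMOTE_ADDR(&new_ep->com.remote_addr, new_so);
 */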
173
174#ifdef KTR
175static char *states[] = {
176	"idle",
177	"listen",
178	"connecting",
179	"mpa_wait_req",
180	"mpa_req_sent",
181	"mpa_req_rcvd",
182	"mpa_rep_sent",
183	"fpdu_mode",
184	"aborting",
185	"closing",
186	"moribund",
187	"dead",
188	NULL,
189};
190#endif
191
192static void deref_cm_id(struct c4iw_ep_common *epc)
193{
194      epc->cm_id->rem_ref(epc->cm_id);
195      epc->cm_id = NULL;
196      set_bit(CM_ID_DEREFED, &epc->history);
197}
198
199static void ref_cm_id(struct c4iw_ep_common *epc)
200{
201      set_bit(CM_ID_REFED, &epc->history);
202      epc->cm_id->add_ref(epc->cm_id);
203}
204
205static void deref_qp(struct c4iw_ep *ep)
206{
207	c4iw_qp_rem_ref(&ep->com.qp->ibqp);
208	clear_bit(QP_REFERENCED, &ep->com.flags);
209	set_bit(QP_DEREFED, &ep->com.history);
210}
211
212static void ref_qp(struct c4iw_ep *ep)
213{
214	set_bit(QP_REFERENCED, &ep->com.flags);
215	set_bit(QP_REFED, &ep->com.history);
216	c4iw_qp_add_ref(&ep->com.qp->ibqp);
217}
218/* allocated per TCP port while listening */
219struct listen_port_info {
220	uint16_t port_num; /* TCP port address */
221	struct list_head list; /* belongs to listen_port_list */
222	struct list_head lep_list; /* per port lep list */
223	uint32_t refcnt; /* number of lep's listening */
224};
225
226/*
227 * The following two lists are used to manage INADDR_ANY listeners:
228 * 1) listen_port_list
229 * 2) lep_list
230 *
231 * Below is an overview of the INADDR_ANY listener lists on a system with a
232 * two-port adapter:
233 *   |------------------|
234 *   |listen_port_list  |
235 *   |------------------|
236 *            |
237 *            |              |-----------|       |-----------|
238 *            |              | port_num:X|       | port_num:X|
239 *            |--------------|-list------|-------|-list------|-------....
240 *                           | lep_list----|     | lep_list----|
241 *                           | refcnt    | |     | refcnt    | |
242 *                           |           | |     |           | |
243 *                           |           | |     |           | |
244 *                           |-----------| |     |-----------| |
245 *                                         |                   |
246 *                                         |                   |
247 *                                         |                   |
248 *                                         |                   |         lep1                  lep2
249 *                                         |                   |    |----------------|    |----------------|
250 *                                         |                   |----| listen_ep_list |----| listen_ep_list |
251 *                                         |                        |----------------|    |----------------|
252 *                                         |
253 *                                         |
254 *                                         |        lep1                  lep2
255 *                                         |   |----------------|    |----------------|
256 *                                         |---| listen_ep_list |----| listen_ep_list |
257 *                                             |----------------|    |----------------|
258 *
259 * Because the adapter has two ports, there are two leps (lep1 & lep2) for
260 * each TCP port number.
261 *
262 * Here 'lep1' is always marked as the master lep, because solisten() is
263 * always called through the first lep.
264 *
265 */
266static struct listen_port_info *
267add_ep_to_listenlist(struct c4iw_listen_ep *lep)
268{
269	uint16_t port;
270	struct listen_port_info *port_info = NULL;
271	struct sockaddr_storage *laddr = &lep->com.local_addr;
272
273	port = (laddr->ss_family == AF_INET) ?
274		((struct sockaddr_in *)laddr)->sin_port :
275		((struct sockaddr_in6 *)laddr)->sin6_port;
276
277	mutex_lock(&listen_port_mutex);
278
279	list_for_each_entry(port_info, &listen_port_list, list)
280		if (port_info->port_num == port)
281			goto found_port;
282
283	port_info = malloc(sizeof(*port_info), M_CXGBE, M_WAITOK);
284	port_info->port_num = port;
285	port_info->refcnt    = 0;
286
287	list_add_tail(&port_info->list, &listen_port_list);
288	INIT_LIST_HEAD(&port_info->lep_list);
289
290found_port:
291	port_info->refcnt++;
292	list_add_tail(&lep->listen_ep_list, &port_info->lep_list);
293	mutex_unlock(&listen_port_mutex);
294	return port_info;
295}
296
297static int
298rem_ep_from_listenlist(struct c4iw_listen_ep *lep)
299{
300	uint16_t port;
301	struct listen_port_info *port_info = NULL;
302	struct sockaddr_storage *laddr = &lep->com.local_addr;
303	int refcnt = 0;
304
305	port = (laddr->ss_family == AF_INET) ?
306		((struct sockaddr_in *)laddr)->sin_port :
307		((struct sockaddr_in6 *)laddr)->sin6_port;
308
309	mutex_lock(&listen_port_mutex);
310
311	/* get the port_info structure based on the lep's port address */
312	list_for_each_entry(port_info, &listen_port_list, list) {
313		if (port_info->port_num == port) {
314			port_info->refcnt--;
315			refcnt = port_info->refcnt;
316			/* remove the current lep from the listen list */
317			list_del(&lep->listen_ep_list);
318			if (port_info->refcnt == 0) {
319				/* Remove this entry from the list as there
320				 * are no more listeners for this port_num.
321				 */
322				list_del(&port_info->list);
323				kfree(port_info);
324			}
325			break;
326		}
327	}
328	mutex_unlock(&listen_port_mutex);
329	return refcnt;
330}
331
332/*
333 * Find the lep that belongs to the ifnet on which the SYN frame was received.
334 */
335struct c4iw_listen_ep *
336find_real_listen_ep(struct c4iw_listen_ep *master_lep, struct socket *so)
337{
338	struct adapter *adap = NULL;
339	struct c4iw_listen_ep *lep = NULL;
340	struct sockaddr_storage remote = { 0 };
341	struct ifnet *new_conn_ifp = NULL;
342	struct listen_port_info *port_info = NULL;
343	int err = 0, i = 0,
344	    found_portinfo = 0, found_lep = 0;
345	uint16_t port;
346
347	/* STEP 1: get 'ifnet' based on socket's remote address */
348	GET_REMOTE_ADDR(&remote, so);
349
350	err = get_ifnet_from_raddr(&remote, &new_conn_ifp);
351	if (err) {
352		CTR4(KTR_IW_CXGBE, "%s: Failed to get ifnet, sock %p, "
353				"master_lep %p err %d",
354				__func__, so, master_lep, err);
355		return (NULL);
356	}
357
358	/* STEP 2: Find 'port_info' with listener local port address. */
359	port = (master_lep->com.local_addr.ss_family == AF_INET) ?
360		((struct sockaddr_in *)&master_lep->com.local_addr)->sin_port :
361		((struct sockaddr_in6 *)&master_lep->com.local_addr)->sin6_port;
362
363
364	mutex_lock(&listen_port_mutex);
365	list_for_each_entry(port_info, &listen_port_list, list)
366		if (port_info->port_num == port) {
367			found_portinfo = 1;
368			break;
369		}
370	if (!found_portinfo)
371		goto out;
372
373	/* STEP 3: Traverse through list of lep's that are bound to the current
374	 * TCP port address and find the lep that belongs to the ifnet on which
375	 * the SYN frame was received.
376	 */
377	list_for_each_entry(lep, &port_info->lep_list, listen_ep_list) {
378		adap = lep->com.dev->rdev.adap;
379		for_each_port(adap, i) {
380			if (new_conn_ifp == adap->port[i]->vi[0].ifp) {
381				found_lep = 1;
382				goto out;
383			}
384		}
385	}
386out:
387	mutex_unlock(&listen_port_mutex);
388	return found_lep ? lep : (NULL);
389}
390
391static void process_timeout(struct c4iw_ep *ep)
392{
393	struct c4iw_qp_attributes attrs = {0};
394	int abort = 1;
395
396	CTR4(KTR_IW_CXGBE, "%s ep :%p, tid:%u, state %d", __func__,
397			ep, ep->hwtid, ep->com.state);
398	set_bit(TIMEDOUT, &ep->com.history);
399	switch (ep->com.state) {
400	case MPA_REQ_SENT:
401		connect_reply_upcall(ep, -ETIMEDOUT);
402		break;
403	case MPA_REQ_WAIT:
404	case MPA_REQ_RCVD:
405	case MPA_REP_SENT:
406	case FPDU_MODE:
407		break;
408	case CLOSING:
409	case MORIBUND:
410		if (ep->com.cm_id && ep->com.qp) {
411			attrs.next_state = C4IW_QP_STATE_ERROR;
412			c4iw_modify_qp(ep->com.dev, ep->com.qp,
413					C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
414		}
415		close_complete_upcall(ep, -ETIMEDOUT);
416		break;
417	case ABORTING:
418	case DEAD:
419		/*
420		 * These states are expected if the ep timed out at the same
421		 * time as another thread was calling stop_ep_timer().
422		 * So we silently do nothing for these states.
423		 */
424		abort = 0;
425		break;
426	default:
427		CTR4(KTR_IW_CXGBE, "%s unexpected state ep %p tid %u state %u\n"
428				, __func__, ep, ep->hwtid, ep->com.state);
429		abort = 0;
430	}
431	if (abort)
432		c4iw_ep_disconnect(ep, 1, GFP_KERNEL);
433	c4iw_put_ep(&ep->com);
434	return;
435}
436
437struct cqe_list_entry {
438	struct list_head entry;
439	struct c4iw_dev *rhp;
440	struct t4_cqe err_cqe;
441};
442
443static void
444process_err_cqes(void)
445{
446	unsigned long flag;
447	struct cqe_list_entry *cle;
448
449	spin_lock_irqsave(&err_cqe_lock, flag);
450	while (!list_empty(&err_cqe_list)) {
451		struct list_head *tmp;
452		tmp = err_cqe_list.next;
453		list_del(tmp);
454		tmp->next = tmp->prev = NULL;
455		spin_unlock_irqrestore(&err_cqe_lock, flag);
456		cle = list_entry(tmp, struct cqe_list_entry, entry);
457		c4iw_ev_dispatch(cle->rhp, &cle->err_cqe);
458		free(cle, M_CXGBE);
459		spin_lock_irqsave(&err_cqe_lock, flag);
460	}
461	spin_unlock_irqrestore(&err_cqe_lock, flag);
462
463	return;
464}
465
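/*
 * Work handler that drains the deferred work queued by add_ep_to_req_list()
 * as well as the deferred error-CQE list: each endpoint's pending
 * TERM/TIMEOUT/SOCKET events are dispatched with the endpoint mutex held, and
 * the reference taken when the ep was queued is dropped afterwards.
 */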
466static void
467process_req(struct work_struct *ctx)
468{
469	struct c4iw_ep_common *epc;
470	unsigned long flag;
471	int ep_events;
472
473	process_err_cqes();
474	spin_lock_irqsave(&req_lock, flag);
475	while (!TAILQ_EMPTY(&req_list)) {
476		epc = TAILQ_FIRST(&req_list);
477		TAILQ_REMOVE(&req_list, epc, entry);
478		epc->entry.tqe_prev = NULL;
479		ep_events = epc->ep_events;
480		epc->ep_events = 0;
481		spin_unlock_irqrestore(&req_lock, flag);
482		mutex_lock(&epc->mutex);
483		CTR5(KTR_IW_CXGBE, "%s: so %p, ep %p, ep_state %s events 0x%x",
484		    __func__, epc->so, epc, states[epc->state], ep_events);
485		if (ep_events & C4IW_EVENT_TERM)
486			process_terminate((struct c4iw_ep *)epc);
487		if (ep_events & C4IW_EVENT_TIMEOUT)
488			process_timeout((struct c4iw_ep *)epc);
489		if (ep_events & C4IW_EVENT_SOCKET)
490			process_socket_event((struct c4iw_ep *)epc);
491		mutex_unlock(&epc->mutex);
492		c4iw_put_ep(epc);
493		process_err_cqes();
494		spin_lock_irqsave(&req_lock, flag);
495	}
496	spin_unlock_irqrestore(&req_lock, flag);
497}
498
499/*
500 * XXX: doesn't belong here in the iWARP driver.
501 * XXX: assumes that the connection was offloaded by cxgbe/t4_tom if TF_TOE is
502 *      set.  Is this a valid assumption for active open?
503 */
504static int
505set_tcpinfo(struct c4iw_ep *ep)
506{
507	struct socket *so = ep->com.so;
508	struct inpcb *inp = sotoinpcb(so);
509	struct tcpcb *tp;
510	struct toepcb *toep;
511	int rc = 0;
512
513	INP_WLOCK(inp);
514	tp = intotcpcb(inp);
515	if ((tp->t_flags & TF_TOE) == 0) {
516		rc = EINVAL;
517		log(LOG_ERR, "%s: connection not offloaded (so %p, ep %p)\n",
518		    __func__, so, ep);
519		goto done;
520	}
521	toep = TOEPCB(so);
522
523	ep->hwtid = toep->tid;
524	ep->snd_seq = tp->snd_nxt;
525	ep->rcv_seq = tp->rcv_nxt;
526	ep->emss = max(tp->t_maxseg, 128);
527done:
528	INP_WUNLOCK(inp);
529	return (rc);
530}
531
532static int
533get_ifnet_from_raddr(struct sockaddr_storage *raddr, struct ifnet **ifp)
534{
535	int err = 0;
536
537	if (raddr->ss_family == AF_INET) {
538		struct sockaddr_in *raddr4 = (struct sockaddr_in *)raddr;
539		struct nhop4_extended nh4 = {0};
540
541		err = fib4_lookup_nh_ext(RT_DEFAULT_FIB, raddr4->sin_addr,
542				NHR_REF, 0, &nh4);
543		*ifp = nh4.nh_ifp;
544		if (err)
545			fib4_free_nh_ext(RT_DEFAULT_FIB, &nh4);
546	} else {
547		struct sockaddr_in6 *raddr6 = (struct sockaddr_in6 *)raddr;
548		struct nhop6_extended nh6 = {0};
549		struct in6_addr addr6;
550		uint32_t scopeid;
551
552		memset(&addr6, 0, sizeof(addr6));
553		in6_splitscope((struct in6_addr *)&raddr6->sin6_addr,
554					&addr6, &scopeid);
555		err = fib6_lookup_nh_ext(RT_DEFAULT_FIB, &addr6, scopeid,
556				NHR_REF, 0, &nh6);
557		*ifp = nh6.nh_ifp;
558		if (err)
559			fib6_free_nh_ext(RT_DEFAULT_FIB, &nh6);
560	}
561
562	CTR2(KTR_IW_CXGBE, "%s: return: %d", __func__, err);
563	return err;
564}
565
566static void
567close_socket(struct socket *so)
568{
569	uninit_iwarp_socket(so);
570	soclose(so);
571}
572
573static void
574process_peer_close(struct c4iw_ep *ep)
575{
576	struct c4iw_qp_attributes attrs = {0};
577	int disconnect = 1;
578	int release = 0;
579
580	CTR4(KTR_IW_CXGBE, "%s:ppcB ep %p so %p state %s", __func__, ep,
581	    ep->com.so, states[ep->com.state]);
582
583	switch (ep->com.state) {
584
585		case MPA_REQ_WAIT:
586			CTR2(KTR_IW_CXGBE, "%s:ppc1 %p MPA_REQ_WAIT DEAD",
587			    __func__, ep);
588			/* Fallthrough */
589		case MPA_REQ_SENT:
590			CTR2(KTR_IW_CXGBE, "%s:ppc2 %p MPA_REQ_SENT DEAD",
591			    __func__, ep);
592			ep->com.state = DEAD;
593			connect_reply_upcall(ep, -ECONNABORTED);
594
595			disconnect = 0;
596			STOP_EP_TIMER(ep);
597			close_socket(ep->com.so);
598			deref_cm_id(&ep->com);
599			release = 1;
600			break;
601
602		case MPA_REQ_RCVD:
603
604			/*
605			 * We're gonna mark this puppy DEAD, but keep
606			 * the reference on it until the ULP accepts or
607			 * rejects the CR.
608			 */
609			CTR2(KTR_IW_CXGBE, "%s:ppc3 %p MPA_REQ_RCVD CLOSING",
610			    __func__, ep);
611			ep->com.state = CLOSING;
612			break;
613
614		case MPA_REP_SENT:
615			CTR2(KTR_IW_CXGBE, "%s:ppc4 %p MPA_REP_SENT CLOSING",
616			    __func__, ep);
617			ep->com.state = CLOSING;
618			break;
619
620		case FPDU_MODE:
621			CTR2(KTR_IW_CXGBE, "%s:ppc5 %p FPDU_MODE CLOSING",
622			    __func__, ep);
623			START_EP_TIMER(ep);
624			ep->com.state = CLOSING;
625			attrs.next_state = C4IW_QP_STATE_CLOSING;
626			c4iw_modify_qp(ep->com.dev, ep->com.qp,
627					C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
628			peer_close_upcall(ep);
629			break;
630
631		case ABORTING:
632			CTR2(KTR_IW_CXGBE, "%s:ppc6 %p ABORTING (disconn)",
633			    __func__, ep);
634			disconnect = 0;
635			break;
636
637		case CLOSING:
638			CTR2(KTR_IW_CXGBE, "%s:ppc7 %p CLOSING MORIBUND",
639			    __func__, ep);
640			ep->com.state = MORIBUND;
641			disconnect = 0;
642			break;
643
644		case MORIBUND:
645			CTR2(KTR_IW_CXGBE, "%s:ppc8 %p MORIBUND DEAD", __func__,
646			    ep);
647			STOP_EP_TIMER(ep);
648			if (ep->com.cm_id && ep->com.qp) {
649				attrs.next_state = C4IW_QP_STATE_IDLE;
650				c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
651						C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
652			}
653			close_socket(ep->com.so);
654			close_complete_upcall(ep, 0);
655			ep->com.state = DEAD;
656			release = 1;
657			disconnect = 0;
658			break;
659
660		case DEAD:
661			CTR2(KTR_IW_CXGBE, "%s:ppc9 %p DEAD (disconn)",
662			    __func__, ep);
663			disconnect = 0;
664			break;
665
666		default:
667			panic("%s: ep %p state %d", __func__, ep,
668			    ep->com.state);
669			break;
670	}
671
672
673	if (disconnect) {
674
675		CTR2(KTR_IW_CXGBE, "%s:ppca %p", __func__, ep);
676		c4iw_ep_disconnect(ep, 0, M_NOWAIT);
677	}
678	if (release) {
679
680		CTR2(KTR_IW_CXGBE, "%s:ppcb %p", __func__, ep);
681		c4iw_put_ep(&ep->com);
682	}
683	CTR2(KTR_IW_CXGBE, "%s:ppcE %p", __func__, ep);
684	return;
685}
686
687static void
688process_conn_error(struct c4iw_ep *ep)
689{
690	struct c4iw_qp_attributes attrs = {0};
691	int ret;
692	int state;
693
694	state = ep->com.state;
695	CTR5(KTR_IW_CXGBE, "%s:pceB ep %p so %p so->so_error %u state %s",
696	    __func__, ep, ep->com.so, ep->com.so->so_error,
697	    states[ep->com.state]);
698
699	switch (state) {
700
701		case MPA_REQ_WAIT:
702			STOP_EP_TIMER(ep);
703			c4iw_put_ep(&ep->parent_ep->com);
704			break;
705
706		case MPA_REQ_SENT:
707			STOP_EP_TIMER(ep);
708			connect_reply_upcall(ep, -ECONNRESET);
709			break;
710
711		case MPA_REP_SENT:
712			ep->com.rpl_err = ECONNRESET;
713			CTR1(KTR_IW_CXGBE, "waking up ep %p", ep);
714			break;
715
716		case MPA_REQ_RCVD:
717			break;
718
719		case MORIBUND:
720		case CLOSING:
721			STOP_EP_TIMER(ep);
722			/*FALLTHROUGH*/
723		case FPDU_MODE:
724
725			if (ep->com.cm_id && ep->com.qp) {
726
727				attrs.next_state = C4IW_QP_STATE_ERROR;
728				ret = c4iw_modify_qp(ep->com.qp->rhp,
729					ep->com.qp, C4IW_QP_ATTR_NEXT_STATE,
730					&attrs, 1);
731				if (ret)
732					log(LOG_ERR,
733							"%s - qp <- error failed!\n",
734							__func__);
735			}
736			peer_abort_upcall(ep);
737			break;
738
739		case ABORTING:
740			break;
741
742		case DEAD:
743			CTR2(KTR_IW_CXGBE, "%s so_error %d IN DEAD STATE!!!!",
744			    __func__, ep->com.so->so_error);
745			return;
746
747		default:
748			panic("%s: ep %p state %d", __func__, ep, state);
749			break;
750	}
751
752	if (state != ABORTING) {
753		close_socket(ep->com.so);
754		ep->com.state = DEAD;
755		c4iw_put_ep(&ep->com);
756	}
757	CTR2(KTR_IW_CXGBE, "%s:pceE %p", __func__, ep);
758	return;
759}
760
761static void
762process_close_complete(struct c4iw_ep *ep)
763{
764	struct c4iw_qp_attributes attrs = {0};
765	int release = 0;
766
767	CTR4(KTR_IW_CXGBE, "%s:pccB ep %p so %p state %s", __func__, ep,
768	    ep->com.so, states[ep->com.state]);
769
770	/* The cm_id may be null if we failed to connect */
771	set_bit(CLOSE_CON_RPL, &ep->com.history);
772
773	switch (ep->com.state) {
774
775		case CLOSING:
776			CTR2(KTR_IW_CXGBE, "%s:pcc1 %p CLOSING MORIBUND",
777			    __func__, ep);
778			ep->com.state = MORIBUND;
779			break;
780
781		case MORIBUND:
782			CTR2(KTR_IW_CXGBE, "%s:pcc1 %p MORIBUND DEAD", __func__,
783			    ep);
784			STOP_EP_TIMER(ep);
785
786			if ((ep->com.cm_id) && (ep->com.qp)) {
787
788				CTR2(KTR_IW_CXGBE, "%s:pcc2 %p QP_STATE_IDLE",
789				    __func__, ep);
790				attrs.next_state = C4IW_QP_STATE_IDLE;
791				c4iw_modify_qp(ep->com.dev,
792						ep->com.qp,
793						C4IW_QP_ATTR_NEXT_STATE,
794						&attrs, 1);
795			}
796
797			close_socket(ep->com.so);
798			close_complete_upcall(ep, 0);
799			ep->com.state = DEAD;
800			release = 1;
801			break;
802
803		case ABORTING:
804			CTR2(KTR_IW_CXGBE, "%s:pcc5 %p ABORTING", __func__, ep);
805			break;
806
807		case DEAD:
808			CTR2(KTR_IW_CXGBE, "%s:pcc6 %p DEAD", __func__, ep);
809			break;
810		default:
811			CTR2(KTR_IW_CXGBE, "%s:pcc7 %p unknown ep state",
812					__func__, ep);
813			panic("%s:pcc6 %p unknown ep state", __func__, ep);
814			break;
815	}
816
817	if (release) {
818
819		CTR2(KTR_IW_CXGBE, "%s:pcc8 %p", __func__, ep);
820		release_ep_resources(ep);
821	}
822	CTR2(KTR_IW_CXGBE, "%s:pccE %p", __func__, ep);
823	return;
824}
825
826static void
827setiwsockopt(struct socket *so)
828{
829	int rc;
830	struct sockopt sopt;
831	int on = 1;
832
833	sopt.sopt_dir = SOPT_SET;
834	sopt.sopt_level = IPPROTO_TCP;
835	sopt.sopt_name = TCP_NODELAY;
836	sopt.sopt_val = (caddr_t)&on;
837	sopt.sopt_valsize = sizeof on;
838	sopt.sopt_td = NULL;
839	rc = sosetopt(so, &sopt);
840	if (rc) {
841		log(LOG_ERR, "%s: can't set TCP_NODELAY on so %p (%d)\n",
842		    __func__, so, rc);
843	}
844}
845
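/*
 * init_iwarp_socket()/uninit_iwarp_socket() install and remove the receive
 * upcall (c4iw_so_upcall) on the socket; once installed, all socket activity
 * for the endpoint is funnelled into the taskqueue via add_ep_to_req_list().
 */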
846static void
847init_iwarp_socket(struct socket *so, void *arg)
848{
849
850	SOCKBUF_LOCK(&so->so_rcv);
851	soupcall_set(so, SO_RCV, c4iw_so_upcall, arg);
852	so->so_state |= SS_NBIO;
853	SOCKBUF_UNLOCK(&so->so_rcv);
854}
855
856static void
857uninit_iwarp_socket(struct socket *so)
858{
859
860	SOCKBUF_LOCK(&so->so_rcv);
861	soupcall_clear(so, SO_RCV);
862	SOCKBUF_UNLOCK(&so->so_rcv);
863}
864
865static void
866process_data(struct c4iw_ep *ep)
867{
868	int disconnect = 0;
869
870	CTR5(KTR_IW_CXGBE, "%s: so %p, ep %p, state %s, sbused %d", __func__,
871	    ep->com.so, ep, states[ep->com.state], sbused(&ep->com.so->so_rcv));
872
873	switch (ep->com.state) {
874	case MPA_REQ_SENT:
875		disconnect = process_mpa_reply(ep);
876		break;
877	case MPA_REQ_WAIT:
878		disconnect = process_mpa_request(ep);
879		if (disconnect)
880			/* Referred to in process_newconn() */
881			c4iw_put_ep(&ep->parent_ep->com);
882		break;
883	default:
884		if (sbused(&ep->com.so->so_rcv))
885			log(LOG_ERR, "%s: Unexpected streaming data. ep %p, "
886			    "state %d, so %p, so_state 0x%x, sbused %u\n",
887			    __func__, ep, ep->com.state, ep->com.so,
888			    ep->com.so->so_state, sbused(&ep->com.so->so_rcv));
889		break;
890	}
891	if (disconnect)
892		c4iw_ep_disconnect(ep, disconnect == 2, GFP_KERNEL);
893
894}
895
896static void
897process_connected(struct c4iw_ep *ep)
898{
899	struct socket *so = ep->com.so;
900
901	if ((so->so_state & SS_ISCONNECTED) && !so->so_error) {
902		if (send_mpa_req(ep))
903			goto err;
904	} else {
905		connect_reply_upcall(ep, -so->so_error);
906		goto err;
907	}
908	return;
909err:
910	close_socket(so);
911	ep->com.state = DEAD;
912	c4iw_put_ep(&ep->com);
913	return;
914}
915
916static inline int c4iw_zero_addr(struct sockaddr *addr)
917{
918	struct in6_addr *ip6;
919
920	if (addr->sa_family == AF_INET)
921		return IN_ZERONET(
922			ntohl(((struct sockaddr_in *)addr)->sin_addr.s_addr));
923	else {
924		ip6 = &((struct sockaddr_in6 *) addr)->sin6_addr;
925		return (ip6->s6_addr32[0] | ip6->s6_addr32[1] |
926				ip6->s6_addr32[2] | ip6->s6_addr32[3]) == 0;
927	}
928}
929
930static inline int c4iw_loopback_addr(struct sockaddr *addr)
931{
932	if (addr->sa_family == AF_INET)
933		return IN_LOOPBACK(
934			ntohl(((struct sockaddr_in *) addr)->sin_addr.s_addr));
935	else
936		return IN6_IS_ADDR_LOOPBACK(
937				&((struct sockaddr_in6 *) addr)->sin6_addr);
938}
939
940static inline int c4iw_any_addr(struct sockaddr *addr)
941{
942	return c4iw_zero_addr(addr) || c4iw_loopback_addr(addr);
943}
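/*
 * c4iw_any_addr() treats both the zero (unspecified) and the loopback address
 * as a "wildcard" listener address; for such listeners process_newconn() must
 * look up the real per-ifnet lep via find_real_listen_ep().
 */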
944
945static void
946process_newconn(struct c4iw_listen_ep *master_lep, struct socket *new_so)
947{
948	struct c4iw_listen_ep *real_lep = NULL;
949	struct c4iw_ep *new_ep = NULL;
950	int ret = 0;
951
952	MPASS(new_so != NULL);
953
954	if (c4iw_any_addr((struct sockaddr *)&master_lep->com.local_addr)) {
955		/* Here we need to find the 'real_lep' that belongs to the
956		 * incoming socket's network interface, such that the newly
957		 * created 'ep' can be attached to the real 'lep'.
958		 */
959		real_lep = find_real_listen_ep(master_lep, new_so);
960		if (real_lep == NULL) {
961			CTR2(KTR_IW_CXGBE, "%s: Could not find the real listen "
962					"ep for sock: %p", __func__, new_so);
963			log(LOG_ERR,"%s: Could not find the real listen ep for "
964					"sock: %p\n", __func__, new_so);
965			/* FIXME: properly free 'new_so' in the failure case.
966			 * Use of soabort() and soclose() is not legal
967			 * here (before soaccept()).
968			 */
969			return;
970		}
971	} else /* for a non-wildcard address, master_lep is always the real_lep */
972		real_lep = master_lep;
973
974	new_ep = alloc_ep(sizeof(*new_ep), GFP_KERNEL);
975
976	CTR6(KTR_IW_CXGBE, "%s: master_lep %p, real_lep: %p, new ep %p, "
977	    "listening so %p, new so %p", __func__, master_lep, real_lep,
978	    new_ep, master_lep->com.so, new_so);
979
980	new_ep->com.dev = real_lep->com.dev;
981	new_ep->com.so = new_so;
982	new_ep->com.cm_id = NULL;
983	new_ep->com.thread = real_lep->com.thread;
984	new_ep->parent_ep = real_lep;
985
986	GET_LOCAL_ADDR(&new_ep->com.local_addr, new_so);
987	GET_REMOTE_ADDR(&new_ep->com.remote_addr, new_so);
988	c4iw_get_ep(&real_lep->com);
989	init_timer(&new_ep->timer);
990	new_ep->com.state = MPA_REQ_WAIT;
991	START_EP_TIMER(new_ep);
992
993	setiwsockopt(new_so);
994
995	/* The MPA request might have been queued up on the socket already, so we
996	 * initialize the socket/upcall handler under the lock to prevent another
997	 * thread (via process_req()) from processing the MPA request simultaneously.
998	 */
999	c4iw_get_ep(&new_ep->com); /* Dereferenced at the end below; this keeps
1000				      the ep from being freed before it is unlocked. */
1001	mutex_lock(&new_ep->com.mutex);
1002	init_iwarp_socket(new_so, &new_ep->com);
1003
1004	ret = process_mpa_request(new_ep);
1005	if (ret) {
1006		/* ABORT */
1007		c4iw_ep_disconnect(new_ep, 1, GFP_KERNEL);
1008		c4iw_put_ep(&real_lep->com);
1009	}
1010	mutex_unlock(&new_ep->com.mutex);
1011	c4iw_put_ep(&new_ep->com);
1012	return;
1013}
1014
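/*
 * Note: epc->entry.tqe_prev doubles as the "already on req_list" marker (it
 * is reset to NULL in process_req()), so an endpoint is queued and referenced
 * at most once no matter how many events accumulate in ep_events.
 */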
1015static int
1016add_ep_to_req_list(struct c4iw_ep *ep, int new_ep_event)
1017{
1018	unsigned long flag;
1019
1020	spin_lock_irqsave(&req_lock, flag);
1021	if (ep && ep->com.so) {
1022		ep->com.ep_events |= new_ep_event;
1023		if (!ep->com.entry.tqe_prev) {
1024			c4iw_get_ep(&ep->com);
1025			TAILQ_INSERT_TAIL(&req_list, &ep->com, entry);
1026			queue_work(c4iw_taskq, &c4iw_task);
1027		}
1028	}
1029	spin_unlock_irqrestore(&req_lock, flag);
1030
1031	return (0);
1032}
1033
1034static int
1035c4iw_so_upcall(struct socket *so, void *arg, int waitflag)
1036{
1037	struct c4iw_ep *ep = arg;
1038
1039	CTR6(KTR_IW_CXGBE,
1040	    "%s: so %p, so_state 0x%x, ep %p, ep_state %s, tqe_prev %p",
1041	    __func__, so, so->so_state, ep, states[ep->com.state],
1042	    ep->com.entry.tqe_prev);
1043
1044	MPASS(ep->com.so == so);
1045	/*
1046	 * Wake up any threads waiting in rdma_init()/rdma_fini(),
1047	 * with locks held.
1048	 */
1049	if (so->so_error)
1050		c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
1051	add_ep_to_req_list(ep, C4IW_EVENT_SOCKET);
1052
1053	return (SU_OK);
1054}
1055
1056
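/*
 * Handler for an incoming CPL_RDMA_TERMINATE: resolve the endpoint from the
 * socket's receive-upcall argument and defer the actual processing to the
 * taskqueue as a C4IW_EVENT_TERM event.
 */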
1057static int
1058terminate(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
1059{
1060	struct adapter *sc = iq->adapter;
1061	const struct cpl_rdma_terminate *cpl = mtod(m, const void *);
1062	unsigned int tid = GET_TID(cpl);
1063	struct toepcb *toep = lookup_tid(sc, tid);
1064	struct socket *so;
1065	struct c4iw_ep *ep;
1066
1067	INP_WLOCK(toep->inp);
1068	so = inp_inpcbtosocket(toep->inp);
1069	ep = so->so_rcv.sb_upcallarg;
1070	INP_WUNLOCK(toep->inp);
1071
1072	CTR3(KTR_IW_CXGBE, "%s: so %p, ep %p", __func__, so, ep);
1073	add_ep_to_req_list(ep, C4IW_EVENT_TERM);
1074
1075	return 0;
1076}
1077
1078static struct socket *
1079dequeue_socket(struct socket *head)
1080{
1081	struct socket *so;
1082	struct sockaddr_in *remote;
1083
1084	ACCEPT_LOCK();
1085	so = TAILQ_FIRST(&head->so_comp);
1086	if (!so) {
1087		ACCEPT_UNLOCK();
1088		return NULL;
1089	}
1090
1091	SOCK_LOCK(so);
1092	/*
1093	 * Before changing the flags on the socket, we have to bump the
1094	 * reference count.  Otherwise, if the protocol calls sofree(),
1095	 * the socket will be released due to a zero refcount.
1096	 */
1097	soref(so);
1098	TAILQ_REMOVE(&head->so_comp, so, so_list);
1099	head->so_qlen--;
1100	so->so_qstate &= ~SQ_COMP;
1101	so->so_head = NULL;
1102	so->so_state |= SS_NBIO;
1103	SOCK_UNLOCK(so);
1104	ACCEPT_UNLOCK();
1105	remote = NULL;
1106	if (soaccept(so, (struct sockaddr **)&remote) != 0) {
1107		soclose(so);
1108		so = NULL;
1109	}
1110	free(remote, M_SONAME);
1111	return so;
1112}
1113
1114static void
1115process_socket_event(struct c4iw_ep *ep)
1116{
1117	int state = ep->com.state;
1118	struct socket *so = ep->com.so;
1119
1120	if (ep->com.state == DEAD) {
1121		CTR3(KTR_IW_CXGBE, "%s: Pending socket event discarded "
1122			"ep %p ep_state %s", __func__, ep, states[state]);
1123		return;
1124	}
1125
1126	CTR6(KTR_IW_CXGBE, "process_socket_event: so %p, so_state 0x%x, "
1127	    "so_err %d, sb_state 0x%x, ep %p, ep_state %s", so, so->so_state,
1128	    so->so_error, so->so_rcv.sb_state, ep, states[state]);
1129
1130	if (state == CONNECTING) {
1131		process_connected(ep);
1132		return;
1133	}
1134
1135	if (state == LISTEN) {
1136		struct c4iw_listen_ep *lep = (struct c4iw_listen_ep *)ep;
1137		struct socket *new_so;
1138
1139		while ((new_so = dequeue_socket(so)) != NULL) {
1140			process_newconn(lep, new_so);
1141		}
1142		return;
1143	}
1144
1145	/* connection error */
1146	if (so->so_error) {
1147		process_conn_error(ep);
1148		return;
1149	}
1150
1151	/* peer close */
1152	if ((so->so_rcv.sb_state & SBS_CANTRCVMORE) && state <= CLOSING) {
1153		process_peer_close(ep);
1154		/*
1155		 * check whether socket disconnect event is pending before
1156		 * returning. Fallthrough if yes.
1157		 */
1158		if (!(so->so_state & SS_ISDISCONNECTED))
1159			return;
1160	}
1161
1162	/* close complete */
1163	if (so->so_state & SS_ISDISCONNECTED) {
1164		process_close_complete(ep);
1165		return;
1166	}
1167
1168	/* rx data */
1169	process_data(ep);
1170}
1171
1172SYSCTL_NODE(_hw, OID_AUTO, iw_cxgbe, CTLFLAG_RD, 0, "iw_cxgbe driver parameters");
1173
1174static int dack_mode = 0;
1175SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, dack_mode, CTLFLAG_RWTUN, &dack_mode, 0,
1176		"Delayed ack mode (default = 0)");
1177
1178int c4iw_max_read_depth = 8;
1179SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, c4iw_max_read_depth, CTLFLAG_RWTUN, &c4iw_max_read_depth, 0,
1180		"Per-connection max ORD/IRD (default = 8)");
1181
1182static int enable_tcp_timestamps;
1183SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, enable_tcp_timestamps, CTLFLAG_RWTUN, &enable_tcp_timestamps, 0,
1184		"Enable tcp timestamps (default = 0)");
1185
1186static int enable_tcp_sack;
1187SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, enable_tcp_sack, CTLFLAG_RWTUN, &enable_tcp_sack, 0,
1188		"Enable tcp SACK (default = 0)");
1189
1190static int enable_tcp_window_scaling = 1;
1191SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, enable_tcp_window_scaling, CTLFLAG_RWTUN, &enable_tcp_window_scaling, 0,
1192		"Enable tcp window scaling (default = 1)");
1193
1194int c4iw_debug = 1;
1195SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, c4iw_debug, CTLFLAG_RWTUN, &c4iw_debug, 0,
1196		"Enable debug logging (default = 1)");
1197
1198static int peer2peer = 1;
1199SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, peer2peer, CTLFLAG_RWTUN, &peer2peer, 0,
1200		"Support peer2peer ULPs (default = 1)");
1201
1202static int p2p_type = FW_RI_INIT_P2PTYPE_READ_REQ;
1203SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, p2p_type, CTLFLAG_RWTUN, &p2p_type, 0,
1204		"RDMAP opcode to use for the RTR message: 1 = RDMA_READ 0 = RDMA_WRITE (default 1)");
1205
1206static int ep_timeout_secs = 60;
1207SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, ep_timeout_secs, CTLFLAG_RWTUN, &ep_timeout_secs, 0,
1208		"CM Endpoint operation timeout in seconds (default = 60)");
1209
1210static int mpa_rev = 1;
1211SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, mpa_rev, CTLFLAG_RWTUN, &mpa_rev, 0,
1212		"MPA Revision, 0 supports amso1100, 1 is RFC5044 spec compliant, 2 is IETF MPA Peer Connect Draft compliant (default = 1)");
1213
1214static int markers_enabled;
1215SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, markers_enabled, CTLFLAG_RWTUN, &markers_enabled, 0,
1216		"Enable MPA MARKERS (default(0) = disabled)");
1217
1218static int crc_enabled = 1;
1219SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, crc_enabled, CTLFLAG_RWTUN, &crc_enabled, 0,
1220		"Enable MPA CRC (default(1) = enabled)");
1221
1222static int rcv_win = 256 * 1024;
1223SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, rcv_win, CTLFLAG_RWTUN, &rcv_win, 0,
1224		"TCP receive window in bytes (default = 256KB)");
1225
1226static int snd_win = 128 * 1024;
1227SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, snd_win, CTLFLAG_RWTUN, &snd_win, 0,
1228		"TCP send window in bytes (default = 128KB)");
1229
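/*
 * start_ep_timer() takes an extra reference on the ep for the pending timer.
 * stop_ep_timer() drops that reference only if it wins the race on the
 * TIMEOUT bit (and then returns 0); otherwise the timeout path owns the
 * reference (dropped at the end of process_timeout()) and stop_ep_timer()
 * returns 1.
 */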
1230static void
1231start_ep_timer(struct c4iw_ep *ep)
1232{
1233
1234	if (timer_pending(&ep->timer)) {
1235		CTR2(KTR_IW_CXGBE, "%s: ep %p, already started", __func__, ep);
1236		printk(KERN_ERR "%s timer already started! ep %p\n", __func__,
1237		    ep);
1238		return;
1239	}
1240	clear_bit(TIMEOUT, &ep->com.flags);
1241	c4iw_get_ep(&ep->com);
1242	ep->timer.expires = jiffies + ep_timeout_secs * HZ;
1243	ep->timer.data = (unsigned long)ep;
1244	ep->timer.function = ep_timeout;
1245	add_timer(&ep->timer);
1246}
1247
1248static int
1249stop_ep_timer(struct c4iw_ep *ep)
1250{
1251
1252	del_timer_sync(&ep->timer);
1253	if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) {
1254		c4iw_put_ep(&ep->com);
1255		return 0;
1256	}
1257	return 1;
1258}
1259
1260static void *
1261alloc_ep(int size, gfp_t gfp)
1262{
1263	struct c4iw_ep_common *epc;
1264
1265	epc = kzalloc(size, gfp);
1266	if (epc == NULL)
1267		return (NULL);
1268
1269	kref_init(&epc->kref);
1270	mutex_init(&epc->mutex);
1271	c4iw_init_wr_wait(&epc->wr_wait);
1272
1273	return (epc);
1274}
1275
1276void _c4iw_free_ep(struct kref *kref)
1277{
1278	struct c4iw_ep *ep;
1279	struct c4iw_ep_common *epc;
1280
1281	ep = container_of(kref, struct c4iw_ep, com.kref);
1282	epc = &ep->com;
1283	KASSERT(!epc->entry.tqe_prev, ("%s epc %p still on req list",
1284	    __func__, epc));
1285	if (test_bit(QP_REFERENCED, &ep->com.flags))
1286		deref_qp(ep);
1287	CTR4(KTR_IW_CXGBE, "%s: ep %p, history 0x%lx, flags 0x%lx",
1288	    __func__, ep, epc->history, epc->flags);
1289	kfree(ep);
1290}
1291
1292static void release_ep_resources(struct c4iw_ep *ep)
1293{
1294	CTR2(KTR_IW_CXGBE, "%s:rerB %p", __func__, ep);
1295	set_bit(RELEASE_RESOURCES, &ep->com.flags);
1296	c4iw_put_ep(&ep->com);
1297	CTR2(KTR_IW_CXGBE, "%s:rerE %p", __func__, ep);
1298}
1299
1300static int
1301send_mpa_req(struct c4iw_ep *ep)
1302{
1303	int mpalen;
1304	struct mpa_message *mpa;
1305	struct mpa_v2_conn_params mpa_v2_params;
1306	struct mbuf *m;
1307	char mpa_rev_to_use = mpa_rev;
1308	int err = 0;
1309
1310	if (ep->retry_with_mpa_v1)
1311		mpa_rev_to_use = 1;
1312	mpalen = sizeof(*mpa) + ep->plen;
1313	if (mpa_rev_to_use == 2)
1314		mpalen += sizeof(struct mpa_v2_conn_params);
1315
1316	mpa = malloc(mpalen, M_CXGBE, M_NOWAIT);
1317	if (mpa == NULL) {
1318		err = -ENOMEM;
1319		CTR3(KTR_IW_CXGBE, "%s:smr1 ep: %p , error: %d",
1320				__func__, ep, err);
1321		goto err;
1322	}
1323
1324	memset(mpa, 0, mpalen);
1325	memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key));
1326	mpa->flags = (crc_enabled ? MPA_CRC : 0) |
1327		(markers_enabled ? MPA_MARKERS : 0) |
1328		(mpa_rev_to_use == 2 ? MPA_ENHANCED_RDMA_CONN : 0);
1329	mpa->private_data_size = htons(ep->plen);
1330	mpa->revision = mpa_rev_to_use;
1331
1332	if (mpa_rev_to_use == 1) {
1333		ep->tried_with_mpa_v1 = 1;
1334		ep->retry_with_mpa_v1 = 0;
1335	}
1336
1337	if (mpa_rev_to_use == 2) {
1338		mpa->private_data_size = htons(ntohs(mpa->private_data_size) +
1339					    sizeof(struct mpa_v2_conn_params));
1340		mpa_v2_params.ird = htons((u16)ep->ird);
1341		mpa_v2_params.ord = htons((u16)ep->ord);
1342
1343		if (peer2peer) {
1344			mpa_v2_params.ird |= htons(MPA_V2_PEER2PEER_MODEL);
1345
1346			if (p2p_type == FW_RI_INIT_P2PTYPE_RDMA_WRITE) {
1347				mpa_v2_params.ord |=
1348				    htons(MPA_V2_RDMA_WRITE_RTR);
1349			} else if (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ) {
1350				mpa_v2_params.ord |=
1351					htons(MPA_V2_RDMA_READ_RTR);
1352			}
1353		}
1354		memcpy(mpa->private_data, &mpa_v2_params,
1355			sizeof(struct mpa_v2_conn_params));
1356
1357		if (ep->plen) {
1358
1359			memcpy(mpa->private_data +
1360				sizeof(struct mpa_v2_conn_params),
1361				ep->mpa_pkt + sizeof(*mpa), ep->plen);
1362		}
1363	} else {
1364
1365		if (ep->plen)
1366			memcpy(mpa->private_data,
1367					ep->mpa_pkt + sizeof(*mpa), ep->plen);
1368		CTR2(KTR_IW_CXGBE, "%s:smr7 %p", __func__, ep);
1369	}
1370
1371	m = m_getm(NULL, mpalen, M_NOWAIT, MT_DATA);
1372	if (m == NULL) {
1373		err = -ENOMEM;
1374		CTR3(KTR_IW_CXGBE, "%s:smr2 ep: %p , error: %d",
1375				__func__, ep, err);
1376		free(mpa, M_CXGBE);
1377		goto err;
1378	}
1379	m_copyback(m, 0, mpalen, (void *)mpa);
1380	free(mpa, M_CXGBE);
1381
1382	err = -sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT,
1383			ep->com.thread);
1384	if (err) {
1385		CTR3(KTR_IW_CXGBE, "%s:smr3 ep: %p , error: %d",
1386				__func__, ep, err);
1387		goto err;
1388	}
1389
1390	START_EP_TIMER(ep);
1391	ep->com.state = MPA_REQ_SENT;
1392	ep->mpa_attr.initiator = 1;
1393	CTR3(KTR_IW_CXGBE, "%s:smrE %p, error: %d", __func__, ep, err);
1394	return 0;
1395err:
1396	connect_reply_upcall(ep, err);
1397	CTR3(KTR_IW_CXGBE, "%s:smrE %p, error: %d", __func__, ep, err);
1398	return err;
1399}
1400
1401static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen)
1402{
1403	int mpalen;
1404	struct mpa_message *mpa;
1405	struct mpa_v2_conn_params mpa_v2_params;
1406	struct mbuf *m;
1407	int err;
1408
1409	CTR4(KTR_IW_CXGBE, "%s:smrejB %p %u %d", __func__, ep, ep->hwtid,
1410	    ep->plen);
1411
1412	mpalen = sizeof(*mpa) + plen;
1413
1414	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
1415
1416		mpalen += sizeof(struct mpa_v2_conn_params);
1417		CTR4(KTR_IW_CXGBE, "%s:smrej1 %p %u %d", __func__, ep,
1418		    ep->mpa_attr.version, mpalen);
1419	}
1420
1421	mpa = malloc(mpalen, M_CXGBE, M_NOWAIT);
1422	if (mpa == NULL)
1423		return (-ENOMEM);
1424
1425	memset(mpa, 0, mpalen);
1426	memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
1427	mpa->flags = MPA_REJECT;
1428	mpa->revision = mpa_rev;
1429	mpa->private_data_size = htons(plen);
1430
1431	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
1432
1433		mpa->flags |= MPA_ENHANCED_RDMA_CONN;
1434		mpa->private_data_size = htons(ntohs(mpa->private_data_size) +
1435					    sizeof(struct mpa_v2_conn_params));
1436		mpa_v2_params.ird = htons(((u16)ep->ird) |
1437				(peer2peer ? MPA_V2_PEER2PEER_MODEL :
1438				 0));
1439		mpa_v2_params.ord = htons(((u16)ep->ord) | (peer2peer ?
1440					(p2p_type ==
1441					 FW_RI_INIT_P2PTYPE_RDMA_WRITE ?
1442					 MPA_V2_RDMA_WRITE_RTR : p2p_type ==
1443					 FW_RI_INIT_P2PTYPE_READ_REQ ?
1444					 MPA_V2_RDMA_READ_RTR : 0) : 0));
1445		memcpy(mpa->private_data, &mpa_v2_params,
1446				sizeof(struct mpa_v2_conn_params));
1447
1448		if (ep->plen)
1449			memcpy(mpa->private_data +
1450				sizeof(struct mpa_v2_conn_params), pdata, plen);
1451		CTR5(KTR_IW_CXGBE, "%s:smrej3 %p %d %d %d", __func__, ep,
1452		    mpa_v2_params.ird, mpa_v2_params.ord, ep->plen);
1453	} else
1454		if (plen)
1455			memcpy(mpa->private_data, pdata, plen);
1456
1457	m = m_getm(NULL, mpalen, M_NOWAIT, MT_DATA);
1458	if (m == NULL) {
1459		free(mpa, M_CXGBE);
1460		return (-ENOMEM);
1461	}
1462	m_copyback(m, 0, mpalen, (void *)mpa);
1463	free(mpa, M_CXGBE);
1464
1465	err = -sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT, ep->com.thread);
1466	if (!err)
1467		ep->snd_seq += mpalen;
1468	CTR4(KTR_IW_CXGBE, "%s:smrejE %p %u %d", __func__, ep, ep->hwtid, err);
1469	return err;
1470}
1471
1472static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen)
1473{
1474	int mpalen;
1475	struct mpa_message *mpa;
1476	struct mbuf *m;
1477	struct mpa_v2_conn_params mpa_v2_params;
1478	int err;
1479
1480	CTR2(KTR_IW_CXGBE, "%s:smrepB %p", __func__, ep);
1481
1482	mpalen = sizeof(*mpa) + plen;
1483
1484	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
1485
1486		CTR3(KTR_IW_CXGBE, "%s:smrep1 %p %d", __func__, ep,
1487		    ep->mpa_attr.version);
1488		mpalen += sizeof(struct mpa_v2_conn_params);
1489	}
1490
1491	mpa = malloc(mpalen, M_CXGBE, M_NOWAIT);
1492	if (mpa == NULL)
1493		return (-ENOMEM);
1494
1495	memset(mpa, 0, sizeof(*mpa));
1496	memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
1497	mpa->flags = (ep->mpa_attr.crc_enabled ? MPA_CRC : 0) |
1498		(markers_enabled ? MPA_MARKERS : 0);
1499	mpa->revision = ep->mpa_attr.version;
1500	mpa->private_data_size = htons(plen);
1501
1502	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
1503
1504		mpa->flags |= MPA_ENHANCED_RDMA_CONN;
1505		mpa->private_data_size +=
1506			htons(sizeof(struct mpa_v2_conn_params));
1507		mpa_v2_params.ird = htons((u16)ep->ird);
1508		mpa_v2_params.ord = htons((u16)ep->ord);
1509		CTR5(KTR_IW_CXGBE, "%s:smrep3 %p %d %d %d", __func__, ep,
1510		    ep->mpa_attr.version, mpa_v2_params.ird, mpa_v2_params.ord);
1511
1512		if (peer2peer && (ep->mpa_attr.p2p_type !=
1513			FW_RI_INIT_P2PTYPE_DISABLED)) {
1514
1515			mpa_v2_params.ird |= htons(MPA_V2_PEER2PEER_MODEL);
1516
1517			if (p2p_type == FW_RI_INIT_P2PTYPE_RDMA_WRITE) {
1518
1519				mpa_v2_params.ord |=
1520					htons(MPA_V2_RDMA_WRITE_RTR);
1521				CTR5(KTR_IW_CXGBE, "%s:smrep4 %p %d %d %d",
1522				    __func__, ep, p2p_type, mpa_v2_params.ird,
1523				    mpa_v2_params.ord);
1524			}
1525			else if (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ) {
1526
1527				mpa_v2_params.ord |=
1528					htons(MPA_V2_RDMA_READ_RTR);
1529				CTR5(KTR_IW_CXGBE, "%s:smrep5 %p %d %d %d",
1530				    __func__, ep, p2p_type, mpa_v2_params.ird,
1531				    mpa_v2_params.ord);
1532			}
1533		}
1534
1535		memcpy(mpa->private_data, &mpa_v2_params,
1536			sizeof(struct mpa_v2_conn_params));
1537
1538		if (ep->plen)
1539			memcpy(mpa->private_data +
1540				sizeof(struct mpa_v2_conn_params), pdata, plen);
1541	} else
1542		if (plen)
1543			memcpy(mpa->private_data, pdata, plen);
1544
1545	m = m_getm(NULL, mpalen, M_NOWAIT, MT_DATA);
1546	if (m == NULL) {
1547		free(mpa, M_CXGBE);
1548		return (-ENOMEM);
1549	}
1550	m_copyback(m, 0, mpalen, (void *)mpa);
1551	free(mpa, M_CXGBE);
1552
1553
1554	ep->com.state = MPA_REP_SENT;
1555	ep->snd_seq += mpalen;
1556	err = -sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT,
1557			ep->com.thread);
1558	CTR3(KTR_IW_CXGBE, "%s:smrepE %p %d", __func__, ep, err);
1559	return err;
1560}
1561
1562
1563
1564static void close_complete_upcall(struct c4iw_ep *ep, int status)
1565{
1566	struct iw_cm_event event;
1567
1568	CTR2(KTR_IW_CXGBE, "%s:ccuB %p", __func__, ep);
1569	memset(&event, 0, sizeof(event));
1570	event.event = IW_CM_EVENT_CLOSE;
1571	event.status = status;
1572
1573	if (ep->com.cm_id) {
1574
1575		CTR2(KTR_IW_CXGBE, "%s:ccu1 %p", __func__, ep);
1576		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
1577		deref_cm_id(&ep->com);
1578		set_bit(CLOSE_UPCALL, &ep->com.history);
1579	}
1580	CTR2(KTR_IW_CXGBE, "%s:ccuE %p", __func__, ep);
1581}
1582
1583static int
1584send_abort(struct c4iw_ep *ep)
1585{
1586	struct socket *so = ep->com.so;
1587	struct sockopt sopt;
1588	int rc;
1589	struct linger l;
1590
1591	CTR5(KTR_IW_CXGBE, "%s ep %p so %p state %s tid %d", __func__, ep, so,
1592	    states[ep->com.state], ep->hwtid);
1593
1594	l.l_onoff = 1;
1595	l.l_linger = 0;
1596
1597	/* linger_time of 0 forces RST to be sent */
1598	sopt.sopt_dir = SOPT_SET;
1599	sopt.sopt_level = SOL_SOCKET;
1600	sopt.sopt_name = SO_LINGER;
1601	sopt.sopt_val = (caddr_t)&l;
1602	sopt.sopt_valsize = sizeof l;
1603	sopt.sopt_td = NULL;
1604	rc = sosetopt(so, &sopt);
1605	if (rc != 0) {
1606		log(LOG_ERR, "%s: sosetopt(%p, linger = 0) failed with %d.\n",
1607		    __func__, so, rc);
1608	}
1609
1610	uninit_iwarp_socket(so);
1611	soclose(so);
1612	set_bit(ABORT_CONN, &ep->com.history);
1613
1614	/*
1615	 * TBD: the iw_cxgbe driver should receive an ABORT reply for every ABORT
1616	 * request it has sent. But the current TOE driver does not propagate
1617	 * this ABORT reply event (via do_abort_rpl) to iw_cxgbe. So as a
1618	 * workaround, de-reference 'ep' here instead of doing it in the (not yet
1619	 * implemented) abort_rpl() handler of the iw_cxgbe driver.
1620	 */
1621	release_ep_resources(ep);
1622
1623	return (0);
1624}
1625
1626static void peer_close_upcall(struct c4iw_ep *ep)
1627{
1628	struct iw_cm_event event;
1629
1630	CTR2(KTR_IW_CXGBE, "%s:pcuB %p", __func__, ep);
1631	memset(&event, 0, sizeof(event));
1632	event.event = IW_CM_EVENT_DISCONNECT;
1633
1634	if (ep->com.cm_id) {
1635
1636		CTR2(KTR_IW_CXGBE, "%s:pcu1 %p", __func__, ep);
1637		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
1638		set_bit(DISCONN_UPCALL, &ep->com.history);
1639	}
1640	CTR2(KTR_IW_CXGBE, "%s:pcuE %p", __func__, ep);
1641}
1642
1643static void peer_abort_upcall(struct c4iw_ep *ep)
1644{
1645	struct iw_cm_event event;
1646
1647	CTR2(KTR_IW_CXGBE, "%s:pauB %p", __func__, ep);
1648	memset(&event, 0, sizeof(event));
1649	event.event = IW_CM_EVENT_CLOSE;
1650	event.status = -ECONNRESET;
1651
1652	if (ep->com.cm_id) {
1653
1654		CTR2(KTR_IW_CXGBE, "%s:pau1 %p", __func__, ep);
1655		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
1656		deref_cm_id(&ep->com);
1657		set_bit(ABORT_UPCALL, &ep->com.history);
1658	}
1659	CTR2(KTR_IW_CXGBE, "%s:pauE %p", __func__, ep);
1660}
1661
1662static void connect_reply_upcall(struct c4iw_ep *ep, int status)
1663{
1664	struct iw_cm_event event;
1665
1666	CTR3(KTR_IW_CXGBE, "%s:cruB %p, status: %d", __func__, ep, status);
1667	memset(&event, 0, sizeof(event));
1668	event.event = IW_CM_EVENT_CONNECT_REPLY;
1669	event.status = ((status == -ECONNABORTED) || (status == -EPIPE)) ?
1670					-ECONNRESET : status;
1671	event.local_addr = ep->com.local_addr;
1672	event.remote_addr = ep->com.remote_addr;
1673
1674	if ((status == 0) || (status == -ECONNREFUSED)) {
1675
1676		if (!ep->tried_with_mpa_v1) {
1677
1678			CTR2(KTR_IW_CXGBE, "%s:cru1 %p", __func__, ep);
1679			/* this means MPA_v2 is used */
1680			event.ord = ep->ird;
1681			event.ird = ep->ord;
1682			event.private_data_len = ep->plen -
1683				sizeof(struct mpa_v2_conn_params);
1684			event.private_data = ep->mpa_pkt +
1685				sizeof(struct mpa_message) +
1686				sizeof(struct mpa_v2_conn_params);
1687		} else {
1688
1689			CTR2(KTR_IW_CXGBE, "%s:cru2 %p", __func__, ep);
1690			/* this means MPA_v1 is used */
1691			event.ord = c4iw_max_read_depth;
1692			event.ird = c4iw_max_read_depth;
1693			event.private_data_len = ep->plen;
1694			event.private_data = ep->mpa_pkt +
1695				sizeof(struct mpa_message);
1696		}
1697	}
1698
1699	if (ep->com.cm_id) {
1700
1701		CTR2(KTR_IW_CXGBE, "%s:cru3 %p", __func__, ep);
1702		set_bit(CONN_RPL_UPCALL, &ep->com.history);
1703		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
1704	}
1705
1706	if(status == -ECONNABORTED) {
1707
1708		CTR3(KTR_IW_CXGBE, "%s:cruE %p %d", __func__, ep, status);
1709		return;
1710	}
1711
1712	if (status < 0) {
1713
1714		CTR3(KTR_IW_CXGBE, "%s:cru4 %p %d", __func__, ep, status);
1715		deref_cm_id(&ep->com);
1716	}
1717
1718	CTR2(KTR_IW_CXGBE, "%s:cruE %p", __func__, ep);
1719}
1720
1721static int connect_request_upcall(struct c4iw_ep *ep)
1722{
1723	struct iw_cm_event event;
1724	int ret;
1725
1726	CTR3(KTR_IW_CXGBE, "%s: ep %p, mpa_v1 %d", __func__, ep,
1727	    ep->tried_with_mpa_v1);
1728
1729	memset(&event, 0, sizeof(event));
1730	event.event = IW_CM_EVENT_CONNECT_REQUEST;
1731	event.local_addr = ep->com.local_addr;
1732	event.remote_addr = ep->com.remote_addr;
1733	event.provider_data = ep;
1734
1735	if (!ep->tried_with_mpa_v1) {
1736		/* this means MPA_v2 is used */
1737		event.ord = ep->ord;
1738		event.ird = ep->ird;
1739		event.private_data_len = ep->plen -
1740			sizeof(struct mpa_v2_conn_params);
1741		event.private_data = ep->mpa_pkt + sizeof(struct mpa_message) +
1742			sizeof(struct mpa_v2_conn_params);
1743	} else {
1744
1745		/* this means MPA_v1 is used. Send max supported */
1746		event.ord = c4iw_max_read_depth;
1747		event.ird = c4iw_max_read_depth;
1748		event.private_data_len = ep->plen;
1749		event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
1750	}
1751
1752	c4iw_get_ep(&ep->com);
1753	ret = ep->parent_ep->com.cm_id->event_handler(ep->parent_ep->com.cm_id,
1754	    &event);
1755	if(ret) {
1756		CTR3(KTR_IW_CXGBE, "%s: ep %p, Failure while notifying event to"
1757			" IWCM, err:%d", __func__, ep, ret);
1758		c4iw_put_ep(&ep->com);
1759	} else
1760		/* Dereference parent_ep only in success case.
1761		 * In case of failure, parent_ep is dereferenced by the caller
1762		 * of process_mpa_request().
1763		 */
1764		c4iw_put_ep(&ep->parent_ep->com);
1765
1766	set_bit(CONNREQ_UPCALL, &ep->com.history);
1767	return ret;
1768}
1769
1770static void established_upcall(struct c4iw_ep *ep)
1771{
1772	struct iw_cm_event event;
1773
1774	CTR2(KTR_IW_CXGBE, "%s:euB %p", __func__, ep);
1775	memset(&event, 0, sizeof(event));
1776	event.event = IW_CM_EVENT_ESTABLISHED;
1777	event.ird = ep->ord;
1778	event.ord = ep->ird;
1779
1780	if (ep->com.cm_id) {
1781
1782		CTR2(KTR_IW_CXGBE, "%s:eu1 %p", __func__, ep);
1783		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
1784		set_bit(ESTAB_UPCALL, &ep->com.history);
1785	}
1786	CTR2(KTR_IW_CXGBE, "%s:euE %p", __func__, ep);
1787}
1788
1789
1790#define RELAXED_IRD_NEGOTIATION 1
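/*
 * With RELAXED_IRD_NEGOTIATION set, process_mpa_reply() accepts a responder
 * ORD larger than our IRD (up to the adapter's max_ordird_qp) and shrinks our
 * ORD to match a smaller responder IRD, instead of flagging insufficient IRD
 * (-ENOMEM).
 */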
1791
1792/*
1793 * process_mpa_reply - process streaming mode MPA reply
1794 *
1795 * Returns:
1796 *
1797 * 0 upon success indicating a connect request was delivered to the ULP
1798 * or the mpa request is incomplete but valid so far.
1799 *
1800 * 1 if a failure requires the caller to close the connection.
1801 *
1802 * 2 if a failure requires the caller to abort the connection.
1803 */
1804static int process_mpa_reply(struct c4iw_ep *ep)
1805{
1806	struct mpa_message *mpa;
1807	struct mpa_v2_conn_params *mpa_v2_params;
1808	u16 plen;
1809	u16 resp_ird, resp_ord;
1810	u8 rtr_mismatch = 0, insuff_ird = 0;
1811	struct c4iw_qp_attributes attrs = {0};
1812	enum c4iw_qp_attr_mask mask;
1813	int err;
1814	struct mbuf *top, *m;
1815	int flags = MSG_DONTWAIT;
1816	struct uio uio;
1817	int disconnect = 0;
1818
1819	CTR2(KTR_IW_CXGBE, "%s:pmrB %p", __func__, ep);
1820
1821	/*
1822	 * Stop mpa timer.  If it expired, then
1823	 * we ignore the MPA reply.  process_timeout()
1824	 * will abort the connection.
1825	 */
1826	if (STOP_EP_TIMER(ep))
1827		return 0;
1828
1829	uio.uio_resid = 1000000;
1830	uio.uio_td = ep->com.thread;
1831	err = soreceive(ep->com.so, NULL, &uio, &top, NULL, &flags);
1832
1833	if (err) {
1834
1835		if (err == EWOULDBLOCK) {
1836
1837			CTR2(KTR_IW_CXGBE, "%s:pmr1 %p", __func__, ep);
1838			START_EP_TIMER(ep);
1839			return 0;
1840		}
1841		err = -err;
1842		CTR2(KTR_IW_CXGBE, "%s:pmr2 %p", __func__, ep);
1843		goto err;
1844	}
1845
1846	if (ep->com.so->so_rcv.sb_mb) {
1847
1848		CTR2(KTR_IW_CXGBE, "%s:pmr3 %p", __func__, ep);
1849		printf("%s data after soreceive called! so %p sb_mb %p top %p\n",
1850		       __func__, ep->com.so, ep->com.so->so_rcv.sb_mb, top);
1851	}
1852
1853	m = top;
1854
1855	do {
1856
1857		CTR2(KTR_IW_CXGBE, "%s:pmr4 %p", __func__, ep);
1858		/*
1859		 * If we get more than the supported amount of private data
1860		 * then we must fail this connection.
1861		 */
1862		if (ep->mpa_pkt_len + m->m_len > sizeof(ep->mpa_pkt)) {
1863
1864			CTR3(KTR_IW_CXGBE, "%s:pmr5 %p %d", __func__, ep,
1865			    ep->mpa_pkt_len + m->m_len);
1866			err = (-EINVAL);
1867			goto err_stop_timer;
1868		}
1869
1870		/*
1871		 * copy the new data into our accumulation buffer.
1872		 */
1873		m_copydata(m, 0, m->m_len, &(ep->mpa_pkt[ep->mpa_pkt_len]));
1874		ep->mpa_pkt_len += m->m_len;
1875		if (!m->m_next)
1876			m = m->m_nextpkt;
1877		else
1878			m = m->m_next;
1879	} while (m);
1880
1881	m_freem(top);
1882	/*
1883	 * if we don't even have the mpa message, then bail.
1884	 */
1885	if (ep->mpa_pkt_len < sizeof(*mpa)) {
1886		return 0;
1887	}
1888	mpa = (struct mpa_message *) ep->mpa_pkt;
1889
1890	/* Validate MPA header. */
1891	if (mpa->revision > mpa_rev) {
1892
1893		CTR4(KTR_IW_CXGBE, "%s:pmr6 %p %d %d", __func__, ep,
1894		    mpa->revision, mpa_rev);
1895		printk(KERN_ERR MOD "%s MPA version mismatch. Local = %d, "
1896				" Received = %d\n", __func__, mpa_rev, mpa->revision);
1897		err = -EPROTO;
1898		goto err_stop_timer;
1899	}
1900
1901	if (memcmp(mpa->key, MPA_KEY_REP, sizeof(mpa->key))) {
1902
1903		CTR2(KTR_IW_CXGBE, "%s:pmr7 %p", __func__, ep);
1904		err = -EPROTO;
1905		goto err_stop_timer;
1906	}
1907
1908	plen = ntohs(mpa->private_data_size);
1909
1910	/*
1911	 * Fail if there's too much private data.
1912	 */
1913	if (plen > MPA_MAX_PRIVATE_DATA) {
1914
1915		CTR2(KTR_IW_CXGBE, "%s:pmr8 %p", __func__, ep);
1916		err = -EPROTO;
1917		goto err_stop_timer;
1918	}
1919
1920	/*
1921	 * Fail if there is more data than the MPA header plus plen allows.
1922	 */
1923	if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
1924
1925		CTR2(KTR_IW_CXGBE, "%s:pmr9 %p", __func__, ep);
1926		STOP_EP_TIMER(ep);
1927		err = -EPROTO;
1928		goto err_stop_timer;
1929	}
1930
1931	ep->plen = (u8) plen;
1932
1933	/*
1934	 * If we don't have all the pdata yet, then bail.
1935	 * We'll continue processing when more data arrives.
1936	 */
1937	if (ep->mpa_pkt_len < (sizeof(*mpa) + plen)) {
1938
1939		CTR2(KTR_IW_CXGBE, "%s:pmra %p", __func__, ep);
1940		return 0;
1941	}
1942
1943	if (mpa->flags & MPA_REJECT) {
1944
1945		CTR2(KTR_IW_CXGBE, "%s:pmrb %p", __func__, ep);
1946		err = -ECONNREFUSED;
1947		goto err_stop_timer;
1948	}
1949
1950	/*
1951	 * If we get here we have accumulated the entire mpa
1952	 * start reply message including private data, and
1953	 * the MPA header is valid.
1954	 */
1955	ep->com.state = FPDU_MODE;
1956	ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
1957	ep->mpa_attr.recv_marker_enabled = markers_enabled;
1958	ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
1959	ep->mpa_attr.version = mpa->revision;
1960	ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
1961
1962	if (mpa->revision == 2) {
1963
1964		CTR2(KTR_IW_CXGBE, "%s:pmrc %p", __func__, ep);
1965		ep->mpa_attr.enhanced_rdma_conn =
1966			mpa->flags & MPA_ENHANCED_RDMA_CONN ? 1 : 0;
1967
1968		if (ep->mpa_attr.enhanced_rdma_conn) {
1969
1970			CTR2(KTR_IW_CXGBE, "%s:pmrd %p", __func__, ep);
1971			mpa_v2_params = (struct mpa_v2_conn_params *)
1972				(ep->mpa_pkt + sizeof(*mpa));
1973			resp_ird = ntohs(mpa_v2_params->ird) &
1974				MPA_V2_IRD_ORD_MASK;
1975			resp_ord = ntohs(mpa_v2_params->ord) &
1976				MPA_V2_IRD_ORD_MASK;
1977
1978			/*
1979			 * This is a double-check.  Ideally the checks below are
1980			 * not required since the ird/ord negotiation has already
1981			 * been handled in c4iw_accept_cr().
1982			 */
1983			if (ep->ird < resp_ord) {
1984				if (RELAXED_IRD_NEGOTIATION && resp_ord <=
1985				   ep->com.dev->rdev.adap->params.max_ordird_qp)
1986					ep->ird = resp_ord;
1987				else
1988					insuff_ird = 1;
1989			} else if (ep->ird > resp_ord) {
1990				ep->ird = resp_ord;
1991			}
1992			if (ep->ord > resp_ird) {
1993				if (RELAXED_IRD_NEGOTIATION)
1994					ep->ord = resp_ird;
1995				else
1996					insuff_ird = 1;
1997			}
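			/*
			 * Negotiation failed: adopt the peer's values anyway
			 * and flag the error; a TERM with MPA_INSUFF_IRD is
			 * generated further below, after the QP moves to RTS.
			 */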
1998			if (insuff_ird) {
1999				err = -ENOMEM;
2000				ep->ird = resp_ord;
2001				ep->ord = resp_ird;
2002			}
2003
2004			if (ntohs(mpa_v2_params->ird) &
2005				MPA_V2_PEER2PEER_MODEL) {
2006
2007				CTR2(KTR_IW_CXGBE, "%s:pmrf %p", __func__, ep);
2008				if (ntohs(mpa_v2_params->ord) &
2009					MPA_V2_RDMA_WRITE_RTR) {
2010
2011					CTR2(KTR_IW_CXGBE, "%s:pmrg %p", __func__, ep);
2012					ep->mpa_attr.p2p_type =
2013						FW_RI_INIT_P2PTYPE_RDMA_WRITE;
2014				}
2015				else if (ntohs(mpa_v2_params->ord) &
2016					MPA_V2_RDMA_READ_RTR) {
2017
2018					CTR2(KTR_IW_CXGBE, "%s:pmrh %p", __func__, ep);
2019					ep->mpa_attr.p2p_type =
2020						FW_RI_INIT_P2PTYPE_READ_REQ;
2021				}
2022			}
2023		}
2024	} else {
2025
2026		CTR2(KTR_IW_CXGBE, "%s:pmri %p", __func__, ep);
2027
2028		if (mpa->revision == 1) {
2029
2030			CTR2(KTR_IW_CXGBE, "%s:pmrj %p", __func__, ep);
2031
2032			if (peer2peer) {
2033
2034				CTR2(KTR_IW_CXGBE, "%s:pmrk %p", __func__, ep);
2035				ep->mpa_attr.p2p_type = p2p_type;
2036			}
2037		}
2038	}
2039
2040	if (set_tcpinfo(ep)) {
2041
2042		CTR2(KTR_IW_CXGBE, "%s:pmrl %p", __func__, ep);
2043		printf("%s set_tcpinfo error\n", __func__);
2044		err = -ECONNRESET;
2045		goto err;
2046	}
2047
2048	CTR6(KTR_IW_CXGBE, "%s - crc_enabled = %d, recv_marker_enabled = %d, "
2049	    "xmit_marker_enabled = %d, version = %d p2p_type = %d", __func__,
2050	    ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
2051	    ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version,
2052	    ep->mpa_attr.p2p_type);
2053
2054	/*
2055	 * If the responder's RTR does not match that of the initiator, assign
2056	 * FW_RI_INIT_P2PTYPE_DISABLED in the mpa attributes so that no RTR is
2057	 * generated when moving the QP to RTS state.
2058	 * A TERM message will be sent after the QP has moved to RTS state.
2059	 */
2060	if ((ep->mpa_attr.version == 2) && peer2peer &&
2061		(ep->mpa_attr.p2p_type != p2p_type)) {
2062
2063		CTR2(KTR_IW_CXGBE, "%s:pmrm %p", __func__, ep);
2064		ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
2065		rtr_mismatch = 1;
2066	}
2067
2068
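	/*
	 * Everything checks out: bind this stream to the QP and move it to
	 * RTS with the negotiated MPA attributes and IRD/ORD limits.
	 */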
2069	//ep->ofld_txq = TOEPCB(ep->com.so)->ofld_txq;
2070	attrs.mpa_attr = ep->mpa_attr;
2071	attrs.max_ird = ep->ird;
2072	attrs.max_ord = ep->ord;
2073	attrs.llp_stream_handle = ep;
2074	attrs.next_state = C4IW_QP_STATE_RTS;
2075
2076	mask = C4IW_QP_ATTR_NEXT_STATE |
2077		C4IW_QP_ATTR_LLP_STREAM_HANDLE | C4IW_QP_ATTR_MPA_ATTR |
2078		C4IW_QP_ATTR_MAX_IRD | C4IW_QP_ATTR_MAX_ORD;
2079
2080	/* bind QP and TID with INIT_WR */
2081	err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, mask, &attrs, 1);
2082
2083	if (err) {
2084
2085		CTR2(KTR_IW_CXGBE, "%s:pmrn %p", __func__, ep);
2086		goto err;
2087	}
2088
2089	/*
2090	 * If responder's RTR requirement did not match with what initiator
2091	 * supports, generate TERM message
2092	 */
2093	if (rtr_mismatch) {
2094
2095		CTR2(KTR_IW_CXGBE, "%s:pmro %p", __func__, ep);
2096		printk(KERN_ERR "%s: RTR mismatch, sending TERM\n", __func__);
2097		attrs.layer_etype = LAYER_MPA | DDP_LLP;
2098		attrs.ecode = MPA_NOMATCH_RTR;
2099		attrs.next_state = C4IW_QP_STATE_TERMINATE;
2100		attrs.send_term = 1;
2101		err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
2102			C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
2103		err = -ENOMEM;
2104		disconnect = 1;
2105		goto out;
2106	}
2107
2108	/*
2109	 * Generate a TERM if the initiator's IRD is not sufficient for the
2110	 * responder-provided ORD.  Currently we behave the same way even when
2111	 * the responder-provided IRD is insufficient for the initiator's
2112	 * ORD.
2113	 */
2114	if (insuff_ird) {
2115
2116		CTR2(KTR_IW_CXGBE, "%s:pmrp %p", __func__, ep);
2117		printk(KERN_ERR "%s: Insufficient IRD, sending TERM\n",
2118				__func__);
2119		attrs.layer_etype = LAYER_MPA | DDP_LLP;
2120		attrs.ecode = MPA_INSUFF_IRD;
2121		attrs.next_state = C4IW_QP_STATE_TERMINATE;
2122		attrs.send_term = 1;
2123		err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
2124			C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
2125		err = -ENOMEM;
2126		disconnect = 1;
2127		goto out;
2128	}
2129	goto out;
2130err_stop_timer:
2131	STOP_EP_TIMER(ep);
2132err:
2133	disconnect = 2;
2134out:
2135	connect_reply_upcall(ep, err);
2136	CTR2(KTR_IW_CXGBE, "%s:pmrE %p", __func__, ep);
2137	return disconnect;
2138}
2139
2140/*
2141 * process_mpa_request - process streaming mode MPA request
2142 *
2143 * Returns:
2144 *
2145 * 0 upon success indicating a connect request was delivered to the ULP
2146 * or the mpa request is incomplete but valid so far.
2147 *
2148 * 1 if a failure requires the caller to close the connection.
2149 *
2150 * 2 if a failure requires the caller to abort the connection.
2151 */
2152static int
2153process_mpa_request(struct c4iw_ep *ep)
2154{
2155	struct mpa_message *mpa;
2156	struct mpa_v2_conn_params *mpa_v2_params;
2157	u16 plen;
2158	int flags = MSG_DONTWAIT;
2159	int rc;
2160	struct iovec iov;
2161	struct uio uio;
2162	enum c4iw_ep_state state = ep->com.state;
2163
2164	CTR3(KTR_IW_CXGBE, "%s: ep %p, state %s", __func__, ep, states[state]);
2165
2166	if (state != MPA_REQ_WAIT)
2167		return 0;
2168
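	/*
	 * Unlike process_mpa_reply(), read directly into the endpoint's
	 * mpa_pkt accumulation buffer through a single iovec; on return
	 * uio_offset holds the number of bytes that soreceive() copied in.
	 */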
2169	iov.iov_base = &ep->mpa_pkt[ep->mpa_pkt_len];
2170	iov.iov_len = sizeof(ep->mpa_pkt) - ep->mpa_pkt_len;
2171	uio.uio_iov = &iov;
2172	uio.uio_iovcnt = 1;
2173	uio.uio_offset = 0;
2174	uio.uio_resid = sizeof(ep->mpa_pkt) - ep->mpa_pkt_len;
2175	uio.uio_segflg = UIO_SYSSPACE;
2176	uio.uio_rw = UIO_READ;
2177	uio.uio_td = NULL; /* uio.uio_td = ep->com.thread; */
2178
2179	rc = soreceive(ep->com.so, NULL, &uio, NULL, NULL, &flags);
2180	if (rc == EAGAIN)
2181		return 0;
2182	else if (rc)
2183		goto err_stop_timer;
2184
2185	KASSERT(uio.uio_offset > 0, ("%s: soreceive on so %p read no data",
2186	    __func__, ep->com.so));
2187	ep->mpa_pkt_len += uio.uio_offset;
2188
2189	/*
2190	 * If we get more than the supported amount of private data then we must
2191	 * fail this connection.  XXX: check so_rcv->sb_cc, or peek with another
2192	 * soreceive, or increase the size of mpa_pkt by 1 and abort if the last
2193	 * byte is filled by the soreceive above.
2194	 */
2195
2196	/* Don't even have the MPA message.  Wait for more data to arrive. */
2197	if (ep->mpa_pkt_len < sizeof(*mpa))
2198		return 0;
2199	mpa = (struct mpa_message *) ep->mpa_pkt;
2200
2201	/*
2202	 * Validate MPA Header.
2203	 */
2204	if (mpa->revision > mpa_rev) {
2205		log(LOG_ERR, "%s: MPA version mismatch. Local = %d,"
2206		    " Received = %d\n", __func__, mpa_rev, mpa->revision);
2207		goto err_stop_timer;
2208	}
2209
2210	if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key)))
2211		goto err_stop_timer;
2212
2213	/*
2214	 * Fail if there's too much private data.
2215	 */
2216	plen = ntohs(mpa->private_data_size);
2217	if (plen > MPA_MAX_PRIVATE_DATA)
2218		goto err_stop_timer;
2219
2220	/*
2221	 * Fail if there is more data than the MPA header plus plen allows.
2222	 */
2223	if (ep->mpa_pkt_len > (sizeof(*mpa) + plen))
2224		goto err_stop_timer;
2225
2226	ep->plen = (u8) plen;
2227
2228	/*
2229	 * If we don't have all the pdata yet, then bail.
2230	 */
2231	if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
2232		return 0;
2233
2234	/*
2235	 * If we get here we have accumulated the entire mpa
2236	 * start request message including private data.
2237	 */
2238	ep->mpa_attr.initiator = 0;
2239	ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
2240	ep->mpa_attr.recv_marker_enabled = markers_enabled;
2241	ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
2242	ep->mpa_attr.version = mpa->revision;
2243	if (mpa->revision == 1)
2244		ep->tried_with_mpa_v1 = 1;
2245	ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
2246
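	/*
	 * For MPA v2 with the enhanced-RDMA-connection extension the peer
	 * advertises its IRD/ORD in the v2 connection parameters; clamp both
	 * to the adapter's maximum read depth before they are passed up in
	 * the connect-request upcall.
	 */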
2247	if (mpa->revision == 2) {
2248		ep->mpa_attr.enhanced_rdma_conn =
2249		    mpa->flags & MPA_ENHANCED_RDMA_CONN ? 1 : 0;
2250		if (ep->mpa_attr.enhanced_rdma_conn) {
2251			mpa_v2_params = (struct mpa_v2_conn_params *)
2252				(ep->mpa_pkt + sizeof(*mpa));
2253			ep->ird = ntohs(mpa_v2_params->ird) &
2254				MPA_V2_IRD_ORD_MASK;
2255			ep->ird = min_t(u32, ep->ird,
2256					cur_max_read_depth(ep->com.dev));
2257			ep->ord = ntohs(mpa_v2_params->ord) &
2258				MPA_V2_IRD_ORD_MASK;
2259			ep->ord = min_t(u32, ep->ord,
2260					cur_max_read_depth(ep->com.dev));
2261			CTR3(KTR_IW_CXGBE, "%s initiator ird %u ord %u\n",
2262				 __func__, ep->ird, ep->ord);
2263			if (ntohs(mpa_v2_params->ird) & MPA_V2_PEER2PEER_MODEL)
2264				if (peer2peer) {
2265					if (ntohs(mpa_v2_params->ord) &
2266							MPA_V2_RDMA_WRITE_RTR)
2267						ep->mpa_attr.p2p_type =
2268						FW_RI_INIT_P2PTYPE_RDMA_WRITE;
2269					else if (ntohs(mpa_v2_params->ord) &
2270							MPA_V2_RDMA_READ_RTR)
2271						ep->mpa_attr.p2p_type =
2272						FW_RI_INIT_P2PTYPE_READ_REQ;
2273				}
2274		}
2275	} else if (mpa->revision == 1 && peer2peer)
2276		ep->mpa_attr.p2p_type = p2p_type;
2277
2278	if (set_tcpinfo(ep))
2279		goto err_stop_timer;
2280
2281	CTR5(KTR_IW_CXGBE, "%s: crc_enabled = %d, recv_marker_enabled = %d, "
2282	    "xmit_marker_enabled = %d, version = %d", __func__,
2283	    ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
2284	    ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version);
2285
2286	ep->com.state = MPA_REQ_RCVD;
2287	STOP_EP_TIMER(ep);
2288
2289	/* drive upcall */
2290	if (ep->parent_ep->com.state != DEAD)
2291		if (connect_request_upcall(ep))
2292			goto err_out;
2293	return 0;
2294
2295err_stop_timer:
2296	STOP_EP_TIMER(ep);
2297err_out:
2298	return 2;
2299}
2300
2301/*
2302 * iwcm->reject.  Reject an incoming MPA connection request, sending an MPA
2303 * reject message (or simply aborting for MPA rev 0), and tear the endpoint
2304 * down.  Returns -errno on failure.
2305 */
2306int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
2307{
2308	int err;
2309	struct c4iw_ep *ep = to_ep(cm_id);
2310	int abort = 0;
2311
2312	mutex_lock(&ep->com.mutex);
2313	CTR2(KTR_IW_CXGBE, "%s:crcB %p", __func__, ep);
2314
2315	if ((ep->com.state == DEAD) ||
2316			(ep->com.state != MPA_REQ_RCVD)) {
2317
2318		CTR2(KTR_IW_CXGBE, "%s:crc1 %p", __func__, ep);
2319		mutex_unlock(&ep->com.mutex);
2320		c4iw_put_ep(&ep->com);
2321		return -ECONNRESET;
2322	}
2323	set_bit(ULP_REJECT, &ep->com.history);
2324
2325	if (mpa_rev == 0) {
2326
2327		CTR2(KTR_IW_CXGBE, "%s:crc2 %p", __func__, ep);
2328		abort = 1;
2329	}
2330	else {
2331
2332		CTR2(KTR_IW_CXGBE, "%s:crc3 %p", __func__, ep);
2333		abort = send_mpa_reject(ep, pdata, pdata_len);
2334	}
2335	STOP_EP_TIMER(ep);
2336	err = c4iw_ep_disconnect(ep, abort != 0, GFP_KERNEL);
2337	mutex_unlock(&ep->com.mutex);
2338	c4iw_put_ep(&ep->com);
2339	CTR3(KTR_IW_CXGBE, "%s:crc4 %p, err: %d", __func__, ep, err);
2340	return 0;
2341}
2342
2343int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
2344{
2345	int err;
2346	struct c4iw_qp_attributes attrs = {0};
2347	enum c4iw_qp_attr_mask mask;
2348	struct c4iw_ep *ep = to_ep(cm_id);
2349	struct c4iw_dev *h = to_c4iw_dev(cm_id->device);
2350	struct c4iw_qp *qp = get_qhp(h, conn_param->qpn);
2351	int abort = 0;
2352
2353	mutex_lock(&ep->com.mutex);
2354	CTR2(KTR_IW_CXGBE, "%s:cacB %p", __func__, ep);
2355
2356	if ((ep->com.state == DEAD) ||
2357			(ep->com.state != MPA_REQ_RCVD)) {
2358
2359		CTR2(KTR_IW_CXGBE, "%s:cac1 %p", __func__, ep);
2360		err = -ECONNRESET;
2361		goto err_out;
2362	}
2363
2364	BUG_ON(!qp);
2365
2366	set_bit(ULP_ACCEPT, &ep->com.history);
2367
2368	if ((conn_param->ord > c4iw_max_read_depth) ||
2369		(conn_param->ird > c4iw_max_read_depth)) {
2370
2371		CTR2(KTR_IW_CXGBE, "%s:cac2 %p", __func__, ep);
2372		err = -EINVAL;
2373		goto err_abort;
2374	}
2375
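	/*
	 * For an MPA v2 enhanced connection, reconcile the ord/ird requested
	 * by the ULP with what the peer advertised: shrink the local values
	 * when relaxed negotiation allows it, otherwise reject the request.
	 */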
2376	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
2377
2378		CTR2(KTR_IW_CXGBE, "%s:cac3 %p", __func__, ep);
2379
2380		if (conn_param->ord > ep->ird) {
2381			if (RELAXED_IRD_NEGOTIATION) {
2382				conn_param->ord = ep->ird;
2383			} else {
2384				ep->ird = conn_param->ird;
2385				ep->ord = conn_param->ord;
2386				send_mpa_reject(ep, conn_param->private_data,
2387						conn_param->private_data_len);
2388				err = -ENOMEM;
2389				goto err_abort;
2390			}
2391		}
2392		if (conn_param->ird < ep->ord) {
2393			if (RELAXED_IRD_NEGOTIATION &&
2394			    ep->ord <= h->rdev.adap->params.max_ordird_qp) {
2395				conn_param->ird = ep->ord;
2396			} else {
2397				err = -ENOMEM;
2398				goto err_abort;
2399			}
2400		}
2401	}
2402	ep->ird = conn_param->ird;
2403	ep->ord = conn_param->ord;
2404
2405	if (ep->mpa_attr.version == 1) {
2406		if (peer2peer && ep->ird == 0)
2407			ep->ird = 1;
2408	} else {
2409		if (peer2peer &&
2410		    (ep->mpa_attr.p2p_type != FW_RI_INIT_P2PTYPE_DISABLED) &&
2411		    (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ) && ep->ird == 0)
2412			ep->ird = 1;
2413	}
2414
2415	CTR4(KTR_IW_CXGBE, "%s %d ird %d ord %d\n", __func__, __LINE__,
2416			ep->ird, ep->ord);
2417
2418	ep->com.cm_id = cm_id;
2419	ref_cm_id(&ep->com);
2420	ep->com.qp = qp;
2421	ref_qp(ep);
2422	//ep->ofld_txq = TOEPCB(ep->com.so)->ofld_txq;
2423
2424	/* bind QP to EP and move to RTS */
2425	attrs.mpa_attr = ep->mpa_attr;
2426	attrs.max_ird = ep->ird;
2427	attrs.max_ord = ep->ord;
2428	attrs.llp_stream_handle = ep;
2429	attrs.next_state = C4IW_QP_STATE_RTS;
2430
2431	/* bind QP and TID with INIT_WR */
2432	mask = C4IW_QP_ATTR_NEXT_STATE |
2433		C4IW_QP_ATTR_LLP_STREAM_HANDLE |
2434		C4IW_QP_ATTR_MPA_ATTR |
2435		C4IW_QP_ATTR_MAX_IRD |
2436		C4IW_QP_ATTR_MAX_ORD;
2437
2438	err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, mask, &attrs, 1);
2439	if (err) {
2440		CTR3(KTR_IW_CXGBE, "%s:caca %p, err: %d", __func__, ep, err);
2441		goto err_deref_cm_id;
2442	}
2443
2444	err = send_mpa_reply(ep, conn_param->private_data,
2445			conn_param->private_data_len);
2446	if (err) {
2447		CTR3(KTR_IW_CXGBE, "%s:cacb %p, err: %d", __func__, ep, err);
2448		goto err_deref_cm_id;
2449	}
2450
2451	ep->com.state = FPDU_MODE;
2452	established_upcall(ep);
2453	mutex_unlock(&ep->com.mutex);
2454	c4iw_put_ep(&ep->com);
2455	CTR2(KTR_IW_CXGBE, "%s:cacE %p", __func__, ep);
2456	return 0;
2457err_deref_cm_id:
2458	deref_cm_id(&ep->com);
2459err_abort:
2460	abort = 1;
2461err_out:
2462	if (abort)
2463		c4iw_ep_disconnect(ep, 1, GFP_KERNEL);
2464	mutex_unlock(&ep->com.mutex);
2465	c4iw_put_ep(&ep->com);
2466	CTR2(KTR_IW_CXGBE, "%s:cacE err %p", __func__, ep);
2467	return err;
2468}
2469
2470static int
2471c4iw_sock_create(struct sockaddr_storage *laddr, struct socket **so)
2472{
2473	int ret;
2474	int size;
2475	struct socket *sock = NULL;
2476
2477	ret = sock_create_kern(laddr->ss_family,
2478			SOCK_STREAM, IPPROTO_TCP, &sock);
2479	if (ret) {
2480		CTR2(KTR_IW_CXGBE, "%s:Failed to create TCP socket. err %d",
2481				__func__, ret);
2482		return ret;
2483	}
2484
2485	ret = sobind(sock, (struct sockaddr *)laddr, curthread);
2486	if (ret) {
2487		CTR2(KTR_IW_CXGBE, "%s:Failed to bind socket. err %d",
2488				__func__, ret);
2489		sock_release(sock);
2490		return ret;
2491	}
2492
2493	size = laddr->ss_family == AF_INET6 ?
2494		sizeof(struct sockaddr_in6) : sizeof(struct sockaddr_in);
2495	ret = sock_getname(sock, (struct sockaddr *)laddr, &size, 0);
2496	if (ret) {
2497		CTR2(KTR_IW_CXGBE, "%s:sock_getname failed. err %d",
2498				__func__, ret);
2499		sock_release(sock);
2500		return ret;
2501	}
2502
2503	*so = sock;
2504	return 0;
2505}
2506
2507int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
2508{
2509	int err = 0;
2510	struct c4iw_dev *dev = to_c4iw_dev(cm_id->device);
2511	struct c4iw_ep *ep = NULL;
2512	struct ifnet    *nh_ifp;        /* Logical egress interface */
2513
2514	CTR2(KTR_IW_CXGBE, "%s:ccB %p", __func__, cm_id);
2515
2516
2517	if ((conn_param->ord > c4iw_max_read_depth) ||
2518		(conn_param->ird > c4iw_max_read_depth)) {
2519
2520		CTR2(KTR_IW_CXGBE, "%s:cc1 %p", __func__, cm_id);
2521		err = -EINVAL;
2522		goto out;
2523	}
2524	ep = alloc_ep(sizeof(*ep), GFP_KERNEL);
2525
2526	init_timer(&ep->timer);
2527	ep->plen = conn_param->private_data_len;
2528
2529	if (ep->plen) {
2530
2531		CTR2(KTR_IW_CXGBE, "%s:cc3 %p", __func__, ep);
2532		memcpy(ep->mpa_pkt + sizeof(struct mpa_message),
2533				conn_param->private_data, ep->plen);
2534	}
2535	ep->ird = conn_param->ird;
2536	ep->ord = conn_param->ord;
2537
2538	if (peer2peer && ep->ord == 0) {
2539
2540		CTR2(KTR_IW_CXGBE, "%s:cc4 %p", __func__, ep);
2541		ep->ord = 1;
2542	}
2543
2544	ep->com.dev = dev;
2545	ep->com.cm_id = cm_id;
2546	ref_cm_id(&ep->com);
2547	ep->com.qp = get_qhp(dev, conn_param->qpn);
2548
2549	if (!ep->com.qp) {
2550
2551		CTR2(KTR_IW_CXGBE, "%s:cc5 %p", __func__, ep);
2552		err = -EINVAL;
2553		goto fail;
2554	}
2555	ref_qp(ep);
2556	ep->com.thread = curthread;
2557
2558	err = get_ifnet_from_raddr(&cm_id->remote_addr, &nh_ifp);
2559	if (err) {
2560
2561		CTR2(KTR_IW_CXGBE, "%s:cc7 %p", __func__, ep);
2562		printk(KERN_ERR MOD "%s - cannot find route.\n", __func__);
2563		err = -EHOSTUNREACH;
2564		goto fail;
2565	}
2566
2567	if (!(nh_ifp->if_capenable & IFCAP_TOE) ||
2568	    TOEDEV(nh_ifp) == NULL) {
2569		err = -ENOPROTOOPT;
2570		goto fail;
2571	}
2572	ep->com.state = CONNECTING;
2573	ep->tos = 0;
2574	ep->com.local_addr = cm_id->local_addr;
2575	ep->com.remote_addr = cm_id->remote_addr;
2576
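	/*
	 * Create and bind a kernel TCP socket for the active side, then start
	 * a non-blocking connect; the remaining connection setup is driven by
	 * the socket upcalls that init_iwarp_socket() sets up.
	 */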
2577	err = c4iw_sock_create(&cm_id->local_addr, &ep->com.so);
2578	if (err)
2579		goto fail;
2580
2581	setiwsockopt(ep->com.so);
2582	err = -soconnect(ep->com.so, (struct sockaddr *)&ep->com.remote_addr,
2583		ep->com.thread);
2584	if (!err) {
2585		init_iwarp_socket(ep->com.so, &ep->com);
2586		goto out;
2587	} else
2588		goto fail_free_so;
2589
2590fail_free_so:
2591	sock_release(ep->com.so);
2592fail:
2593	deref_cm_id(&ep->com);
2594	c4iw_put_ep(&ep->com);
2595	ep = NULL;
2596out:
2597	CTR2(KTR_IW_CXGBE, "%s:ccE ret:%d", __func__, err);
2598	return err;
2599}
2600
2601/*
2602 * iwcm->create_listen.  Returns -errno on failure.
2603 */
2604int
2605c4iw_create_listen(struct iw_cm_id *cm_id, int backlog)
2606{
2607	struct c4iw_dev *dev = to_c4iw_dev(cm_id->device);
2608	struct c4iw_listen_ep *lep = NULL;
2609	struct listen_port_info *port_info = NULL;
2610	int rc = 0;
2611
2612	CTR3(KTR_IW_CXGBE, "%s: cm_id %p, backlog %d", __func__, cm_id,
2613			backlog);
2614	lep = alloc_ep(sizeof(*lep), GFP_KERNEL);
2615	lep->com.cm_id = cm_id;
2616	ref_cm_id(&lep->com);
2617	lep->com.dev = dev;
2618	lep->backlog = backlog;
2619	lep->com.local_addr = cm_id->local_addr;
2620	lep->com.thread = curthread;
2621	cm_id->provider_data = lep;
2622	lep->com.state = LISTEN;
2623
2624	/* In case of INADDR_ANY, ibcore creates a cm_id for each device and
2625	 * invokes the iw_cxgbe listener callbacks assuming that iw_cxgbe creates
2626	 * HW listeners for each device separately.  But toecore expects a single
2627	 * solisten() call with the INADDR_ANY address to create HW listeners on
2628	 * all devices for a given port number.  So the iw_cxgbe driver calls
2629	 * solisten() only once for INADDR_ANY (usually on the first listener
2630	 * callback from ibcore), and all subsequent INADDR_ANY listener
2631	 * callbacks from ibcore (for the same port address) do not invoke
2632	 * solisten(), as the first callback has already created listeners for
2633	 * all other devices (via that solisten).
2634	 */
2635	if (c4iw_any_addr((struct sockaddr *)&lep->com.local_addr)) {
2636		port_info = add_ep_to_listenlist(lep);
2637		/* skip solisten() if refcnt > 1, as the listeners were
2638		 * already created by 'Master lep'
2639		 */
2640		if (port_info->refcnt > 1) {
2641			/* As there will be only one listener socket for a TCP
2642			 * port, copy the Master lep's socket pointer to the other
2643			 * leps that belong to the same TCP port.
2644			 */
2645			struct c4iw_listen_ep *head_lep =
2646					container_of(port_info->lep_list.next,
2647					struct c4iw_listen_ep, listen_ep_list);
2648			lep->com.so =  head_lep->com.so;
2649			goto out;
2650		}
2651	}
2652	rc = c4iw_sock_create(&cm_id->local_addr, &lep->com.so);
2653	if (rc) {
2654		CTR2(KTR_IW_CXGBE, "%s:Failed to create socket. err %d",
2655				__func__, rc);
2656		goto fail;
2657	}
2658
2659	rc = solisten(lep->com.so, backlog, curthread);
2660	if (rc) {
2661		CTR3(KTR_IW_CXGBE, "%s:Failed to listen on sock:%p. err %d",
2662				__func__, lep->com.so, rc);
2663		goto fail_free_so;
2664	}
2665	init_iwarp_socket(lep->com.so, &lep->com);
2666out:
2667	return 0;
2668
2669fail_free_so:
2670	sock_release(lep->com.so);
2671fail:
2672	if (port_info)
2673		rem_ep_from_listenlist(lep);
2674	deref_cm_id(&lep->com);
2675	c4iw_put_ep(&lep->com);
2676	return rc;
2677}
2678
2679int
2680c4iw_destroy_listen(struct iw_cm_id *cm_id)
2681{
2682	struct c4iw_listen_ep *lep = to_listen_ep(cm_id);
2683
2684	mutex_lock(&lep->com.mutex);
2685	CTR3(KTR_IW_CXGBE, "%s: cm_id %p, state %s", __func__, cm_id,
2686	    states[lep->com.state]);
2687
2688	lep->com.state = DEAD;
2689	if (c4iw_any_addr((struct sockaddr *)&lep->com.local_addr)) {
2690		/* if no refcount then close listen socket */
2691		if (!rem_ep_from_listenlist(lep))
2692			close_socket(lep->com.so);
2693	} else
2694		close_socket(lep->com.so);
2695	deref_cm_id(&lep->com);
2696	mutex_unlock(&lep->com.mutex);
2697	c4iw_put_ep(&lep->com);
2698	return 0;
2699}
2700
2701int __c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp)
2702{
2703	int ret;
2704	mutex_lock(&ep->com.mutex);
2705	ret = c4iw_ep_disconnect(ep, abrupt, gfp);
2706	mutex_unlock(&ep->com.mutex);
2707	return ret;
2708}
2709
2710int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp)
2711{
2712	int ret = 0;
2713	int close = 0;
2714	int fatal = 0;
2715	struct c4iw_rdev *rdev;
2716
2717
2718	CTR2(KTR_IW_CXGBE, "%s:cedB %p", __func__, ep);
2719
2720	rdev = &ep->com.dev->rdev;
2721
2722	if (c4iw_fatal_error(rdev)) {
2723
2724		CTR2(KTR_IW_CXGBE, "%s:ced1 %p", __func__, ep);
2725		fatal = 1;
2726		close_complete_upcall(ep, -ECONNRESET);
2727		send_abort(ep);
2728		ep->com.state = DEAD;
2729	}
2730	CTR3(KTR_IW_CXGBE, "%s:ced2 %p %s", __func__, ep,
2731	    states[ep->com.state]);
2732
2733	/*
2734	 * Ref the ep here in case we have fatal errors causing the
2735	 * ep to be released and freed.
2736	 */
2737	c4iw_get_ep(&ep->com);
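	/*
	 * Drive the teardown state machine: states with an active stream move
	 * to CLOSING (graceful) or ABORTING (abrupt) and trigger a disconnect
	 * or abort on the socket below; MORIBUND/ABORTING/DEAD mean teardown
	 * is already in progress, so the request is ignored.
	 */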
2738	switch (ep->com.state) {
2739
2740		case MPA_REQ_WAIT:
2741		case MPA_REQ_SENT:
2742		case MPA_REQ_RCVD:
2743		case MPA_REP_SENT:
2744		case FPDU_MODE:
2745			close = 1;
2746			if (abrupt)
2747				ep->com.state = ABORTING;
2748			else {
2749				ep->com.state = CLOSING;
2750				START_EP_TIMER(ep);
2751			}
2752			set_bit(CLOSE_SENT, &ep->com.flags);
2753			break;
2754
2755		case CLOSING:
2756
2757			if (!test_and_set_bit(CLOSE_SENT, &ep->com.flags)) {
2758
2759				close = 1;
2760				if (abrupt) {
2761					STOP_EP_TIMER(ep);
2762					ep->com.state = ABORTING;
2763				} else
2764					ep->com.state = MORIBUND;
2765			}
2766			break;
2767
2768		case MORIBUND:
2769		case ABORTING:
2770		case DEAD:
2771			CTR3(KTR_IW_CXGBE,
2772			    "%s ignoring disconnect ep %p state %u", __func__,
2773			    ep, ep->com.state);
2774			break;
2775
2776		default:
2777			BUG();
2778			break;
2779	}
2780
2781
2782	if (close) {
2783
2784		CTR2(KTR_IW_CXGBE, "%s:ced3 %p", __func__, ep);
2785
2786		if (abrupt) {
2787
2788			CTR2(KTR_IW_CXGBE, "%s:ced4 %p", __func__, ep);
2789			set_bit(EP_DISC_ABORT, &ep->com.history);
2790			close_complete_upcall(ep, -ECONNRESET);
2791			ret = send_abort(ep);
2792			if (ret)
2793				fatal = 1;
2794		} else {
2795
2796			CTR2(KTR_IW_CXGBE, "%s:ced5 %p", __func__, ep);
2797			set_bit(EP_DISC_CLOSE, &ep->com.history);
2798
2799			if (!ep->parent_ep)
2800				ep->com.state = MORIBUND;
2801			sodisconnect(ep->com.so);
2802		}
2803
2804	}
2805
2806	if (fatal) {
2807		set_bit(EP_DISC_FAIL, &ep->com.history);
2808		if (!abrupt) {
2809			STOP_EP_TIMER(ep);
2810			close_complete_upcall(ep, -EIO);
2811		}
2812		if (ep->com.qp) {
2813			struct c4iw_qp_attributes attrs = {0};
2814
2815			attrs.next_state = C4IW_QP_STATE_ERROR;
2816			ret = c4iw_modify_qp(ep->com.dev, ep->com.qp,
2817						C4IW_QP_ATTR_NEXT_STATE,
2818						&attrs, 1);
2819			if (ret) {
2820				CTR2(KTR_IW_CXGBE, "%s:ced7 %p", __func__, ep);
2821				printf("%s - qp <- error failed!\n", __func__);
2822			}
2823		}
2824		release_ep_resources(ep);
2825		ep->com.state = DEAD;
2826		CTR2(KTR_IW_CXGBE, "%s:ced6 %p", __func__, ep);
2827	}
2828	c4iw_put_ep(&ep->com);
2829	CTR2(KTR_IW_CXGBE, "%s:cedE %p", __func__, ep);
2830	return ret;
2831}
2832
2833#ifdef C4IW_EP_REDIRECT
2834int c4iw_ep_redirect(void *ctx, struct dst_entry *old, struct dst_entry *new,
2835		struct l2t_entry *l2t)
2836{
2837	struct c4iw_ep *ep = ctx;
2838
2839	if (ep->dst != old)
2840		return 0;
2841
2842	PDBG("%s ep %p redirect to dst %p l2t %p\n", __func__, ep, new,
2843			l2t);
2844	dst_hold(new);
2845	cxgb4_l2t_release(ep->l2t);
2846	ep->l2t = l2t;
2847	dst_release(old);
2848	ep->dst = new;
2849	return 1;
2850}
2851#endif
2852
2853
2854
2855static void ep_timeout(unsigned long arg)
2856{
2857	struct c4iw_ep *ep = (struct c4iw_ep *)arg;
2858
2859	if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) {
2860
2861		/*
2862		 * Only insert if it is not already on the list.
2863		 */
2864		if (!(ep->com.ep_events & C4IW_EVENT_TIMEOUT)) {
2865			CTR2(KTR_IW_CXGBE, "%s:et1 %p", __func__, ep);
2866			add_ep_to_req_list(ep, C4IW_EVENT_TIMEOUT);
2867		}
2868	}
2869}
2870
2871static int fw6_wr_rpl(struct adapter *sc, const __be64 *rpl)
2872{
2873	uint64_t val = be64toh(*rpl);
2874	int ret;
2875	struct c4iw_wr_wait *wr_waitp;
2876
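	/*
	 * The FW6 work-request reply carries the completion status in bits
	 * 15:8 of its first 64-bit word and the c4iw_wr_wait cookie verbatim
	 * in its second; wake the waiter with -status on error.
	 */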
2877	ret = (int)((val >> 8) & 0xff);
2878	wr_waitp = (struct c4iw_wr_wait *)rpl[1];
2879	CTR3(KTR_IW_CXGBE, "%s wr_waitp %p ret %u", __func__, wr_waitp, ret);
2880	if (wr_waitp)
2881		c4iw_wake_up(wr_waitp, ret ? -ret : 0);
2882
2883	return (0);
2884}
2885
2886static int fw6_cqe_handler(struct adapter *sc, const __be64 *rpl)
2887{
2888	struct cqe_list_entry *cle;
2889	unsigned long flag;
2890
2891	cle = malloc(sizeof(*cle), M_CXGBE, M_NOWAIT);
	if (cle == NULL)
		return (0);	/* drop this error CQE; allocation failed */
2892	cle->rhp = sc->iwarp_softc;
2893	cle->err_cqe = *(const struct t4_cqe *)(&rpl[0]);
2894
2895	spin_lock_irqsave(&err_cqe_lock, flag);
2896	list_add_tail(&cle->entry, &err_cqe_list);
2897	queue_work(c4iw_taskq, &c4iw_task);
2898	spin_unlock_irqrestore(&err_cqe_lock, flag);
2899
2900	return (0);
2901}
2902
2903static int
2904process_terminate(struct c4iw_ep *ep)
2905{
2906	struct c4iw_qp_attributes attrs = {0};
2907
2908	CTR2(KTR_IW_CXGBE, "%s:tB %p", __func__, ep);
2909
2910	if (ep && ep->com.qp) {
2911
2912		printk(KERN_WARNING MOD "TERM received tid %u qpid %u\n",
2913				ep->hwtid, ep->com.qp->wq.sq.qid);
2914		attrs.next_state = C4IW_QP_STATE_TERMINATE;
2915		c4iw_modify_qp(ep->com.dev, ep->com.qp, C4IW_QP_ATTR_NEXT_STATE, &attrs,
2916				1);
2917	} else
2918		printk(KERN_WARNING MOD "TERM received tid %u no ep/qp\n",
2919								ep->hwtid);
2920	CTR2(KTR_IW_CXGBE, "%s:tE %p", __func__, ep);
2921
2922	return 0;
2923}
2924
2925int __init c4iw_cm_init(void)
2926{
2927
2928	t4_register_cpl_handler(CPL_RDMA_TERMINATE, terminate);
2929	t4_register_fw_msg_handler(FW6_TYPE_WR_RPL, fw6_wr_rpl);
2930	t4_register_fw_msg_handler(FW6_TYPE_CQE, fw6_cqe_handler);
2931	t4_register_an_handler(c4iw_ev_handler);
2932
2933	TAILQ_INIT(&req_list);
2934	spin_lock_init(&req_lock);
2935	INIT_LIST_HEAD(&err_cqe_list);
2936	spin_lock_init(&err_cqe_lock);
2937
2938	INIT_WORK(&c4iw_task, process_req);
2939
2940	c4iw_taskq = create_singlethread_workqueue("iw_cxgbe");
2941	if (!c4iw_taskq)
2942		return -ENOMEM;
2943
2944	return 0;
2945}
2946
2947void __exit c4iw_cm_term(void)
2948{
2949	WARN_ON(!TAILQ_EMPTY(&req_list));
2950	WARN_ON(!list_empty(&err_cqe_list));
2951	flush_workqueue(c4iw_taskq);
2952	destroy_workqueue(c4iw_taskq);
2953
2954	t4_register_cpl_handler(CPL_RDMA_TERMINATE, NULL);
2955	t4_register_fw_msg_handler(FW6_TYPE_WR_RPL, NULL);
2956	t4_register_fw_msg_handler(FW6_TYPE_CQE, NULL);
2957	t4_register_an_handler(NULL);
2958}
2959#endif
2960