cm.c revision 346922
1/*
2 * Copyright (c) 2009-2013, 2016 Chelsio, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses.  You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 *     Redistribution and use in source and binary forms, with or
11 *     without modification, are permitted provided that the following
12 *     conditions are met:
13 *
14 *      - Redistributions of source code must retain the above
15 *	  copyright notice, this list of conditions and the following
16 *	  disclaimer.
17 *
18 *      - Redistributions in binary form must reproduce the above
19 *	  copyright notice, this list of conditions and the following
20 *	  disclaimer in the documentation and/or other materials
21 *	  provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32#include <sys/cdefs.h>
33__FBSDID("$FreeBSD: stable/11/sys/dev/cxgbe/iw_cxgbe/cm.c 346922 2019-04-29 19:47:21Z np $");
34
35#include "opt_inet.h"
36
37#ifdef TCP_OFFLOAD
38#include <sys/types.h>
39#include <sys/malloc.h>
40#include <sys/socket.h>
41#include <sys/socketvar.h>
42#include <sys/sockio.h>
43#include <sys/taskqueue.h>
44#include <netinet/in.h>
45#include <net/route.h>
46
47#include <netinet/in_systm.h>
48#include <netinet/in_pcb.h>
49#include <netinet6/in6_pcb.h>
50#include <netinet/ip.h>
51#include <netinet/in_fib.h>
52#include <netinet6/in6_fib.h>
53#include <netinet6/scope6_var.h>
54#include <netinet/ip_var.h>
55#include <netinet/tcp_var.h>
56#include <netinet/tcp.h>
57#include <netinet/tcpip.h>
58
59#include <netinet/toecore.h>
60
61struct sge_iq;
62struct rss_header;
63struct cpl_set_tcb_rpl;
64#include <linux/types.h>
65#include "offload.h"
66#include "tom/t4_tom.h"
67
68#define TOEPCB(so)  ((struct toepcb *)(so_sototcpcb((so))->t_toe))
69
70#include "iw_cxgbe.h"
71#include <linux/module.h>
72#include <linux/workqueue.h>
73#include <linux/notifier.h>
74#include <linux/inetdevice.h>
75#include <linux/if_vlan.h>
76#include <net/netevent.h>
77#include <rdma/rdma_cm.h>
78
79static spinlock_t req_lock;
80static TAILQ_HEAD(c4iw_ep_list, c4iw_ep_common) req_list;
81static struct work_struct c4iw_task;
82static struct workqueue_struct *c4iw_taskq;
83static LIST_HEAD(err_cqe_list);
84static spinlock_t err_cqe_lock;
85static LIST_HEAD(listen_port_list);
86static DEFINE_MUTEX(listen_port_mutex);
87
88static void process_req(struct work_struct *ctx);
89static void start_ep_timer(struct c4iw_ep *ep);
90static int stop_ep_timer(struct c4iw_ep *ep);
91static int set_tcpinfo(struct c4iw_ep *ep);
92static void process_timeout(struct c4iw_ep *ep);
93static void process_err_cqes(void);
94static void *alloc_ep(int size, gfp_t flags);
95static void close_socket(struct socket *so);
96static int send_mpa_req(struct c4iw_ep *ep);
97static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen);
98static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen);
99static void close_complete_upcall(struct c4iw_ep *ep, int status);
100static int send_abort(struct c4iw_ep *ep);
101static void peer_close_upcall(struct c4iw_ep *ep);
102static void peer_abort_upcall(struct c4iw_ep *ep);
103static void connect_reply_upcall(struct c4iw_ep *ep, int status);
104static int connect_request_upcall(struct c4iw_ep *ep);
105static void established_upcall(struct c4iw_ep *ep);
106static int process_mpa_reply(struct c4iw_ep *ep);
107static int process_mpa_request(struct c4iw_ep *ep);
108static void process_peer_close(struct c4iw_ep *ep);
109static void process_conn_error(struct c4iw_ep *ep);
110static void process_close_complete(struct c4iw_ep *ep);
111static void ep_timeout(unsigned long arg);
112static void setiwsockopt(struct socket *so);
113static void init_iwarp_socket(struct socket *so, void *arg);
114static void uninit_iwarp_socket(struct socket *so);
115static void process_data(struct c4iw_ep *ep);
116static void process_connected(struct c4iw_ep *ep);
117static int c4iw_so_upcall(struct socket *so, void *arg, int waitflag);
118static void process_socket_event(struct c4iw_ep *ep);
119static void release_ep_resources(struct c4iw_ep *ep);
120static int process_terminate(struct c4iw_ep *ep);
121static int terminate(struct sge_iq *iq, const struct rss_header *rss,
122    struct mbuf *m);
123static int add_ep_to_req_list(struct c4iw_ep *ep, int ep_events);
124static struct listen_port_info *
125add_ep_to_listenlist(struct c4iw_listen_ep *lep);
126static int rem_ep_from_listenlist(struct c4iw_listen_ep *lep);
127static struct c4iw_listen_ep *
128find_real_listen_ep(struct c4iw_listen_ep *master_lep, struct socket *so);
129static int get_ifnet_from_raddr(struct sockaddr_storage *raddr,
130		struct ifnet **ifp);
131static void process_newconn(struct c4iw_listen_ep *master_lep,
132		struct socket *new_so);
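
/*
 * Wrappers around start_ep_timer()/stop_ep_timer() that log the calling
 * function and line number to the KTR trace buffer before invoking the
 * helper.  STOP_EP_TIMER() is a statement expression and evaluates to
 * stop_ep_timer()'s return value.
 */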
133#define START_EP_TIMER(ep) \
134    do { \
135	    CTR3(KTR_IW_CXGBE, "start_ep_timer (%s:%d) ep %p", \
136		__func__, __LINE__, (ep)); \
137	    start_ep_timer(ep); \
138    } while (0)
139
140#define STOP_EP_TIMER(ep) \
141    ({ \
142	    CTR3(KTR_IW_CXGBE, "stop_ep_timer (%s:%d) ep %p", \
143		__func__, __LINE__, (ep)); \
144	    stop_ep_timer(ep); \
145    })
146
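/*
 * in_getsockaddr()/in6_getsockaddr() (and the peer-address variants below)
 * allocate a sockaddr with M_SONAME, so these macros copy the result into
 * the caller-supplied sockaddr_storage and then free the allocation.
 */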
147#define GET_LOCAL_ADDR(pladdr, so) \
148	do { \
149		struct sockaddr_storage *__a = NULL; \
150		struct  inpcb *__inp = sotoinpcb(so); \
151		KASSERT(__inp != NULL, \
152		   ("GET_LOCAL_ADDR(%s):so:%p, inp = NULL", __func__, so)); \
153		if (__inp->inp_vflag & INP_IPV4) \
154			in_getsockaddr(so, (struct sockaddr **)&__a); \
155		else \
156			in6_getsockaddr(so, (struct sockaddr **)&__a); \
157		*(pladdr) = *__a; \
158		free(__a, M_SONAME); \
159	} while (0)
160
161#define GET_REMOTE_ADDR(praddr, so) \
162	do { \
163		struct sockaddr_storage *__a = NULL; \
164		struct  inpcb *__inp = sotoinpcb(so); \
165		KASSERT(__inp != NULL, \
166		   ("GET_REMOTE_ADDR(%s):so:%p, inp = NULL", __func__, so)); \
167		if (__inp->inp_vflag & INP_IPV4) \
168			in_getpeeraddr(so, (struct sockaddr **)&__a); \
169		else \
170			in6_getpeeraddr(so, (struct sockaddr **)&__a); \
171		*(praddr) = *__a; \
172		free(__a, M_SONAME); \
173	} while (0)
174
175#ifdef KTR
176static char *states[] = {
177	"idle",
178	"listen",
179	"connecting",
180	"mpa_wait_req",
181	"mpa_req_sent",
182	"mpa_req_rcvd",
183	"mpa_rep_sent",
184	"fpdu_mode",
185	"aborting",
186	"closing",
187	"moribund",
188	"dead",
189	NULL,
190};
191#endif
192
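/*
 * cm_id and QP reference helpers.  Every reference taken or dropped is also
 * recorded as a bit in the endpoint's history mask to help debug reference
 * leaks.
 */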
193static void deref_cm_id(struct c4iw_ep_common *epc)
194{
195      epc->cm_id->rem_ref(epc->cm_id);
196      epc->cm_id = NULL;
197      set_bit(CM_ID_DEREFED, &epc->history);
198}
199
200static void ref_cm_id(struct c4iw_ep_common *epc)
201{
202      set_bit(CM_ID_REFED, &epc->history);
203      epc->cm_id->add_ref(epc->cm_id);
204}
205
206static void deref_qp(struct c4iw_ep *ep)
207{
208	c4iw_qp_rem_ref(&ep->com.qp->ibqp);
209	clear_bit(QP_REFERENCED, &ep->com.flags);
210	set_bit(QP_DEREFED, &ep->com.history);
211}
212
213static void ref_qp(struct c4iw_ep *ep)
214{
215	set_bit(QP_REFERENCED, &ep->com.flags);
216	set_bit(QP_REFED, &ep->com.history);
217	c4iw_qp_add_ref(&ep->com.qp->ibqp);
218}
219/* allocated per TCP port while listening */
220struct listen_port_info {
221	uint16_t port_num; /* TCP port number */
222	struct list_head list; /* belongs to listen_port_list */
223	struct list_head lep_list; /* per port lep list */
224	uint32_t refcnt; /* number of lep's listening */
225};
226
227/*
228 * The following two lists are used to manage INADDR_ANY listeners:
229 * 1) listen_port_list
230 * 2) lep_list
231 *
232 * Below is an overview of the INADDR_ANY listener lists on a system with a
233 * two-port adapter:
234 *   |------------------|
235 *   |listen_port_list  |
236 *   |------------------|
237 *            |
238 *            |              |-----------|       |-----------|
239 *            |              | port_num:X|       | port_num:X|
240 *            |--------------|-list------|-------|-list------|-------....
241 *                           | lep_list----|     | lep_list----|
242 *                           | refcnt    | |     | refcnt    | |
243 *                           |           | |     |           | |
244 *                           |           | |     |           | |
245 *                           |-----------| |     |-----------| |
246 *                                         |                   |
247 *                                         |                   |
248 *                                         |                   |
249 *                                         |                   |         lep1                  lep2
250 *                                         |                   |    |----------------|    |----------------|
251 *                                         |                   |----| listen_ep_list |----| listen_ep_list |
252 *                                         |                        |----------------|    |----------------|
253 *                                         |
254 *                                         |
255 *                                         |        lep1                  lep2
256 *                                         |   |----------------|    |----------------|
257 *                                         |---| listen_ep_list |----| listen_ep_list |
258 *                                             |----------------|    |----------------|
259 *
260 * Because this is a two-port adapter, there are two leps (lep1 & lep2) for
261 * each TCP port number.
262 *
263 * Here 'lep1' is always marked as the master lep, because solisten() is
264 * always called through the first lep.
265 *
266 */
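/*
 * add_ep_to_listenlist() creates (or finds) the listen_port_info for the
 * lep's TCP port and links the lep into the per-port list;
 * rem_ep_from_listenlist() undoes that and frees the port_info once the last
 * lep for the port is gone.  find_real_listen_ep() walks the same structures
 * when a SYN arrives on a wildcard listener.
 */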
267static struct listen_port_info *
268add_ep_to_listenlist(struct c4iw_listen_ep *lep)
269{
270	uint16_t port;
271	struct listen_port_info *port_info = NULL;
272	struct sockaddr_storage *laddr = &lep->com.local_addr;
273
274	port = (laddr->ss_family == AF_INET) ?
275		((struct sockaddr_in *)laddr)->sin_port :
276		((struct sockaddr_in6 *)laddr)->sin6_port;
277
278	mutex_lock(&listen_port_mutex);
279
280	list_for_each_entry(port_info, &listen_port_list, list)
281		if (port_info->port_num == port)
282			goto found_port;
283
284	port_info = malloc(sizeof(*port_info), M_CXGBE, M_WAITOK);
285	port_info->port_num = port;
286	port_info->refcnt    = 0;
287
288	list_add_tail(&port_info->list, &listen_port_list);
289	INIT_LIST_HEAD(&port_info->lep_list);
290
291found_port:
292	port_info->refcnt++;
293	list_add_tail(&lep->listen_ep_list, &port_info->lep_list);
294	mutex_unlock(&listen_port_mutex);
295	return port_info;
296}
297
298static int
299rem_ep_from_listenlist(struct c4iw_listen_ep *lep)
300{
301	uint16_t port;
302	struct listen_port_info *port_info = NULL;
303	struct sockaddr_storage *laddr = &lep->com.local_addr;
304	int refcnt = 0;
305
306	port = (laddr->ss_family == AF_INET) ?
307		((struct sockaddr_in *)laddr)->sin_port :
308		((struct sockaddr_in6 *)laddr)->sin6_port;
309
310	mutex_lock(&listen_port_mutex);
311
312	/* get the port_info structure based on the lep's port address */
313	list_for_each_entry(port_info, &listen_port_list, list) {
314		if (port_info->port_num == port) {
315			port_info->refcnt--;
316			refcnt = port_info->refcnt;
317			/* remove the current lep from the listen list */
318			list_del(&lep->listen_ep_list);
319			if (port_info->refcnt == 0) {
320				/* Remove this entry from the list as there
321				 * are no more listeners for this port_num.
322				 */
323				list_del(&port_info->list);
324				kfree(port_info);
325			}
326			break;
327		}
328	}
329	mutex_unlock(&listen_port_mutex);
330	return refcnt;
331}
332
333/*
334 * Find the lep that belongs to the ifnet on which the SYN frame was received.
335 */
336struct c4iw_listen_ep *
337find_real_listen_ep(struct c4iw_listen_ep *master_lep, struct socket *so)
338{
339	struct adapter *adap = NULL;
340	struct c4iw_listen_ep *lep = NULL;
341	struct ifnet *ifp = NULL, *hw_ifp = NULL;
342	struct listen_port_info *port_info = NULL;
343	int i = 0, found_portinfo = 0, found_lep = 0;
344	uint16_t port;
345
346	/*
347	 * STEP 1: Figure out the 'ifp' of the physical interface, not of pseudo
348	 * interfaces like vlan, lagg, etc.
349	 * TBD: lagg support, lagg + vlan support.
350	 */
351	ifp = TOEPCB(so)->l2te->ifp;
352	if (ifp->if_type == IFT_L2VLAN) {
353		hw_ifp = VLAN_TRUNKDEV(ifp);
354		if (hw_ifp == NULL) {
355			CTR4(KTR_IW_CXGBE, "%s: Failed to get parent ifnet of "
356				"vlan ifnet %p, sock %p, master_lep %p",
357				__func__, ifp, so, master_lep);
358			return (NULL);
359		}
360	} else
361		hw_ifp = ifp;
362
363	/* STEP 2: Find 'port_info' with listener local port address. */
364	port = (master_lep->com.local_addr.ss_family == AF_INET) ?
365		((struct sockaddr_in *)&master_lep->com.local_addr)->sin_port :
366		((struct sockaddr_in6 *)&master_lep->com.local_addr)->sin6_port;
367
368
369	mutex_lock(&listen_port_mutex);
370	list_for_each_entry(port_info, &listen_port_list, list)
371		if (port_info->port_num == port) {
372			found_portinfo = 1;
373			break;
374		}
375	if (!found_portinfo)
376		goto out;
377
378	/* STEP 3: Traverse through list of lep's that are bound to the current
379	 * TCP port address and find the lep that belongs to the ifnet on which
380	 * the SYN frame was received.
381	 */
382	list_for_each_entry(lep, &port_info->lep_list, listen_ep_list) {
383		adap = lep->com.dev->rdev.adap;
384		for_each_port(adap, i) {
385			if (hw_ifp == adap->port[i]->vi[0].ifp) {
386				found_lep = 1;
387				goto out;
388			}
389		}
390	}
391out:
392	mutex_unlock(&listen_port_mutex);
393	return found_lep ? lep : (NULL);
394}
395
396static void process_timeout(struct c4iw_ep *ep)
397{
398	struct c4iw_qp_attributes attrs = {0};
399	int abort = 1;
400
401	CTR4(KTR_IW_CXGBE, "%s ep :%p, tid:%u, state %d", __func__,
402			ep, ep->hwtid, ep->com.state);
403	set_bit(TIMEDOUT, &ep->com.history);
404	switch (ep->com.state) {
405	case MPA_REQ_SENT:
406		connect_reply_upcall(ep, -ETIMEDOUT);
407		break;
408	case MPA_REQ_WAIT:
409	case MPA_REQ_RCVD:
410	case MPA_REP_SENT:
411	case FPDU_MODE:
412		break;
413	case CLOSING:
414	case MORIBUND:
415		if (ep->com.cm_id && ep->com.qp) {
416			attrs.next_state = C4IW_QP_STATE_ERROR;
417			c4iw_modify_qp(ep->com.dev, ep->com.qp,
418					C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
419		}
420		close_complete_upcall(ep, -ETIMEDOUT);
421		break;
422	case ABORTING:
423	case DEAD:
424		/*
425		 * These states are expected if the ep timed out at the same
426		 * time as another thread was calling stop_ep_timer().
427		 * So we silently do nothing for these states.
428		 */
429		abort = 0;
430		break;
431	default:
432		CTR4(KTR_IW_CXGBE, "%s unexpected state ep %p tid %u state %u\n"
433				, __func__, ep, ep->hwtid, ep->com.state);
434		abort = 0;
435	}
436	if (abort)
437		c4iw_ep_disconnect(ep, 1, GFP_KERNEL);
438	c4iw_put_ep(&ep->com);
439	return;
440}
441
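/*
 * Error CQEs are not dispatched from the context that detects them: they are
 * queued (by other parts of the driver) as cqe_list_entry items on
 * err_cqe_list and delivered to c4iw_ev_dispatch() from the taskqueue by
 * process_err_cqes() below.
 */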
442struct cqe_list_entry {
443	struct list_head entry;
444	struct c4iw_dev *rhp;
445	struct t4_cqe err_cqe;
446};
447
448static void
449process_err_cqes(void)
450{
451	unsigned long flag;
452	struct cqe_list_entry *cle;
453
454	spin_lock_irqsave(&err_cqe_lock, flag);
455	while (!list_empty(&err_cqe_list)) {
456		struct list_head *tmp;
457		tmp = err_cqe_list.next;
458		list_del(tmp);
459		tmp->next = tmp->prev = NULL;
460		spin_unlock_irqrestore(&err_cqe_lock, flag);
461		cle = list_entry(tmp, struct cqe_list_entry, entry);
462		c4iw_ev_dispatch(cle->rhp, &cle->err_cqe);
463		free(cle, M_CXGBE);
464		spin_lock_irqsave(&err_cqe_lock, flag);
465	}
466	spin_unlock_irqrestore(&err_cqe_lock, flag);
467
468	return;
469}
470
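/*
 * Taskqueue handler: drains err_cqe_list and req_list.  For each endpoint on
 * req_list the pending event mask is snapshotted and cleared under req_lock,
 * and the terminate/timeout/socket events are then processed with the
 * endpoint mutex held.
 */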
471static void
472process_req(struct work_struct *ctx)
473{
474	struct c4iw_ep_common *epc;
475	unsigned long flag;
476	int ep_events;
477
478	process_err_cqes();
479	spin_lock_irqsave(&req_lock, flag);
480	while (!TAILQ_EMPTY(&req_list)) {
481		epc = TAILQ_FIRST(&req_list);
482		TAILQ_REMOVE(&req_list, epc, entry);
483		epc->entry.tqe_prev = NULL;
484		ep_events = epc->ep_events;
485		epc->ep_events = 0;
486		spin_unlock_irqrestore(&req_lock, flag);
487		mutex_lock(&epc->mutex);
488		CTR5(KTR_IW_CXGBE, "%s: so %p, ep %p, ep_state %s events 0x%x",
489		    __func__, epc->so, epc, states[epc->state], ep_events);
490		if (ep_events & C4IW_EVENT_TERM)
491			process_terminate((struct c4iw_ep *)epc);
492		if (ep_events & C4IW_EVENT_TIMEOUT)
493			process_timeout((struct c4iw_ep *)epc);
494		if (ep_events & C4IW_EVENT_SOCKET)
495			process_socket_event((struct c4iw_ep *)epc);
496		mutex_unlock(&epc->mutex);
497		c4iw_put_ep(epc);
498		process_err_cqes();
499		spin_lock_irqsave(&req_lock, flag);
500	}
501	spin_unlock_irqrestore(&req_lock, flag);
502}
503
504/*
505 * XXX: doesn't belong here in the iWARP driver.
506 * XXX: assumes that the connection was offloaded by cxgbe/t4_tom if TF_TOE is
507 *      set.  Is this a valid assumption for active open?
508 */
509static int
510set_tcpinfo(struct c4iw_ep *ep)
511{
512	struct socket *so = ep->com.so;
513	struct inpcb *inp = sotoinpcb(so);
514	struct tcpcb *tp;
515	struct toepcb *toep;
516	int rc = 0;
517
518	INP_WLOCK(inp);
519	tp = intotcpcb(inp);
520	if ((tp->t_flags & TF_TOE) == 0) {
521		rc = EINVAL;
522		log(LOG_ERR, "%s: connection not offloaded (so %p, ep %p)\n",
523		    __func__, so, ep);
524		goto done;
525	}
526	toep = TOEPCB(so);
527
528	ep->hwtid = toep->tid;
529	ep->snd_seq = tp->snd_nxt;
530	ep->rcv_seq = tp->rcv_nxt;
531	ep->emss = max(tp->t_maxseg, 128);
532done:
533	INP_WUNLOCK(inp);
534	return (rc);
535}
536
537static int
538get_ifnet_from_raddr(struct sockaddr_storage *raddr, struct ifnet **ifp)
539{
540	int err = 0;
541
542	if (raddr->ss_family == AF_INET) {
543		struct sockaddr_in *raddr4 = (struct sockaddr_in *)raddr;
544		struct nhop4_extended nh4 = {0};
545
546		err = fib4_lookup_nh_ext(RT_DEFAULT_FIB, raddr4->sin_addr,
547				NHR_REF, 0, &nh4);
548		*ifp = nh4.nh_ifp;
549		if (err)
550			fib4_free_nh_ext(RT_DEFAULT_FIB, &nh4);
551	} else {
552		struct sockaddr_in6 *raddr6 = (struct sockaddr_in6 *)raddr;
553		struct nhop6_extended nh6 = {0};
554		struct in6_addr addr6;
555		uint32_t scopeid;
556
557		memset(&addr6, 0, sizeof(addr6));
558		in6_splitscope((struct in6_addr *)&raddr6->sin6_addr,
559					&addr6, &scopeid);
560		err = fib6_lookup_nh_ext(RT_DEFAULT_FIB, &addr6, scopeid,
561				NHR_REF, 0, &nh6);
562		*ifp = nh6.nh_ifp;
563		if (err)
564			fib6_free_nh_ext(RT_DEFAULT_FIB, &nh6);
565	}
566
567	CTR2(KTR_IW_CXGBE, "%s: return: %d", __func__, err);
568	return err;
569}
570
571static void
572close_socket(struct socket *so)
573{
574	uninit_iwarp_socket(so);
575	soclose(so);
576}
577
578static void
579process_peer_close(struct c4iw_ep *ep)
580{
581	struct c4iw_qp_attributes attrs = {0};
582	int disconnect = 1;
583	int release = 0;
584
585	CTR4(KTR_IW_CXGBE, "%s:ppcB ep %p so %p state %s", __func__, ep,
586	    ep->com.so, states[ep->com.state]);
587
588	switch (ep->com.state) {
589
590		case MPA_REQ_WAIT:
591			CTR2(KTR_IW_CXGBE, "%s:ppc1 %p MPA_REQ_WAIT DEAD",
592			    __func__, ep);
593			/* Fallthrough */
594		case MPA_REQ_SENT:
595			CTR2(KTR_IW_CXGBE, "%s:ppc2 %p MPA_REQ_SENT DEAD",
596			    __func__, ep);
597			ep->com.state = DEAD;
598			connect_reply_upcall(ep, -ECONNABORTED);
599
600			disconnect = 0;
601			STOP_EP_TIMER(ep);
602			close_socket(ep->com.so);
603			deref_cm_id(&ep->com);
604			release = 1;
605			break;
606
607		case MPA_REQ_RCVD:
608
609			/*
610			 * We're gonna mark this puppy DEAD, but keep
611			 * the reference on it until the ULP accepts or
612			 * rejects the CR.
613			 */
614			CTR2(KTR_IW_CXGBE, "%s:ppc3 %p MPA_REQ_RCVD CLOSING",
615			    __func__, ep);
616			ep->com.state = CLOSING;
617			break;
618
619		case MPA_REP_SENT:
620			CTR2(KTR_IW_CXGBE, "%s:ppc4 %p MPA_REP_SENT CLOSING",
621			    __func__, ep);
622			ep->com.state = CLOSING;
623			break;
624
625		case FPDU_MODE:
626			CTR2(KTR_IW_CXGBE, "%s:ppc5 %p FPDU_MODE CLOSING",
627			    __func__, ep);
628			START_EP_TIMER(ep);
629			ep->com.state = CLOSING;
630			attrs.next_state = C4IW_QP_STATE_CLOSING;
631			c4iw_modify_qp(ep->com.dev, ep->com.qp,
632					C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
633			peer_close_upcall(ep);
634			break;
635
636		case ABORTING:
637			CTR2(KTR_IW_CXGBE, "%s:ppc6 %p ABORTING (disconn)",
638			    __func__, ep);
639			disconnect = 0;
640			break;
641
642		case CLOSING:
643			CTR2(KTR_IW_CXGBE, "%s:ppc7 %p CLOSING MORIBUND",
644			    __func__, ep);
645			ep->com.state = MORIBUND;
646			disconnect = 0;
647			break;
648
649		case MORIBUND:
650			CTR2(KTR_IW_CXGBE, "%s:ppc8 %p MORIBUND DEAD", __func__,
651			    ep);
652			STOP_EP_TIMER(ep);
653			if (ep->com.cm_id && ep->com.qp) {
654				attrs.next_state = C4IW_QP_STATE_IDLE;
655				c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
656						C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
657			}
658			close_socket(ep->com.so);
659			close_complete_upcall(ep, 0);
660			ep->com.state = DEAD;
661			release = 1;
662			disconnect = 0;
663			break;
664
665		case DEAD:
666			CTR2(KTR_IW_CXGBE, "%s:ppc9 %p DEAD (disconn)",
667			    __func__, ep);
668			disconnect = 0;
669			break;
670
671		default:
672			panic("%s: ep %p state %d", __func__, ep,
673			    ep->com.state);
674			break;
675	}
676
677
678	if (disconnect) {
679
680		CTR2(KTR_IW_CXGBE, "%s:ppca %p", __func__, ep);
681		c4iw_ep_disconnect(ep, 0, M_NOWAIT);
682	}
683	if (release) {
684
685		CTR2(KTR_IW_CXGBE, "%s:ppcb %p", __func__, ep);
686		c4iw_put_ep(&ep->com);
687	}
688	CTR2(KTR_IW_CXGBE, "%s:ppcE %p", __func__, ep);
689	return;
690}
691
692static void
693process_conn_error(struct c4iw_ep *ep)
694{
695	struct c4iw_qp_attributes attrs = {0};
696	int ret;
697	int state;
698
699	state = ep->com.state;
700	CTR5(KTR_IW_CXGBE, "%s:pceB ep %p so %p so->so_error %u state %s",
701	    __func__, ep, ep->com.so, ep->com.so->so_error,
702	    states[ep->com.state]);
703
704	switch (state) {
705
706		case MPA_REQ_WAIT:
707			STOP_EP_TIMER(ep);
708			c4iw_put_ep(&ep->parent_ep->com);
709			break;
710
711		case MPA_REQ_SENT:
712			STOP_EP_TIMER(ep);
713			connect_reply_upcall(ep, -ECONNRESET);
714			break;
715
716		case MPA_REP_SENT:
717			ep->com.rpl_err = ECONNRESET;
718			CTR1(KTR_IW_CXGBE, "waking up ep %p", ep);
719			break;
720
721		case MPA_REQ_RCVD:
722			break;
723
724		case MORIBUND:
725		case CLOSING:
726			STOP_EP_TIMER(ep);
727			/*FALLTHROUGH*/
728		case FPDU_MODE:
729
730			if (ep->com.cm_id && ep->com.qp) {
731
732				attrs.next_state = C4IW_QP_STATE_ERROR;
733				ret = c4iw_modify_qp(ep->com.qp->rhp,
734					ep->com.qp, C4IW_QP_ATTR_NEXT_STATE,
735					&attrs, 1);
736				if (ret)
737					log(LOG_ERR,
738							"%s - qp <- error failed!\n",
739							__func__);
740			}
741			peer_abort_upcall(ep);
742			break;
743
744		case ABORTING:
745			break;
746
747		case DEAD:
748			CTR2(KTR_IW_CXGBE, "%s so_error %d IN DEAD STATE!!!!",
749			    __func__, ep->com.so->so_error);
750			return;
751
752		default:
753			panic("%s: ep %p state %d", __func__, ep, state);
754			break;
755	}
756
757	if (state != ABORTING) {
758		close_socket(ep->com.so);
759		ep->com.state = DEAD;
760		c4iw_put_ep(&ep->com);
761	}
762	CTR2(KTR_IW_CXGBE, "%s:pceE %p", __func__, ep);
763	return;
764}
765
766static void
767process_close_complete(struct c4iw_ep *ep)
768{
769	struct c4iw_qp_attributes attrs = {0};
770	int release = 0;
771
772	CTR4(KTR_IW_CXGBE, "%s:pccB ep %p so %p state %s", __func__, ep,
773	    ep->com.so, states[ep->com.state]);
774
775	/* The cm_id may be null if we failed to connect */
776	set_bit(CLOSE_CON_RPL, &ep->com.history);
777
778	switch (ep->com.state) {
779
780		case CLOSING:
781			CTR2(KTR_IW_CXGBE, "%s:pcc1 %p CLOSING MORIBUND",
782			    __func__, ep);
783			ep->com.state = MORIBUND;
784			break;
785
786		case MORIBUND:
787			CTR2(KTR_IW_CXGBE, "%s:pcc1 %p MORIBUND DEAD", __func__,
788			    ep);
789			STOP_EP_TIMER(ep);
790
791			if ((ep->com.cm_id) && (ep->com.qp)) {
792
793				CTR2(KTR_IW_CXGBE, "%s:pcc2 %p QP_STATE_IDLE",
794				    __func__, ep);
795				attrs.next_state = C4IW_QP_STATE_IDLE;
796				c4iw_modify_qp(ep->com.dev,
797						ep->com.qp,
798						C4IW_QP_ATTR_NEXT_STATE,
799						&attrs, 1);
800			}
801
802			close_socket(ep->com.so);
803			close_complete_upcall(ep, 0);
804			ep->com.state = DEAD;
805			release = 1;
806			break;
807
808		case ABORTING:
809			CTR2(KTR_IW_CXGBE, "%s:pcc5 %p ABORTING", __func__, ep);
810			break;
811
812		case DEAD:
813			CTR2(KTR_IW_CXGBE, "%s:pcc6 %p DEAD", __func__, ep);
814			break;
815		default:
816			CTR2(KTR_IW_CXGBE, "%s:pcc7 %p unknown ep state",
817					__func__, ep);
818			panic("%s:pcc6 %p unknown ep state", __func__, ep);
819			break;
820	}
821
822	if (release) {
823
824		CTR2(KTR_IW_CXGBE, "%s:pcc8 %p", __func__, ep);
825		release_ep_resources(ep);
826	}
827	CTR2(KTR_IW_CXGBE, "%s:pccE %p", __func__, ep);
828	return;
829}
830
831static void
832setiwsockopt(struct socket *so)
833{
834	int rc;
835	struct sockopt sopt;
836	int on = 1;
837
838	sopt.sopt_dir = SOPT_SET;
839	sopt.sopt_level = IPPROTO_TCP;
840	sopt.sopt_name = TCP_NODELAY;
841	sopt.sopt_val = (caddr_t)&on;
842	sopt.sopt_valsize = sizeof on;
843	sopt.sopt_td = NULL;
844	rc = sosetopt(so, &sopt);
845	if (rc) {
846		log(LOG_ERR, "%s: can't set TCP_NODELAY on so %p (%d)\n",
847		    __func__, so, rc);
848	}
849}
850
851static void
852init_iwarp_socket(struct socket *so, void *arg)
853{
854
855	SOCKBUF_LOCK(&so->so_rcv);
856	soupcall_set(so, SO_RCV, c4iw_so_upcall, arg);
857	so->so_state |= SS_NBIO;
858	SOCKBUF_UNLOCK(&so->so_rcv);
859}
860
861static void
862uninit_iwarp_socket(struct socket *so)
863{
864
865	SOCKBUF_LOCK(&so->so_rcv);
866	soupcall_clear(so, SO_RCV);
867	SOCKBUF_UNLOCK(&so->so_rcv);
868}
869
870static void
871process_data(struct c4iw_ep *ep)
872{
873	int disconnect = 0;
874
875	CTR5(KTR_IW_CXGBE, "%s: so %p, ep %p, state %s, sbused %d", __func__,
876	    ep->com.so, ep, states[ep->com.state], sbused(&ep->com.so->so_rcv));
877
878	switch (ep->com.state) {
879	case MPA_REQ_SENT:
880		disconnect = process_mpa_reply(ep);
881		break;
882	case MPA_REQ_WAIT:
883		disconnect = process_mpa_request(ep);
884		if (disconnect)
885			/* Referenced in process_newconn() */
886			c4iw_put_ep(&ep->parent_ep->com);
887		break;
888	default:
889		if (sbused(&ep->com.so->so_rcv))
890			log(LOG_ERR, "%s: Unexpected streaming data. ep %p, "
891			    "state %d, so %p, so_state 0x%x, sbused %u\n",
892			    __func__, ep, ep->com.state, ep->com.so,
893			    ep->com.so->so_state, sbused(&ep->com.so->so_rcv));
894		break;
895	}
896	if (disconnect)
897		c4iw_ep_disconnect(ep, disconnect == 2, GFP_KERNEL);
898
899}
900
901static void
902process_connected(struct c4iw_ep *ep)
903{
904	struct socket *so = ep->com.so;
905
906	if ((so->so_state & SS_ISCONNECTED) && !so->so_error) {
907		if (send_mpa_req(ep))
908			goto err;
909	} else {
910		connect_reply_upcall(ep, -so->so_error);
911		goto err;
912	}
913	return;
914err:
915	close_socket(so);
916	ep->com.state = DEAD;
917	c4iw_put_ep(&ep->com);
918	return;
919}
920
921static inline int c4iw_zero_addr(struct sockaddr *addr)
922{
923	struct in6_addr *ip6;
924
925	if (addr->sa_family == AF_INET)
926		return IN_ZERONET(
927			ntohl(((struct sockaddr_in *)addr)->sin_addr.s_addr));
928	else {
929		ip6 = &((struct sockaddr_in6 *) addr)->sin6_addr;
930		return (ip6->s6_addr32[0] | ip6->s6_addr32[1] |
931				ip6->s6_addr32[2] | ip6->s6_addr32[3]) == 0;
932	}
933}
934
935static inline int c4iw_loopback_addr(struct sockaddr *addr)
936{
937	if (addr->sa_family == AF_INET)
938		return IN_LOOPBACK(
939			ntohl(((struct sockaddr_in *) addr)->sin_addr.s_addr));
940	else
941		return IN6_IS_ADDR_LOOPBACK(
942				&((struct sockaddr_in6 *) addr)->sin6_addr);
943}
944
945static inline int c4iw_any_addr(struct sockaddr *addr)
946{
947	return c4iw_zero_addr(addr) || c4iw_loopback_addr(addr);
948}
949
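/*
 * Handle one connection handed up by a listening socket.  For wildcard
 * (zero/loopback address) listeners, the lep that owns the ingress interface
 * is looked up first; a new ep is then allocated, its local/remote addresses
 * are captured, and any MPA request already queued on the socket is
 * processed under the ep mutex.
 */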
950static void
951process_newconn(struct c4iw_listen_ep *master_lep, struct socket *new_so)
952{
953	struct c4iw_listen_ep *real_lep = NULL;
954	struct c4iw_ep *new_ep = NULL;
955	int ret = 0;
956
957	MPASS(new_so != NULL);
958
959	if (c4iw_any_addr((struct sockaddr *)&master_lep->com.local_addr)) {
960		/* Here we need to find the 'real_lep' that belongs to the
961		 * incoming socket's network interface, so that the newly
962		 * created 'ep' can be attached to the real 'lep'.
963		 */
964		real_lep = find_real_listen_ep(master_lep, new_so);
965		if (real_lep == NULL) {
966			CTR2(KTR_IW_CXGBE, "%s: Could not find the real listen "
967					"ep for sock: %p", __func__, new_so);
968			log(LOG_ERR,"%s: Could not find the real listen ep for "
969					"sock: %p\n", __func__, new_so);
970			/* FIXME: properly free the 'new_so' in the failure case.
971			 * Use of soabort() and soclose() is not legal
972			 * here (before soaccept()).
973			 */
974			return;
975		}
976	} else /* for a non-wildcard address, master_lep is always the real_lep */
977		real_lep = master_lep;
978
979	new_ep = alloc_ep(sizeof(*new_ep), GFP_KERNEL);
980
981	CTR6(KTR_IW_CXGBE, "%s: master_lep %p, real_lep: %p, new ep %p, "
982	    "listening so %p, new so %p", __func__, master_lep, real_lep,
983	    new_ep, master_lep->com.so, new_so);
984
985	new_ep->com.dev = real_lep->com.dev;
986	new_ep->com.so = new_so;
987	new_ep->com.cm_id = NULL;
988	new_ep->com.thread = real_lep->com.thread;
989	new_ep->parent_ep = real_lep;
990
991	GET_LOCAL_ADDR(&new_ep->com.local_addr, new_so);
992	GET_REMOTE_ADDR(&new_ep->com.remote_addr, new_so);
993	c4iw_get_ep(&real_lep->com);
994	init_timer(&new_ep->timer);
995	new_ep->com.state = MPA_REQ_WAIT;
996	START_EP_TIMER(new_ep);
997
998	setiwsockopt(new_so);
999
1000	/* An MPA request might have been queued up on the socket already, so we
1001	 * initialize the socket/upcall handler under the lock to prevent another
1002	 * thread (via process_req()) from processing the MPA request simultaneously.
1003	 */
1004	c4iw_get_ep(&new_ep->com); /* Dereferenced at the end below; this avoids
1005				      the ep being freed before it is unlocked. */
1006	mutex_lock(&new_ep->com.mutex);
1007	init_iwarp_socket(new_so, &new_ep->com);
1008
1009	ret = process_mpa_request(new_ep);
1010	if (ret) {
1011		/* ABORT */
1012		c4iw_ep_disconnect(new_ep, 1, GFP_KERNEL);
1013		c4iw_put_ep(&real_lep->com);
1014	}
1015	mutex_unlock(&new_ep->com.mutex);
1016	c4iw_put_ep(&new_ep->com);
1017	return;
1018}
1019
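/*
 * Record new events for an endpoint and, if it is not already queued, take a
 * reference and put it on req_list so that process_req() handles it on the
 * taskqueue.  Called from the socket upcall and from CPL handlers such as
 * terminate().
 */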
1020static int
1021add_ep_to_req_list(struct c4iw_ep *ep, int new_ep_event)
1022{
1023	unsigned long flag;
1024
1025	spin_lock_irqsave(&req_lock, flag);
1026	if (ep && ep->com.so) {
1027		ep->com.ep_events |= new_ep_event;
1028		if (!ep->com.entry.tqe_prev) {
1029			c4iw_get_ep(&ep->com);
1030			TAILQ_INSERT_TAIL(&req_list, &ep->com, entry);
1031			queue_work(c4iw_taskq, &c4iw_task);
1032		}
1033	}
1034	spin_unlock_irqrestore(&req_lock, flag);
1035
1036	return (0);
1037}
1038
1039static int
1040c4iw_so_upcall(struct socket *so, void *arg, int waitflag)
1041{
1042	struct c4iw_ep *ep = arg;
1043
1044	CTR6(KTR_IW_CXGBE,
1045	    "%s: so %p, so_state 0x%x, ep %p, ep_state %s, tqe_prev %p",
1046	    __func__, so, so->so_state, ep, states[ep->com.state],
1047	    ep->com.entry.tqe_prev);
1048
1049	MPASS(ep->com.so == so);
1050	/*
1051	 * Wake up any threads waiting in rdma_init()/rdma_fini(),
1052	 * with locks held.
1053	 */
1054	if (so->so_error)
1055		c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
1056	add_ep_to_req_list(ep, C4IW_EVENT_SOCKET);
1057
1058	return (SU_OK);
1059}
1060
1061
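/*
 * Handler for CPL_RDMA_TERMINATE messages: look up the ep through the
 * socket's receive upcall argument and defer the real work to the taskqueue
 * via add_ep_to_req_list().
 */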
1062static int
1063terminate(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
1064{
1065	struct adapter *sc = iq->adapter;
1066	const struct cpl_rdma_terminate *cpl = mtod(m, const void *);
1067	unsigned int tid = GET_TID(cpl);
1068	struct toepcb *toep = lookup_tid(sc, tid);
1069	struct socket *so;
1070	struct c4iw_ep *ep;
1071
1072	INP_WLOCK(toep->inp);
1073	so = inp_inpcbtosocket(toep->inp);
1074	ep = so->so_rcv.sb_upcallarg;
1075	INP_WUNLOCK(toep->inp);
1076
1077	CTR3(KTR_IW_CXGBE, "%s: so %p, ep %p", __func__, so, ep);
1078	add_ep_to_req_list(ep, C4IW_EVENT_TERM);
1079
1080	return 0;
1081}
1082
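/*
 * Pull one completed connection off the listening socket's so_comp queue and
 * soaccept() it; this is essentially the in-kernel equivalent of accept(2)
 * without allocating a file descriptor.  Returns NULL if the queue is empty
 * or soaccept() fails.
 */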
1083static struct socket *
1084dequeue_socket(struct socket *head)
1085{
1086	struct socket *so;
1087	struct sockaddr_in *remote;
1088
1089	ACCEPT_LOCK();
1090	so = TAILQ_FIRST(&head->so_comp);
1091	if (!so) {
1092		ACCEPT_UNLOCK();
1093		return NULL;
1094	}
1095
1096	SOCK_LOCK(so);
1097	/*
1098	 * Before changing the flags on the socket, we have to bump the
1099	 * reference count.  Otherwise, if the protocol calls sofree(),
1100	 * the socket will be released due to a zero refcount.
1101	 */
1102	soref(so);
1103	TAILQ_REMOVE(&head->so_comp, so, so_list);
1104	head->so_qlen--;
1105	so->so_qstate &= ~SQ_COMP;
1106	so->so_head = NULL;
1107	so->so_state |= SS_NBIO;
1108	SOCK_UNLOCK(so);
1109	ACCEPT_UNLOCK();
1110	remote = NULL;
1111	if (soaccept(so, (struct sockaddr **)&remote) != 0) {
1112		soclose(so);
1113		so = NULL;
1114	}
1115	free(remote, M_SONAME);
1116	return so;
1117}
1118
1119static void
1120process_socket_event(struct c4iw_ep *ep)
1121{
1122	int state = ep->com.state;
1123	struct socket *so = ep->com.so;
1124
1125	if (ep->com.state == DEAD) {
1126		CTR3(KTR_IW_CXGBE, "%s: Pending socket event discarded "
1127			"ep %p ep_state %s", __func__, ep, states[state]);
1128		return;
1129	}
1130
1131	CTR6(KTR_IW_CXGBE, "process_socket_event: so %p, so_state 0x%x, "
1132	    "so_err %d, sb_state 0x%x, ep %p, ep_state %s", so, so->so_state,
1133	    so->so_error, so->so_rcv.sb_state, ep, states[state]);
1134
1135	if (state == CONNECTING) {
1136		process_connected(ep);
1137		return;
1138	}
1139
1140	if (state == LISTEN) {
1141		struct c4iw_listen_ep *lep = (struct c4iw_listen_ep *)ep;
1142		struct socket *new_so;
1143
1144		while ((new_so = dequeue_socket(so)) != NULL) {
1145			process_newconn(lep, new_so);
1146		}
1147		return;
1148	}
1149
1150	/* connection error */
1151	if (so->so_error) {
1152		process_conn_error(ep);
1153		return;
1154	}
1155
1156	/* peer close */
1157	if ((so->so_rcv.sb_state & SBS_CANTRCVMORE) && state <= CLOSING) {
1158		process_peer_close(ep);
1159		/*
1160		 * Check whether a socket disconnect event is pending before
1161		 * returning.  Fall through if so.
1162		 */
1163		if (!(so->so_state & SS_ISDISCONNECTED))
1164			return;
1165	}
1166
1167	/* close complete */
1168	if (so->so_state & SS_ISDISCONNECTED) {
1169		process_close_complete(ep);
1170		return;
1171	}
1172
1173	/* rx data */
1174	process_data(ep);
1175}
1176
1177SYSCTL_NODE(_hw, OID_AUTO, iw_cxgbe, CTLFLAG_RD, 0, "iw_cxgbe driver parameters");
1178
1179static int dack_mode = 0;
1180SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, dack_mode, CTLFLAG_RWTUN, &dack_mode, 0,
1181		"Delayed ack mode (default = 0)");
1182
1183int c4iw_max_read_depth = 8;
1184SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, c4iw_max_read_depth, CTLFLAG_RWTUN, &c4iw_max_read_depth, 0,
1185		"Per-connection max ORD/IRD (default = 8)");
1186
1187static int enable_tcp_timestamps;
1188SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, enable_tcp_timestamps, CTLFLAG_RWTUN, &enable_tcp_timestamps, 0,
1189		"Enable tcp timestamps (default = 0)");
1190
1191static int enable_tcp_sack;
1192SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, enable_tcp_sack, CTLFLAG_RWTUN, &enable_tcp_sack, 0,
1193		"Enable tcp SACK (default = 0)");
1194
1195static int enable_tcp_window_scaling = 1;
1196SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, enable_tcp_window_scaling, CTLFLAG_RWTUN, &enable_tcp_window_scaling, 0,
1197		"Enable tcp window scaling (default = 1)");
1198
1199int c4iw_debug = 0;
1200SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, c4iw_debug, CTLFLAG_RWTUN, &c4iw_debug, 0,
1201		"Enable debug logging (default = 0)");
1202
1203static int peer2peer = 1;
1204SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, peer2peer, CTLFLAG_RWTUN, &peer2peer, 0,
1205		"Support peer2peer ULPs (default = 1)");
1206
1207static int p2p_type = FW_RI_INIT_P2PTYPE_READ_REQ;
1208SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, p2p_type, CTLFLAG_RWTUN, &p2p_type, 0,
1209		"RDMAP opcode to use for the RTR message: 1 = RDMA_READ 0 = RDMA_WRITE (default 1)");
1210
1211static int ep_timeout_secs = 60;
1212SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, ep_timeout_secs, CTLFLAG_RWTUN, &ep_timeout_secs, 0,
1213		"CM Endpoint operation timeout in seconds (default = 60)");
1214
1215static int mpa_rev = 1;
1216SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, mpa_rev, CTLFLAG_RWTUN, &mpa_rev, 0,
1217		"MPA Revision, 0 supports amso1100, 1 is RFC5044 spec compliant, 2 is IETF MPA Peer Connect Draft compliant (default = 1)");
1218
1219static int markers_enabled;
1220SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, markers_enabled, CTLFLAG_RWTUN, &markers_enabled, 0,
1221		"Enable MPA MARKERS (default(0) = disabled)");
1222
1223static int crc_enabled = 1;
1224SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, crc_enabled, CTLFLAG_RWTUN, &crc_enabled, 0,
1225		"Enable MPA CRC (default(1) = enabled)");
1226
1227static int rcv_win = 256 * 1024;
1228SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, rcv_win, CTLFLAG_RWTUN, &rcv_win, 0,
1229		"TCP receive window in bytes (default = 256KB)");
1230
1231static int snd_win = 128 * 1024;
1232SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, snd_win, CTLFLAG_RWTUN, &snd_win, 0,
1233		"TCP send window in bytes (default = 128KB)");
1234
1235static void
1236start_ep_timer(struct c4iw_ep *ep)
1237{
1238
1239	if (timer_pending(&ep->timer)) {
1240		CTR2(KTR_IW_CXGBE, "%s: ep %p, already started", __func__, ep);
1241		printk(KERN_ERR "%s timer already started! ep %p\n", __func__,
1242		    ep);
1243		return;
1244	}
1245	clear_bit(TIMEOUT, &ep->com.flags);
1246	c4iw_get_ep(&ep->com);
1247	ep->timer.expires = jiffies + ep_timeout_secs * HZ;
1248	ep->timer.data = (unsigned long)ep;
1249	ep->timer.function = ep_timeout;
1250	add_timer(&ep->timer);
1251}
1252
1253static int
1254stop_ep_timer(struct c4iw_ep *ep)
1255{
1256
1257	del_timer_sync(&ep->timer);
1258	if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) {
1259		c4iw_put_ep(&ep->com);
1260		return 0;
1261	}
1262	return 1;
1263}
1264
1265static void *
1266alloc_ep(int size, gfp_t gfp)
1267{
1268	struct c4iw_ep_common *epc;
1269
1270	epc = kzalloc(size, gfp);
1271	if (epc == NULL)
1272		return (NULL);
1273
1274	kref_init(&epc->kref);
1275	mutex_init(&epc->mutex);
1276	c4iw_init_wr_wait(&epc->wr_wait);
1277
1278	return (epc);
1279}
1280
1281void _c4iw_free_ep(struct kref *kref)
1282{
1283	struct c4iw_ep *ep;
1284	struct c4iw_ep_common *epc;
1285
1286	ep = container_of(kref, struct c4iw_ep, com.kref);
1287	epc = &ep->com;
1288	KASSERT(!epc->entry.tqe_prev, ("%s epc %p still on req list",
1289	    __func__, epc));
1290	if (test_bit(QP_REFERENCED, &ep->com.flags))
1291		deref_qp(ep);
1292	CTR4(KTR_IW_CXGBE, "%s: ep %p, history 0x%lx, flags 0x%lx",
1293	    __func__, ep, epc->history, epc->flags);
1294	kfree(ep);
1295}
1296
1297static void release_ep_resources(struct c4iw_ep *ep)
1298{
1299	CTR2(KTR_IW_CXGBE, "%s:rerB %p", __func__, ep);
1300	set_bit(RELEASE_RESOURCES, &ep->com.flags);
1301	c4iw_put_ep(&ep->com);
1302	CTR2(KTR_IW_CXGBE, "%s:rerE %p", __func__, ep);
1303}
1304
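/*
 * Build an MPA start request (MPA rev 1, or rev 2 with the enhanced RDMA
 * connection parameters carrying IRD/ORD and the RTR opcode) in an mbuf and
 * send it on the connection's socket.  On success the ep moves to
 * MPA_REQ_SENT and the MPA timer is started; on failure a connect reply
 * upcall carrying the error is generated.
 */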
1305static int
1306send_mpa_req(struct c4iw_ep *ep)
1307{
1308	int mpalen;
1309	struct mpa_message *mpa;
1310	struct mpa_v2_conn_params mpa_v2_params;
1311	struct mbuf *m;
1312	char mpa_rev_to_use = mpa_rev;
1313	int err = 0;
1314
1315	if (ep->retry_with_mpa_v1)
1316		mpa_rev_to_use = 1;
1317	mpalen = sizeof(*mpa) + ep->plen;
1318	if (mpa_rev_to_use == 2)
1319		mpalen += sizeof(struct mpa_v2_conn_params);
1320
1321	mpa = malloc(mpalen, M_CXGBE, M_NOWAIT);
1322	if (mpa == NULL) {
1323		err = -ENOMEM;
1324		CTR3(KTR_IW_CXGBE, "%s:smr1 ep: %p , error: %d",
1325				__func__, ep, err);
1326		goto err;
1327	}
1328
1329	memset(mpa, 0, mpalen);
1330	memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key));
1331	mpa->flags = (crc_enabled ? MPA_CRC : 0) |
1332		(markers_enabled ? MPA_MARKERS : 0) |
1333		(mpa_rev_to_use == 2 ? MPA_ENHANCED_RDMA_CONN : 0);
1334	mpa->private_data_size = htons(ep->plen);
1335	mpa->revision = mpa_rev_to_use;
1336
1337	if (mpa_rev_to_use == 1) {
1338		ep->tried_with_mpa_v1 = 1;
1339		ep->retry_with_mpa_v1 = 0;
1340	}
1341
1342	if (mpa_rev_to_use == 2) {
1343		mpa->private_data_size = htons(ntohs(mpa->private_data_size) +
1344					    sizeof(struct mpa_v2_conn_params));
1345		mpa_v2_params.ird = htons((u16)ep->ird);
1346		mpa_v2_params.ord = htons((u16)ep->ord);
1347
1348		if (peer2peer) {
1349			mpa_v2_params.ird |= htons(MPA_V2_PEER2PEER_MODEL);
1350
1351			if (p2p_type == FW_RI_INIT_P2PTYPE_RDMA_WRITE) {
1352				mpa_v2_params.ord |=
1353				    htons(MPA_V2_RDMA_WRITE_RTR);
1354			} else if (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ) {
1355				mpa_v2_params.ord |=
1356					htons(MPA_V2_RDMA_READ_RTR);
1357			}
1358		}
1359		memcpy(mpa->private_data, &mpa_v2_params,
1360			sizeof(struct mpa_v2_conn_params));
1361
1362		if (ep->plen) {
1363
1364			memcpy(mpa->private_data +
1365				sizeof(struct mpa_v2_conn_params),
1366				ep->mpa_pkt + sizeof(*mpa), ep->plen);
1367		}
1368	} else {
1369
1370		if (ep->plen)
1371			memcpy(mpa->private_data,
1372					ep->mpa_pkt + sizeof(*mpa), ep->plen);
1373		CTR2(KTR_IW_CXGBE, "%s:smr7 %p", __func__, ep);
1374	}
1375
1376	m = m_getm(NULL, mpalen, M_NOWAIT, MT_DATA);
1377	if (m == NULL) {
1378		err = -ENOMEM;
1379		CTR3(KTR_IW_CXGBE, "%s:smr2 ep: %p , error: %d",
1380				__func__, ep, err);
1381		free(mpa, M_CXGBE);
1382		goto err;
1383	}
1384	m_copyback(m, 0, mpalen, (void *)mpa);
1385	free(mpa, M_CXGBE);
1386
1387	err = -sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT,
1388			ep->com.thread);
1389	if (err) {
1390		CTR3(KTR_IW_CXGBE, "%s:smr3 ep: %p , error: %d",
1391				__func__, ep, err);
1392		goto err;
1393	}
1394
1395	START_EP_TIMER(ep);
1396	ep->com.state = MPA_REQ_SENT;
1397	ep->mpa_attr.initiator = 1;
1398	CTR3(KTR_IW_CXGBE, "%s:smrE %p, error: %d", __func__, ep, err);
1399	return 0;
1400err:
1401	connect_reply_upcall(ep, err);
1402	CTR3(KTR_IW_CXGBE, "%s:smrE %p, error: %d", __func__, ep, err);
1403	return err;
1404}
1405
1406static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen)
1407{
1408	int mpalen;
1409	struct mpa_message *mpa;
1410	struct mpa_v2_conn_params mpa_v2_params;
1411	struct mbuf *m;
1412	int err;
1413
1414	CTR4(KTR_IW_CXGBE, "%s:smrejB %p %u %d", __func__, ep, ep->hwtid,
1415	    ep->plen);
1416
1417	mpalen = sizeof(*mpa) + plen;
1418
1419	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
1420
1421		mpalen += sizeof(struct mpa_v2_conn_params);
1422		CTR4(KTR_IW_CXGBE, "%s:smrej1 %p %u %d", __func__, ep,
1423		    ep->mpa_attr.version, mpalen);
1424	}
1425
1426	mpa = malloc(mpalen, M_CXGBE, M_NOWAIT);
1427	if (mpa == NULL)
1428		return (-ENOMEM);
1429
1430	memset(mpa, 0, mpalen);
1431	memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
1432	mpa->flags = MPA_REJECT;
1433	mpa->revision = mpa_rev;
1434	mpa->private_data_size = htons(plen);
1435
1436	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
1437
1438		mpa->flags |= MPA_ENHANCED_RDMA_CONN;
1439		mpa->private_data_size = htons(ntohs(mpa->private_data_size) +
1440					    sizeof(struct mpa_v2_conn_params));
1441		mpa_v2_params.ird = htons(((u16)ep->ird) |
1442				(peer2peer ? MPA_V2_PEER2PEER_MODEL :
1443				 0));
1444		mpa_v2_params.ord = htons(((u16)ep->ord) | (peer2peer ?
1445					(p2p_type ==
1446					 FW_RI_INIT_P2PTYPE_RDMA_WRITE ?
1447					 MPA_V2_RDMA_WRITE_RTR : p2p_type ==
1448					 FW_RI_INIT_P2PTYPE_READ_REQ ?
1449					 MPA_V2_RDMA_READ_RTR : 0) : 0));
1450		memcpy(mpa->private_data, &mpa_v2_params,
1451				sizeof(struct mpa_v2_conn_params));
1452
1453		if (ep->plen)
1454			memcpy(mpa->private_data +
1455				sizeof(struct mpa_v2_conn_params), pdata, plen);
1456		CTR5(KTR_IW_CXGBE, "%s:smrej3 %p %d %d %d", __func__, ep,
1457		    mpa_v2_params.ird, mpa_v2_params.ord, ep->plen);
1458	} else
1459		if (plen)
1460			memcpy(mpa->private_data, pdata, plen);
1461
1462	m = m_getm(NULL, mpalen, M_NOWAIT, MT_DATA);
1463	if (m == NULL) {
1464		free(mpa, M_CXGBE);
1465		return (-ENOMEM);
1466	}
1467	m_copyback(m, 0, mpalen, (void *)mpa);
1468	free(mpa, M_CXGBE);
1469
1470	err = -sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT, ep->com.thread);
1471	if (!err)
1472		ep->snd_seq += mpalen;
1473	CTR4(KTR_IW_CXGBE, "%s:smrejE %p %u %d", __func__, ep, ep->hwtid, err);
1474	return err;
1475}
1476
1477static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen)
1478{
1479	int mpalen;
1480	struct mpa_message *mpa;
1481	struct mbuf *m;
1482	struct mpa_v2_conn_params mpa_v2_params;
1483	int err;
1484
1485	CTR2(KTR_IW_CXGBE, "%s:smrepB %p", __func__, ep);
1486
1487	mpalen = sizeof(*mpa) + plen;
1488
1489	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
1490
1491		CTR3(KTR_IW_CXGBE, "%s:smrep1 %p %d", __func__, ep,
1492		    ep->mpa_attr.version);
1493		mpalen += sizeof(struct mpa_v2_conn_params);
1494	}
1495
1496	mpa = malloc(mpalen, M_CXGBE, M_NOWAIT);
1497	if (mpa == NULL)
1498		return (-ENOMEM);
1499
1500	memset(mpa, 0, sizeof(*mpa));
1501	memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
1502	mpa->flags = (ep->mpa_attr.crc_enabled ? MPA_CRC : 0) |
1503		(markers_enabled ? MPA_MARKERS : 0);
1504	mpa->revision = ep->mpa_attr.version;
1505	mpa->private_data_size = htons(plen);
1506
1507	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
1508
1509		mpa->flags |= MPA_ENHANCED_RDMA_CONN;
1510		mpa->private_data_size +=
1511			htons(sizeof(struct mpa_v2_conn_params));
1512		mpa_v2_params.ird = htons((u16)ep->ird);
1513		mpa_v2_params.ord = htons((u16)ep->ord);
1514		CTR5(KTR_IW_CXGBE, "%s:smrep3 %p %d %d %d", __func__, ep,
1515		    ep->mpa_attr.version, mpa_v2_params.ird, mpa_v2_params.ord);
1516
1517		if (peer2peer && (ep->mpa_attr.p2p_type !=
1518			FW_RI_INIT_P2PTYPE_DISABLED)) {
1519
1520			mpa_v2_params.ird |= htons(MPA_V2_PEER2PEER_MODEL);
1521
1522			if (p2p_type == FW_RI_INIT_P2PTYPE_RDMA_WRITE) {
1523
1524				mpa_v2_params.ord |=
1525					htons(MPA_V2_RDMA_WRITE_RTR);
1526				CTR5(KTR_IW_CXGBE, "%s:smrep4 %p %d %d %d",
1527				    __func__, ep, p2p_type, mpa_v2_params.ird,
1528				    mpa_v2_params.ord);
1529			}
1530			else if (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ) {
1531
1532				mpa_v2_params.ord |=
1533					htons(MPA_V2_RDMA_READ_RTR);
1534				CTR5(KTR_IW_CXGBE, "%s:smrep5 %p %d %d %d",
1535				    __func__, ep, p2p_type, mpa_v2_params.ird,
1536				    mpa_v2_params.ord);
1537			}
1538		}
1539
1540		memcpy(mpa->private_data, &mpa_v2_params,
1541			sizeof(struct mpa_v2_conn_params));
1542
1543		if (ep->plen)
1544			memcpy(mpa->private_data +
1545				sizeof(struct mpa_v2_conn_params), pdata, plen);
1546	} else
1547		if (plen)
1548			memcpy(mpa->private_data, pdata, plen);
1549
1550	m = m_getm(NULL, mpalen, M_NOWAIT, MT_DATA);
1551	if (m == NULL) {
1552		free(mpa, M_CXGBE);
1553		return (-ENOMEM);
1554	}
1555	m_copyback(m, 0, mpalen, (void *)mpa);
1556	free(mpa, M_CXGBE);
1557
1558
1559	ep->com.state = MPA_REP_SENT;
1560	ep->snd_seq += mpalen;
1561	err = -sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT,
1562			ep->com.thread);
1563	CTR3(KTR_IW_CXGBE, "%s:smrepE %p %d", __func__, ep, err);
1564	return err;
1565}
1566
1567
1568
1569static void close_complete_upcall(struct c4iw_ep *ep, int status)
1570{
1571	struct iw_cm_event event;
1572
1573	CTR2(KTR_IW_CXGBE, "%s:ccuB %p", __func__, ep);
1574	memset(&event, 0, sizeof(event));
1575	event.event = IW_CM_EVENT_CLOSE;
1576	event.status = status;
1577
1578	if (ep->com.cm_id) {
1579
1580		CTR2(KTR_IW_CXGBE, "%s:ccu1 %p", __func__, ep);
1581		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
1582		deref_cm_id(&ep->com);
1583		set_bit(CLOSE_UPCALL, &ep->com.history);
1584	}
1585	CTR2(KTR_IW_CXGBE, "%s:ccuE %p", __func__, ep);
1586}
1587
1588static int
1589send_abort(struct c4iw_ep *ep)
1590{
1591	struct socket *so = ep->com.so;
1592	struct sockopt sopt;
1593	int rc;
1594	struct linger l;
1595
1596	CTR5(KTR_IW_CXGBE, "%s ep %p so %p state %s tid %d", __func__, ep, so,
1597	    states[ep->com.state], ep->hwtid);
1598
1599	l.l_onoff = 1;
1600	l.l_linger = 0;
1601
1602	/* linger_time of 0 forces RST to be sent */
1603	sopt.sopt_dir = SOPT_SET;
1604	sopt.sopt_level = SOL_SOCKET;
1605	sopt.sopt_name = SO_LINGER;
1606	sopt.sopt_val = (caddr_t)&l;
1607	sopt.sopt_valsize = sizeof l;
1608	sopt.sopt_td = NULL;
1609	rc = sosetopt(so, &sopt);
1610	if (rc != 0) {
1611		log(LOG_ERR, "%s: sosetopt(%p, linger = 0) failed with %d.\n",
1612		    __func__, so, rc);
1613	}
1614
1615	uninit_iwarp_socket(so);
1616	soclose(so);
1617	set_bit(ABORT_CONN, &ep->com.history);
1618
1619	/*
1620	 * TBD: the iw_cxgbe driver should receive an ABORT reply for every ABORT
1621	 * request it has sent. But the current TOE driver does not propagate
1622	 * this ABORT reply event (via do_abort_rpl) to iw_cxgbe. So as a
1623	 * workaround, dereference 'ep' here instead of doing it in the (not yet
1624	 * implemented) abort_rpl() handler of the iw_cxgbe driver.
1625	 */
1626	release_ep_resources(ep);
1627
1628	return (0);
1629}
1630
1631static void peer_close_upcall(struct c4iw_ep *ep)
1632{
1633	struct iw_cm_event event;
1634
1635	CTR2(KTR_IW_CXGBE, "%s:pcuB %p", __func__, ep);
1636	memset(&event, 0, sizeof(event));
1637	event.event = IW_CM_EVENT_DISCONNECT;
1638
1639	if (ep->com.cm_id) {
1640
1641		CTR2(KTR_IW_CXGBE, "%s:pcu1 %p", __func__, ep);
1642		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
1643		set_bit(DISCONN_UPCALL, &ep->com.history);
1644	}
1645	CTR2(KTR_IW_CXGBE, "%s:pcuE %p", __func__, ep);
1646}
1647
1648static void peer_abort_upcall(struct c4iw_ep *ep)
1649{
1650	struct iw_cm_event event;
1651
1652	CTR2(KTR_IW_CXGBE, "%s:pauB %p", __func__, ep);
1653	memset(&event, 0, sizeof(event));
1654	event.event = IW_CM_EVENT_CLOSE;
1655	event.status = -ECONNRESET;
1656
1657	if (ep->com.cm_id) {
1658
1659		CTR2(KTR_IW_CXGBE, "%s:pau1 %p", __func__, ep);
1660		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
1661		deref_cm_id(&ep->com);
1662		set_bit(ABORT_UPCALL, &ep->com.history);
1663	}
1664	CTR2(KTR_IW_CXGBE, "%s:pauE %p", __func__, ep);
1665}
1666
1667static void connect_reply_upcall(struct c4iw_ep *ep, int status)
1668{
1669	struct iw_cm_event event;
1670
1671	CTR3(KTR_IW_CXGBE, "%s:cruB %p, status: %d", __func__, ep, status);
1672	memset(&event, 0, sizeof(event));
1673	event.event = IW_CM_EVENT_CONNECT_REPLY;
1674	event.status = ((status == -ECONNABORTED) || (status == -EPIPE)) ?
1675					-ECONNRESET : status;
1676	event.local_addr = ep->com.local_addr;
1677	event.remote_addr = ep->com.remote_addr;
1678
1679	if ((status == 0) || (status == -ECONNREFUSED)) {
1680
1681		if (!ep->tried_with_mpa_v1) {
1682
1683			CTR2(KTR_IW_CXGBE, "%s:cru1 %p", __func__, ep);
1684			/* this means MPA_v2 is used */
1685			event.ord = ep->ird;
1686			event.ird = ep->ord;
1687			event.private_data_len = ep->plen -
1688				sizeof(struct mpa_v2_conn_params);
1689			event.private_data = ep->mpa_pkt +
1690				sizeof(struct mpa_message) +
1691				sizeof(struct mpa_v2_conn_params);
1692		} else {
1693
1694			CTR2(KTR_IW_CXGBE, "%s:cru2 %p", __func__, ep);
1695			/* this means MPA_v1 is used */
1696			event.ord = c4iw_max_read_depth;
1697			event.ird = c4iw_max_read_depth;
1698			event.private_data_len = ep->plen;
1699			event.private_data = ep->mpa_pkt +
1700				sizeof(struct mpa_message);
1701		}
1702	}
1703
1704	if (ep->com.cm_id) {
1705
1706		CTR2(KTR_IW_CXGBE, "%s:cru3 %p", __func__, ep);
1707		set_bit(CONN_RPL_UPCALL, &ep->com.history);
1708		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
1709	}
1710
1711	if(status == -ECONNABORTED) {
1712
1713		CTR3(KTR_IW_CXGBE, "%s:cruE %p %d", __func__, ep, status);
1714		return;
1715	}
1716
1717	if (status < 0) {
1718
1719		CTR3(KTR_IW_CXGBE, "%s:cru4 %p %d", __func__, ep, status);
1720		deref_cm_id(&ep->com);
1721	}
1722
1723	CTR2(KTR_IW_CXGBE, "%s:cruE %p", __func__, ep);
1724}
1725
1726static int connect_request_upcall(struct c4iw_ep *ep)
1727{
1728	struct iw_cm_event event;
1729	int ret;
1730
1731	CTR3(KTR_IW_CXGBE, "%s: ep %p, mpa_v1 %d", __func__, ep,
1732	    ep->tried_with_mpa_v1);
1733
1734	memset(&event, 0, sizeof(event));
1735	event.event = IW_CM_EVENT_CONNECT_REQUEST;
1736	event.local_addr = ep->com.local_addr;
1737	event.remote_addr = ep->com.remote_addr;
1738	event.provider_data = ep;
1739
1740	if (!ep->tried_with_mpa_v1) {
1741		/* this means MPA_v2 is used */
1742		event.ord = ep->ord;
1743		event.ird = ep->ird;
1744		event.private_data_len = ep->plen -
1745			sizeof(struct mpa_v2_conn_params);
1746		event.private_data = ep->mpa_pkt + sizeof(struct mpa_message) +
1747			sizeof(struct mpa_v2_conn_params);
1748	} else {
1749
1750		/* this means MPA_v1 is used. Send max supported */
1751		event.ord = c4iw_max_read_depth;
1752		event.ird = c4iw_max_read_depth;
1753		event.private_data_len = ep->plen;
1754		event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
1755	}
1756
1757	c4iw_get_ep(&ep->com);
1758	ret = ep->parent_ep->com.cm_id->event_handler(ep->parent_ep->com.cm_id,
1759	    &event);
1760	if(ret) {
1761		CTR3(KTR_IW_CXGBE, "%s: ep %p, Failure while notifying event to"
1762			" IWCM, err:%d", __func__, ep, ret);
1763		c4iw_put_ep(&ep->com);
1764	} else
1765		/* Dereference parent_ep only in success case.
1766		 * In case of failure, parent_ep is dereferenced by the caller
1767		 * of process_mpa_request().
1768		 */
1769		c4iw_put_ep(&ep->parent_ep->com);
1770
1771	set_bit(CONNREQ_UPCALL, &ep->com.history);
1772	return ret;
1773}
1774
1775static void established_upcall(struct c4iw_ep *ep)
1776{
1777	struct iw_cm_event event;
1778
1779	CTR2(KTR_IW_CXGBE, "%s:euB %p", __func__, ep);
1780	memset(&event, 0, sizeof(event));
1781	event.event = IW_CM_EVENT_ESTABLISHED;
1782	event.ird = ep->ord;
1783	event.ord = ep->ird;
1784
1785	if (ep->com.cm_id) {
1786
1787		CTR2(KTR_IW_CXGBE, "%s:eu1 %p", __func__, ep);
1788		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
1789		set_bit(ESTAB_UPCALL, &ep->com.history);
1790	}
1791	CTR2(KTR_IW_CXGBE, "%s:euE %p", __func__, ep);
1792}
1793
1794
1795#define RELAXED_IRD_NEGOTIATION 1
1796
1797/*
1798 * process_mpa_reply - process streaming mode MPA reply
1799 *
1800 * Returns:
1801 *
1802 * 0 upon success indicating a connect request was delivered to the ULP
1803 * or the mpa request is incomplete but valid so far.
1804 *
1805 * 1 if a failure requires the caller to close the connection.
1806 *
1807 * 2 if a failure requires the caller to abort the connection.
1808 */
1809static int process_mpa_reply(struct c4iw_ep *ep)
1810{
1811	struct mpa_message *mpa;
1812	struct mpa_v2_conn_params *mpa_v2_params;
1813	u16 plen;
1814	u16 resp_ird, resp_ord;
1815	u8 rtr_mismatch = 0, insuff_ird = 0;
1816	struct c4iw_qp_attributes attrs = {0};
1817	enum c4iw_qp_attr_mask mask;
1818	int err;
1819	struct mbuf *top, *m;
1820	int flags = MSG_DONTWAIT;
1821	struct uio uio;
1822	int disconnect = 0;
1823
1824	CTR2(KTR_IW_CXGBE, "%s:pmrB %p", __func__, ep);
1825
1826	/*
1827	 * Stop mpa timer.  If it expired, then
1828	 * we ignore the MPA reply.  process_timeout()
1829	 * will abort the connection.
1830	 */
1831	if (STOP_EP_TIMER(ep))
1832		return 0;
1833
1834	uio.uio_resid = 1000000;
1835	uio.uio_td = ep->com.thread;
1836	err = soreceive(ep->com.so, NULL, &uio, &top, NULL, &flags);
1837
1838	if (err) {
1839
1840		if (err == EWOULDBLOCK) {
1841
1842			CTR2(KTR_IW_CXGBE, "%s:pmr1 %p", __func__, ep);
1843			START_EP_TIMER(ep);
1844			return 0;
1845		}
1846		err = -err;
1847		CTR2(KTR_IW_CXGBE, "%s:pmr2 %p", __func__, ep);
1848		goto err;
1849	}
1850
1851	if (ep->com.so->so_rcv.sb_mb) {
1852
1853		CTR2(KTR_IW_CXGBE, "%s:pmr3 %p", __func__, ep);
1854		printf("%s data after soreceive called! so %p sb_mb %p top %p\n",
1855		       __func__, ep->com.so, ep->com.so->so_rcv.sb_mb, top);
1856	}
1857
1858	m = top;
1859
1860	do {
1861
1862		CTR2(KTR_IW_CXGBE, "%s:pmr4 %p", __func__, ep);
1863		/*
1864		 * If we get more than the supported amount of private data
1865		 * then we must fail this connection.
1866		 */
1867		if (ep->mpa_pkt_len + m->m_len > sizeof(ep->mpa_pkt)) {
1868
1869			CTR3(KTR_IW_CXGBE, "%s:pmr5 %p %d", __func__, ep,
1870			    ep->mpa_pkt_len + m->m_len);
1871			err = (-EINVAL);
1872			goto err_stop_timer;
1873		}
1874
1875		/*
1876		 * copy the new data into our accumulation buffer.
1877		 */
1878		m_copydata(m, 0, m->m_len, &(ep->mpa_pkt[ep->mpa_pkt_len]));
1879		ep->mpa_pkt_len += m->m_len;
1880		if (!m->m_next)
1881			m = m->m_nextpkt;
1882		else
1883			m = m->m_next;
1884	} while (m);
1885
1886	m_freem(top);
1887	/*
1888	 * if we don't even have the mpa message, then bail.
1889	 */
1890	if (ep->mpa_pkt_len < sizeof(*mpa)) {
1891		return 0;
1892	}
1893	mpa = (struct mpa_message *) ep->mpa_pkt;
1894
1895	/* Validate MPA header. */
1896	if (mpa->revision > mpa_rev) {
1897
1898		CTR4(KTR_IW_CXGBE, "%s:pmr6 %p %d %d", __func__, ep,
1899		    mpa->revision, mpa_rev);
1900		printk(KERN_ERR MOD "%s MPA version mismatch. Local = %d, "
1901				" Received = %d\n", __func__, mpa_rev, mpa->revision);
1902		err = -EPROTO;
1903		goto err_stop_timer;
1904	}
1905
1906	if (memcmp(mpa->key, MPA_KEY_REP, sizeof(mpa->key))) {
1907
1908		CTR2(KTR_IW_CXGBE, "%s:pmr7 %p", __func__, ep);
1909		err = -EPROTO;
1910		goto err_stop_timer;
1911	}
1912
1913	plen = ntohs(mpa->private_data_size);
1914
1915	/*
1916	 * Fail if there's too much private data.
1917	 */
1918	if (plen > MPA_MAX_PRIVATE_DATA) {
1919
1920		CTR2(KTR_IW_CXGBE, "%s:pmr8 %p", __func__, ep);
1921		err = -EPROTO;
1922		goto err_stop_timer;
1923	}
1924
1925	/*
1926	 * If plen does not account for pkt size
1927	 * Fail if we have received more data than the MPA header plus plen allows.
1928	if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
1929
1930		CTR2(KTR_IW_CXGBE, "%s:pmr9 %p", __func__, ep);
1931		STOP_EP_TIMER(ep);
1932		err = -EPROTO;
1933		goto err_stop_timer;
1934	}
1935
1936	ep->plen = (u8) plen;
1937
1938	/*
1939	 * If we don't have all the pdata yet, then bail.
1940	 * We'll continue process when more data arrives.
1941	 */
1942	if (ep->mpa_pkt_len < (sizeof(*mpa) + plen)) {
1943
1944		CTR2(KTR_IW_CXGBE, "%s:pmra %p", __func__, ep);
1945		return 0;
1946	}
1947
1948	if (mpa->flags & MPA_REJECT) {
1949
1950		CTR2(KTR_IW_CXGBE, "%s:pmrb %p", __func__, ep);
1951		err = -ECONNREFUSED;
1952		goto err_stop_timer;
1953	}
1954
1955	/*
1956	 * If we get here we have accumulated the entire mpa
1957	 * start reply message including private data. And
1958	 * the MPA header is valid.
1959	 */
1960	ep->com.state = FPDU_MODE;
1961	ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
1962	ep->mpa_attr.recv_marker_enabled = markers_enabled;
1963	ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
1964	ep->mpa_attr.version = mpa->revision;
1965	ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
1966
1967	if (mpa->revision == 2) {
1968
1969		CTR2(KTR_IW_CXGBE, "%s:pmrc %p", __func__, ep);
1970		ep->mpa_attr.enhanced_rdma_conn =
1971			mpa->flags & MPA_ENHANCED_RDMA_CONN ? 1 : 0;
1972
1973		if (ep->mpa_attr.enhanced_rdma_conn) {
1974
1975			CTR2(KTR_IW_CXGBE, "%s:pmrd %p", __func__, ep);
1976			mpa_v2_params = (struct mpa_v2_conn_params *)
1977				(ep->mpa_pkt + sizeof(*mpa));
1978			resp_ird = ntohs(mpa_v2_params->ird) &
1979				MPA_V2_IRD_ORD_MASK;
1980			resp_ord = ntohs(mpa_v2_params->ord) &
1981				MPA_V2_IRD_ORD_MASK;
1982
1983			/*
1984			 * This is a double-check. Ideally, below checks are
1985			 * not required since ird/ord stuff has been taken
1986			 * care of in c4iw_accept_cr
1987			 */
1988			if (ep->ird < resp_ord) {
1989				if (RELAXED_IRD_NEGOTIATION && resp_ord <=
1990				   ep->com.dev->rdev.adap->params.max_ordird_qp)
1991					ep->ird = resp_ord;
1992				else
1993					insuff_ird = 1;
1994			} else if (ep->ird > resp_ord) {
1995				ep->ird = resp_ord;
1996			}
1997			if (ep->ord > resp_ird) {
1998				if (RELAXED_IRD_NEGOTIATION)
1999					ep->ord = resp_ird;
2000				else
2001					insuff_ird = 1;
2002			}
2003			if (insuff_ird) {
2004				err = -ENOMEM;
2005				ep->ird = resp_ord;
2006				ep->ord = resp_ird;
2007			}
2008
2009			if (ntohs(mpa_v2_params->ird) &
2010				MPA_V2_PEER2PEER_MODEL) {
2011
2012				CTR2(KTR_IW_CXGBE, "%s:pmrf %p", __func__, ep);
2013				if (ntohs(mpa_v2_params->ord) &
2014					MPA_V2_RDMA_WRITE_RTR) {
2015
2016					CTR2(KTR_IW_CXGBE, "%s:pmrg %p", __func__, ep);
2017					ep->mpa_attr.p2p_type =
2018						FW_RI_INIT_P2PTYPE_RDMA_WRITE;
2019				}
2020				else if (ntohs(mpa_v2_params->ord) &
2021					MPA_V2_RDMA_READ_RTR) {
2022
2023					CTR2(KTR_IW_CXGBE, "%s:pmrh %p", __func__, ep);
2024					ep->mpa_attr.p2p_type =
2025						FW_RI_INIT_P2PTYPE_READ_REQ;
2026				}
2027			}
2028		}
2029	} else {
2030
2031		CTR2(KTR_IW_CXGBE, "%s:pmri %p", __func__, ep);
2032
2033		if (mpa->revision == 1) {
2034
2035			CTR2(KTR_IW_CXGBE, "%s:pmrj %p", __func__, ep);
2036
2037			if (peer2peer) {
2038
2039				CTR2(KTR_IW_CXGBE, "%s:pmrk %p", __func__, ep);
2040				ep->mpa_attr.p2p_type = p2p_type;
2041			}
2042		}
2043	}
2044
2045	if (set_tcpinfo(ep)) {
2046
2047		CTR2(KTR_IW_CXGBE, "%s:pmrl %p", __func__, ep);
2048		printf("%s set_tcpinfo error\n", __func__);
2049		err = -ECONNRESET;
2050		goto err;
2051	}
2052
2053	CTR6(KTR_IW_CXGBE, "%s - crc_enabled = %d, recv_marker_enabled = %d, "
2054	    "xmit_marker_enabled = %d, version = %d p2p_type = %d", __func__,
2055	    ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
2056	    ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version,
2057	    ep->mpa_attr.p2p_type);
2058
2059	/*
2060	 * If the responder's RTR does not match the initiator's, set
2061	 * FW_RI_INIT_P2PTYPE_DISABLED in the MPA attributes so that no RTR is
2062	 * generated when moving the QP to the RTS state.
2063	 * A TERM message will be sent after the QP has moved to RTS.
2064	 */
2065	if ((ep->mpa_attr.version == 2) && peer2peer &&
2066		(ep->mpa_attr.p2p_type != p2p_type)) {
2067
2068		CTR2(KTR_IW_CXGBE, "%s:pmrm %p", __func__, ep);
2069		ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
2070		rtr_mismatch = 1;
2071	}
2072
2073
2074	//ep->ofld_txq = TOEPCB(ep->com.so)->ofld_txq;
2075	attrs.mpa_attr = ep->mpa_attr;
2076	attrs.max_ird = ep->ird;
2077	attrs.max_ord = ep->ord;
2078	attrs.llp_stream_handle = ep;
2079	attrs.next_state = C4IW_QP_STATE_RTS;
2080
2081	mask = C4IW_QP_ATTR_NEXT_STATE |
2082		C4IW_QP_ATTR_LLP_STREAM_HANDLE | C4IW_QP_ATTR_MPA_ATTR |
2083		C4IW_QP_ATTR_MAX_IRD | C4IW_QP_ATTR_MAX_ORD;
2084
2085	/* bind QP and TID with INIT_WR */
2086	err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, mask, &attrs, 1);
2087
2088	if (err) {
2089
2090		CTR2(KTR_IW_CXGBE, "%s:pmrn %p", __func__, ep);
2091		goto err;
2092	}
2093
2094	/*
2095	 * If responder's RTR requirement did not match with what initiator
2096	 * supports, generate TERM message
2097	 */
2098	if (rtr_mismatch) {
2099
2100		CTR2(KTR_IW_CXGBE, "%s:pmro %p", __func__, ep);
2101		printk(KERN_ERR "%s: RTR mismatch, sending TERM\n", __func__);
2102		attrs.layer_etype = LAYER_MPA | DDP_LLP;
2103		attrs.ecode = MPA_NOMATCH_RTR;
2104		attrs.next_state = C4IW_QP_STATE_TERMINATE;
2105		attrs.send_term = 1;
2106		err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
2107			C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
2108		err = -ENOMEM;
2109		disconnect = 1;
2110		goto out;
2111	}
2112
2113	/*
2114	 * Generate a TERM if the initiator's IRD is not sufficient for the
2115	 * responder-provided ORD.  Currently we behave the same way even
2116	 * when the responder-provided IRD is insufficient for the
2117	 * initiator's ORD.
2118	 */
2119	if (insuff_ird) {
2120
2121		CTR2(KTR_IW_CXGBE, "%s:pmrp %p", __func__, ep);
2122		printk(KERN_ERR "%s: Insufficient IRD, sending TERM\n",
2123				__func__);
2124		attrs.layer_etype = LAYER_MPA | DDP_LLP;
2125		attrs.ecode = MPA_INSUFF_IRD;
2126		attrs.next_state = C4IW_QP_STATE_TERMINATE;
2127		attrs.send_term = 1;
2128		err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
2129			C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
2130		err = -ENOMEM;
2131		disconnect = 1;
2132		goto out;
2133	}
2134	goto out;
2135err_stop_timer:
2136	STOP_EP_TIMER(ep);
2137err:
2138	disconnect = 2;
2139out:
2140	connect_reply_upcall(ep, err);
2141	CTR2(KTR_IW_CXGBE, "%s:pmrE %p", __func__, ep);
2142	return disconnect;
2143}
2144
2145/*
2146 * process_mpa_request - process streaming mode MPA request
2147 *
2148 * Returns:
2149 *
2150 * 0 upon success indicating a connect request was delivered to the ULP
2151 * or the mpa request is incomplete but valid so far.
2152 *
2153 * 1 if a failure requires the caller to close the connection.
2154 *
2155 * 2 if a failure requires the caller to abort the connection.
2156 */
2157static int
2158process_mpa_request(struct c4iw_ep *ep)
2159{
2160	struct mpa_message *mpa;
2161	struct mpa_v2_conn_params *mpa_v2_params;
2162	u16 plen;
2163	int flags = MSG_DONTWAIT;
2164	int rc;
2165	struct iovec iov;
2166	struct uio uio;
2167	enum c4iw_ep_state state = ep->com.state;
2168
2169	CTR3(KTR_IW_CXGBE, "%s: ep %p, state %s", __func__, ep, states[state]);
2170
2171	if (state != MPA_REQ_WAIT)
2172		return 0;
2173
2174	iov.iov_base = &ep->mpa_pkt[ep->mpa_pkt_len];
2175	iov.iov_len = sizeof(ep->mpa_pkt) - ep->mpa_pkt_len;
2176	uio.uio_iov = &iov;
2177	uio.uio_iovcnt = 1;
2178	uio.uio_offset = 0;
2179	uio.uio_resid = sizeof(ep->mpa_pkt) - ep->mpa_pkt_len;
2180	uio.uio_segflg = UIO_SYSSPACE;
2181	uio.uio_rw = UIO_READ;
2182	uio.uio_td = NULL; /* uio.uio_td = ep->com.thread; */
2183
2184	rc = soreceive(ep->com.so, NULL, &uio, NULL, NULL, &flags);
2185	if (rc == EAGAIN)
2186		return 0;
2187	else if (rc)
2188		goto err_stop_timer;
2189
2190	KASSERT(uio.uio_offset > 0, ("%s: soreceive on so %p read no data",
2191	    __func__, ep->com.so));
2192	ep->mpa_pkt_len += uio.uio_offset;
2193
2194	/*
2195	 * If we get more than the supported amount of private data then we must
2196	 * fail this connection.  XXX: check so_rcv->sb_cc, or peek with another
2197	 * soreceive, or increase the size of mpa_pkt by 1 and abort if the last
2198	 * byte is filled by the soreceive above.
2199	 */
2200
2201	/* Don't even have the MPA message.  Wait for more data to arrive. */
2202	if (ep->mpa_pkt_len < sizeof(*mpa))
2203		return 0;
2204	mpa = (struct mpa_message *) ep->mpa_pkt;
2205
2206	/*
2207	 * Validate MPA Header.
2208	 */
2209	if (mpa->revision > mpa_rev) {
2210		log(LOG_ERR, "%s: MPA version mismatch. Local = %d,"
2211		    " Received = %d\n", __func__, mpa_rev, mpa->revision);
2212		goto err_stop_timer;
2213	}
2214
2215	if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key)))
2216		goto err_stop_timer;
2217
2218	/*
2219	 * Fail if there's too much private data.
2220	 */
2221	plen = ntohs(mpa->private_data_size);
2222	if (plen > MPA_MAX_PRIVATE_DATA)
2223		goto err_stop_timer;
2224
2225	/*
2226	 * Fail if we have received more data than the MPA header plus plen allows.
2227	 */
2228	if (ep->mpa_pkt_len > (sizeof(*mpa) + plen))
2229		goto err_stop_timer;
2230
2231	ep->plen = (u8) plen;
2232
2233	/*
2234	 * If we don't have all the pdata yet, then bail.
2235	 */
2236	if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
2237		return 0;
2238
2239	/*
2240	 * If we get here we have accumulated the entire mpa
2241	 * start reply message including private data.
2242	 */
2243	ep->mpa_attr.initiator = 0;
2244	ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
2245	ep->mpa_attr.recv_marker_enabled = markers_enabled;
2246	ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
2247	ep->mpa_attr.version = mpa->revision;
2248	if (mpa->revision == 1)
2249		ep->tried_with_mpa_v1 = 1;
2250	ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
2251
2252	if (mpa->revision == 2) {
2253		ep->mpa_attr.enhanced_rdma_conn =
2254		    mpa->flags & MPA_ENHANCED_RDMA_CONN ? 1 : 0;
2255		if (ep->mpa_attr.enhanced_rdma_conn) {
2256			mpa_v2_params = (struct mpa_v2_conn_params *)
2257				(ep->mpa_pkt + sizeof(*mpa));
2258			ep->ird = ntohs(mpa_v2_params->ird) &
2259				MPA_V2_IRD_ORD_MASK;
2260			ep->ird = min_t(u32, ep->ird,
2261					cur_max_read_depth(ep->com.dev));
2262			ep->ord = ntohs(mpa_v2_params->ord) &
2263				MPA_V2_IRD_ORD_MASK;
2264			ep->ord = min_t(u32, ep->ord,
2265					cur_max_read_depth(ep->com.dev));
2266			CTR3(KTR_IW_CXGBE, "%s initiator ird %u ord %u\n",
2267				 __func__, ep->ird, ep->ord);
2268			if (ntohs(mpa_v2_params->ird) & MPA_V2_PEER2PEER_MODEL)
2269				if (peer2peer) {
2270					if (ntohs(mpa_v2_params->ord) &
2271							MPA_V2_RDMA_WRITE_RTR)
2272						ep->mpa_attr.p2p_type =
2273						FW_RI_INIT_P2PTYPE_RDMA_WRITE;
2274					else if (ntohs(mpa_v2_params->ord) &
2275							MPA_V2_RDMA_READ_RTR)
2276						ep->mpa_attr.p2p_type =
2277						FW_RI_INIT_P2PTYPE_READ_REQ;
2278				}
2279		}
2280	} else if (mpa->revision == 1 && peer2peer)
2281		ep->mpa_attr.p2p_type = p2p_type;
2282
2283	if (set_tcpinfo(ep))
2284		goto err_stop_timer;
2285
2286	CTR5(KTR_IW_CXGBE, "%s: crc_enabled = %d, recv_marker_enabled = %d, "
2287	    "xmit_marker_enabled = %d, version = %d", __func__,
2288	    ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
2289	    ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version);
2290
2291	ep->com.state = MPA_REQ_RCVD;
2292	STOP_EP_TIMER(ep);
2293
2294	/* drive upcall */
2295	if (ep->parent_ep->com.state != DEAD)
2296		if (connect_request_upcall(ep))
2297			goto err_out;
2298	return 0;
2299
2300err_stop_timer:
2301	STOP_EP_TIMER(ep);
2302err_out:
2303	return 2;
2304}
2305
2306/*
2307 * c4iw_reject_cr - reject an incoming connection request.
2308 * Called by the IWCM when the ULP rejects the connection: send an MPA
2309 * reject frame carrying any private data, then disconnect the endpoint.
2310 */
2311int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
2312{
2313	int err;
2314	struct c4iw_ep *ep = to_ep(cm_id);
2315	int abort = 0;
2316
2317	mutex_lock(&ep->com.mutex);
2318	CTR2(KTR_IW_CXGBE, "%s:crcB %p", __func__, ep);
2319
2320	if ((ep->com.state == DEAD) ||
2321			(ep->com.state != MPA_REQ_RCVD)) {
2322
2323		CTR2(KTR_IW_CXGBE, "%s:crc1 %p", __func__, ep);
2324		mutex_unlock(&ep->com.mutex);
2325		c4iw_put_ep(&ep->com);
2326		return -ECONNRESET;
2327	}
2328	set_bit(ULP_REJECT, &ep->com.history);
2329
2330	if (mpa_rev == 0) {
2331
2332		CTR2(KTR_IW_CXGBE, "%s:crc2 %p", __func__, ep);
2333		abort = 1;
2334	}
2335	else {
2336
2337		CTR2(KTR_IW_CXGBE, "%s:crc3 %p", __func__, ep);
2338		abort = send_mpa_reject(ep, pdata, pdata_len);
2339	}
2340	STOP_EP_TIMER(ep);
2341	err = c4iw_ep_disconnect(ep, abort != 0, GFP_KERNEL);
2342	mutex_unlock(&ep->com.mutex);
2343	c4iw_put_ep(&ep->com);
2344	CTR3(KTR_IW_CXGBE, "%s:crc4 %p, err: %d", __func__, ep, err);
2345	return 0;
2346}
2347
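/*
 * c4iw_accept_cr - the ULP accepts a connection that is in MPA_REQ_RCVD
 * state.  Finalize the IRD/ORD negotiation, bind the QP to the endpoint,
 * move the QP to RTS, and send the MPA reply carrying the ULP's private
 * data.
 */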
2348int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
2349{
2350	int err;
2351	struct c4iw_qp_attributes attrs = {0};
2352	enum c4iw_qp_attr_mask mask;
2353	struct c4iw_ep *ep = to_ep(cm_id);
2354	struct c4iw_dev *h = to_c4iw_dev(cm_id->device);
2355	struct c4iw_qp *qp = get_qhp(h, conn_param->qpn);
2356	int abort = 0;
2357
2358	mutex_lock(&ep->com.mutex);
2359	CTR2(KTR_IW_CXGBE, "%s:cacB %p", __func__, ep);
2360
2361	if ((ep->com.state == DEAD) ||
2362			(ep->com.state != MPA_REQ_RCVD)) {
2363
2364		CTR2(KTR_IW_CXGBE, "%s:cac1 %p", __func__, ep);
2365		err = -ECONNRESET;
2366		goto err_out;
2367	}
2368
2369	BUG_ON(!qp);
2370
2371	set_bit(ULP_ACCEPT, &ep->com.history);
2372
2373	if ((conn_param->ord > c4iw_max_read_depth) ||
2374		(conn_param->ird > c4iw_max_read_depth)) {
2375
2376		CTR2(KTR_IW_CXGBE, "%s:cac2 %p", __func__, ep);
2377		err = -EINVAL;
2378		goto err_abort;
2379	}
2380
2381	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
2382
2383		CTR2(KTR_IW_CXGBE, "%s:cac3 %p", __func__, ep);
2384
2385		if (conn_param->ord > ep->ird) {
2386			if (RELAXED_IRD_NEGOTIATION) {
2387				conn_param->ord = ep->ird;
2388			} else {
2389				ep->ird = conn_param->ird;
2390				ep->ord = conn_param->ord;
2391				send_mpa_reject(ep, conn_param->private_data,
2392						conn_param->private_data_len);
2393				err = -ENOMEM;
2394				goto err_abort;
2395			}
2396		}
2397		if (conn_param->ird < ep->ord) {
2398			if (RELAXED_IRD_NEGOTIATION &&
2399			    ep->ord <= h->rdev.adap->params.max_ordird_qp) {
2400				conn_param->ird = ep->ord;
2401			} else {
2402				err = -ENOMEM;
2403				goto err_abort;
2404			}
2405		}
2406	}
2407	ep->ird = conn_param->ird;
2408	ep->ord = conn_param->ord;
2409
2410	if (ep->mpa_attr.version == 1) {
2411		if (peer2peer && ep->ird == 0)
2412			ep->ird = 1;
2413	} else {
2414		if (peer2peer &&
2415		    (ep->mpa_attr.p2p_type != FW_RI_INIT_P2PTYPE_DISABLED) &&
2416		    (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ) && ep->ird == 0)
2417			ep->ird = 1;
2418	}
2419
2420	CTR4(KTR_IW_CXGBE, "%s %d ird %d ord %d\n", __func__, __LINE__,
2421			ep->ird, ep->ord);
2422
2423	ep->com.cm_id = cm_id;
2424	ref_cm_id(&ep->com);
2425	ep->com.qp = qp;
2426	ref_qp(ep);
2427	//ep->ofld_txq = TOEPCB(ep->com.so)->ofld_txq;
2428
2429	/* bind QP to EP and move to RTS */
2430	attrs.mpa_attr = ep->mpa_attr;
2431	attrs.max_ird = ep->ird;
2432	attrs.max_ord = ep->ord;
2433	attrs.llp_stream_handle = ep;
2434	attrs.next_state = C4IW_QP_STATE_RTS;
2435
2436	/* bind QP and TID with INIT_WR */
2437	mask = C4IW_QP_ATTR_NEXT_STATE |
2438		C4IW_QP_ATTR_LLP_STREAM_HANDLE |
2439		C4IW_QP_ATTR_MPA_ATTR |
2440		C4IW_QP_ATTR_MAX_IRD |
2441		C4IW_QP_ATTR_MAX_ORD;
2442
2443	err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, mask, &attrs, 1);
2444	if (err) {
2445		CTR3(KTR_IW_CXGBE, "%s:caca %p, err: %d", __func__, ep, err);
2446		goto err_deref_cm_id;
2447	}
2448
2449	err = send_mpa_reply(ep, conn_param->private_data,
2450			conn_param->private_data_len);
2451	if (err) {
2452		CTR3(KTR_IW_CXGBE, "%s:cacb %p, err: %d", __func__, ep, err);
2453		goto err_deref_cm_id;
2454	}
2455
2456	ep->com.state = FPDU_MODE;
2457	established_upcall(ep);
2458	mutex_unlock(&ep->com.mutex);
2459	c4iw_put_ep(&ep->com);
2460	CTR2(KTR_IW_CXGBE, "%s:cacE %p", __func__, ep);
2461	return 0;
2462err_deref_cm_id:
2463	deref_cm_id(&ep->com);
2464err_abort:
2465	abort = 1;
2466err_out:
2467	if (abort)
2468		c4iw_ep_disconnect(ep, 1, GFP_KERNEL);
2469	mutex_unlock(&ep->com.mutex);
2470	c4iw_put_ep(&ep->com);
2471	CTR2(KTR_IW_CXGBE, "%s:cacE err %p", __func__, ep);
2472	return err;
2473}
2474
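/*
 * Create a TCP socket, bind it to 'laddr', and write the address actually
 * bound (e.g. a kernel-chosen ephemeral port) back into 'laddr' for the
 * caller.  Returns 0 on success or an error from the socket layer.
 */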
2475static int
2476c4iw_sock_create(struct sockaddr_storage *laddr, struct socket **so)
2477{
2478	int ret;
2479	int size;
2480	struct socket *sock = NULL;
2481
2482	ret = sock_create_kern(laddr->ss_family,
2483			SOCK_STREAM, IPPROTO_TCP, &sock);
2484	if (ret) {
2485		CTR2(KTR_IW_CXGBE, "%s:Failed to create TCP socket. err %d",
2486				__func__, ret);
2487		return ret;
2488	}
2489
2490	ret = sobind(sock, (struct sockaddr *)laddr, curthread);
2491	if (ret) {
2492		CTR2(KTR_IW_CXGBE, "%s:Failed to bind socket. err %d",
2493				__func__, ret);
2494		sock_release(sock);
2495		return ret;
2496	}
2497
2498	size = laddr->ss_family == AF_INET6 ?
2499		sizeof(struct sockaddr_in6) : sizeof(struct sockaddr_in);
2500	ret = sock_getname(sock, (struct sockaddr *)laddr, &size, 0);
2501	if (ret) {
2502		CTR2(KTR_IW_CXGBE, "%s:sock_getname failed. err %d",
2503				__func__, ret);
2504		sock_release(sock);
2505		return ret;
2506	}
2507
2508	*so = sock;
2509	return 0;
2510}
2511
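/*
 * iwcm->connect handler: allocate an endpoint, look up the egress interface
 * for the remote address (which must be TOE-capable), create and bind a
 * local socket, and start a non-blocking connect to the peer.
 */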
2512int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
2513{
2514	int err = 0;
2515	struct c4iw_dev *dev = to_c4iw_dev(cm_id->device);
2516	struct c4iw_ep *ep = NULL;
2517	struct ifnet    *nh_ifp;        /* Logical egress interface */
2518#ifdef VIMAGE
2519	struct rdma_cm_id *rdma_id = (struct rdma_cm_id*)cm_id->context;
2520	struct vnet *vnet = rdma_id->route.addr.dev_addr.net;
2521#endif
2522
2523	CTR2(KTR_IW_CXGBE, "%s:ccB %p", __func__, cm_id);
2524
2525
2526	if ((conn_param->ord > c4iw_max_read_depth) ||
2527		(conn_param->ird > c4iw_max_read_depth)) {
2528
2529		CTR2(KTR_IW_CXGBE, "%s:cc1 %p", __func__, cm_id);
2530		err = -EINVAL;
2531		goto out;
2532	}
2533	ep = alloc_ep(sizeof(*ep), GFP_KERNEL);
2534	cm_id->provider_data = ep;
2535
2536	init_timer(&ep->timer);
2537	ep->plen = conn_param->private_data_len;
2538
2539	if (ep->plen) {
2540
2541		CTR2(KTR_IW_CXGBE, "%s:cc3 %p", __func__, ep);
2542		memcpy(ep->mpa_pkt + sizeof(struct mpa_message),
2543				conn_param->private_data, ep->plen);
2544	}
2545	ep->ird = conn_param->ird;
2546	ep->ord = conn_param->ord;
2547
2548	if (peer2peer && ep->ord == 0) {
2549
2550		CTR2(KTR_IW_CXGBE, "%s:cc4 %p", __func__, ep);
2551		ep->ord = 1;
2552	}
2553
2554	ep->com.dev = dev;
2555	ep->com.cm_id = cm_id;
2556	ref_cm_id(&ep->com);
2557	ep->com.qp = get_qhp(dev, conn_param->qpn);
2558
2559	if (!ep->com.qp) {
2560
2561		CTR2(KTR_IW_CXGBE, "%s:cc5 %p", __func__, ep);
2562		err = -EINVAL;
2563		goto fail;
2564	}
2565	ref_qp(ep);
2566	ep->com.thread = curthread;
2567
2568	CURVNET_SET(vnet);
2569	err = get_ifnet_from_raddr(&cm_id->remote_addr, &nh_ifp);
2570	CURVNET_RESTORE();
2571
2572	if (err) {
2573
2574		CTR2(KTR_IW_CXGBE, "%s:cc7 %p", __func__, ep);
2575		printk(KERN_ERR MOD "%s - cannot find route.\n", __func__);
2576		err = -EHOSTUNREACH;
2577		goto fail;
2578	}
2579
2580	if (!(nh_ifp->if_capenable & IFCAP_TOE) ||
2581	    TOEDEV(nh_ifp) == NULL) {
2582		err = -ENOPROTOOPT;
2583		goto fail;
2584	}
2585	ep->com.state = CONNECTING;
2586	ep->tos = 0;
2587	ep->com.local_addr = cm_id->local_addr;
2588	ep->com.remote_addr = cm_id->remote_addr;
2589
2590	err = c4iw_sock_create(&cm_id->local_addr, &ep->com.so);
2591	if (err)
2592		goto fail;
2593
2594	setiwsockopt(ep->com.so);
2595	err = -soconnect(ep->com.so, (struct sockaddr *)&ep->com.remote_addr,
2596		ep->com.thread);
2597	if (!err) {
2598		init_iwarp_socket(ep->com.so, &ep->com);
2599		goto out;
2600	} else
2601		goto fail_free_so;
2602
2603fail_free_so:
2604	sock_release(ep->com.so);
2605fail:
2606	deref_cm_id(&ep->com);
2607	c4iw_put_ep(&ep->com);
2608	ep = NULL;
2609out:
2610	CTR2(KTR_IW_CXGBE, "%s:ccE ret:%d", __func__, err);
2611	return err;
2612}
2613
2614/*
2615 * iwcm->create_listen.  Returns -errno on failure.
2616 */
2617int
2618c4iw_create_listen(struct iw_cm_id *cm_id, int backlog)
2619{
2620	struct c4iw_dev *dev = to_c4iw_dev(cm_id->device);
2621	struct c4iw_listen_ep *lep = NULL;
2622	struct listen_port_info *port_info = NULL;
2623	int rc = 0;
2624
2625	CTR3(KTR_IW_CXGBE, "%s: cm_id %p, backlog %d", __func__, cm_id,
2626			backlog);
2627	lep = alloc_ep(sizeof(*lep), GFP_KERNEL);
2628	lep->com.cm_id = cm_id;
2629	ref_cm_id(&lep->com);
2630	lep->com.dev = dev;
2631	lep->backlog = backlog;
2632	lep->com.local_addr = cm_id->local_addr;
2633	lep->com.thread = curthread;
2634	cm_id->provider_data = lep;
2635	lep->com.state = LISTEN;
2636
2637	/* In case of INADDR_ANY, ibcore creates a cm_id for each device and
2638	 * invokes iw_cxgbe listener callbacks assuming that iw_cxgbe creates
2639	 * HW listeners for each device separately. But toecore expects a single
2640	 * solisten() call with the INADDR_ANY address to create HW listeners on
2641	 * all devices for a given port number. So the iw_cxgbe driver calls
2642	 * solisten() only once for INADDR_ANY (usually from the first
2643	 * listener callback from ibcore). All subsequent INADDR_ANY
2644	 * listener callbacks from ibcore (for the same port address) do not
2645	 * invoke solisten(), as the first listener callback has already created
2646	 * listeners for all other devices (via solisten).
2647	 */
2648	if (c4iw_any_addr((struct sockaddr *)&lep->com.local_addr)) {
2649		port_info = add_ep_to_listenlist(lep);
2650		/* skip solisten() if refcnt > 1, as the listeners were
2651		 * already created by 'Master lep'
2652		 */
2653		if (port_info->refcnt > 1) {
2654			/* As there will be only one listener socket for a TCP
2655			 * port, copy the Master lep's socket pointer to the other
2656			 * leps that belong to the same TCP port.
2657			 */
2658			struct c4iw_listen_ep *head_lep =
2659					container_of(port_info->lep_list.next,
2660					struct c4iw_listen_ep, listen_ep_list);
2661			lep->com.so =  head_lep->com.so;
2662			goto out;
2663		}
2664	}
2665	rc = c4iw_sock_create(&cm_id->local_addr, &lep->com.so);
2666	if (rc) {
2667		CTR2(KTR_IW_CXGBE, "%s:Failed to create socket. err %d",
2668				__func__, rc);
2669		goto fail;
2670	}
2671
2672	rc = solisten(lep->com.so, backlog, curthread);
2673	if (rc) {
2674		CTR3(KTR_IW_CXGBE, "%s:Failed to listen on sock:%p. err %d",
2675				__func__, lep->com.so, rc);
2676		goto fail_free_so;
2677	}
2678	init_iwarp_socket(lep->com.so, &lep->com);
2679out:
2680	return 0;
2681
2682fail_free_so:
2683	sock_release(lep->com.so);
2684fail:
2685	if (port_info)
2686		rem_ep_from_listenlist(lep);
2687	deref_cm_id(&lep->com);
2688	c4iw_put_ep(&lep->com);
2689	return rc;
2690}
2691
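/*
 * iwcm->destroy_listen: mark the listening endpoint DEAD and close its
 * socket.  For wildcard (INADDR_ANY) listeners the shared socket is closed
 * only when the last endpoint using it is removed from the listen list.
 */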
2692int
2693c4iw_destroy_listen(struct iw_cm_id *cm_id)
2694{
2695	struct c4iw_listen_ep *lep = to_listen_ep(cm_id);
2696
2697	mutex_lock(&lep->com.mutex);
2698	CTR3(KTR_IW_CXGBE, "%s: cm_id %p, state %s", __func__, cm_id,
2699	    states[lep->com.state]);
2700
2701	lep->com.state = DEAD;
2702	if (c4iw_any_addr((struct sockaddr *)&lep->com.local_addr)) {
2703		/* if no refcount then close listen socket */
2704		if (!rem_ep_from_listenlist(lep))
2705			close_socket(lep->com.so);
2706	} else
2707		close_socket(lep->com.so);
2708	deref_cm_id(&lep->com);
2709	mutex_unlock(&lep->com.mutex);
2710	c4iw_put_ep(&lep->com);
2711	return 0;
2712}
2713
2714int __c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp)
2715{
2716	int ret;
2717	mutex_lock(&ep->com.mutex);
2718	ret = c4iw_ep_disconnect(ep, abrupt, gfp);
2719	mutex_unlock(&ep->com.mutex);
2720	return ret;
2721}
2722
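/*
 * Start a graceful (sodisconnect) or abortive (send_abort) close of the
 * connection, depending on 'abrupt' and the current endpoint state.  The
 * caller is expected to hold ep->com.mutex (as __c4iw_ep_disconnect above
 * does).
 */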
2723int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp)
2724{
2725	int ret = 0;
2726	int close = 0;
2727	int fatal = 0;
2728	struct c4iw_rdev *rdev;
2729
2730
2731	CTR2(KTR_IW_CXGBE, "%s:cedB %p", __func__, ep);
2732
2733	rdev = &ep->com.dev->rdev;
2734
2735	if (c4iw_fatal_error(rdev)) {
2736
2737		CTR2(KTR_IW_CXGBE, "%s:ced1 %p", __func__, ep);
2738		fatal = 1;
2739		close_complete_upcall(ep, -ECONNRESET);
2740		send_abort(ep);
2741		ep->com.state = DEAD;
2742	}
2743	CTR3(KTR_IW_CXGBE, "%s:ced2 %p %s", __func__, ep,
2744	    states[ep->com.state]);
2745
2746	/*
2747	 * Ref the ep here in case we have fatal errors causing the
2748	 * ep to be released and freed.
2749	 */
2750	c4iw_get_ep(&ep->com);
2751	switch (ep->com.state) {
2752
2753		case MPA_REQ_WAIT:
2754		case MPA_REQ_SENT:
2755		case MPA_REQ_RCVD:
2756		case MPA_REP_SENT:
2757		case FPDU_MODE:
2758			close = 1;
2759			if (abrupt)
2760				ep->com.state = ABORTING;
2761			else {
2762				ep->com.state = CLOSING;
2763				START_EP_TIMER(ep);
2764			}
2765			set_bit(CLOSE_SENT, &ep->com.flags);
2766			break;
2767
2768		case CLOSING:
2769
2770			if (!test_and_set_bit(CLOSE_SENT, &ep->com.flags)) {
2771
2772				close = 1;
2773				if (abrupt) {
2774					STOP_EP_TIMER(ep);
2775					ep->com.state = ABORTING;
2776				} else
2777					ep->com.state = MORIBUND;
2778			}
2779			break;
2780
2781		case MORIBUND:
2782		case ABORTING:
2783		case DEAD:
2784			CTR3(KTR_IW_CXGBE,
2785			    "%s ignoring disconnect ep %p state %u", __func__,
2786			    ep, ep->com.state);
2787			break;
2788
2789		default:
2790			BUG();
2791			break;
2792	}
2793
2794
2795	if (close) {
2796
2797		CTR2(KTR_IW_CXGBE, "%s:ced3 %p", __func__, ep);
2798
2799		if (abrupt) {
2800
2801			CTR2(KTR_IW_CXGBE, "%s:ced4 %p", __func__, ep);
2802			set_bit(EP_DISC_ABORT, &ep->com.history);
2803			close_complete_upcall(ep, -ECONNRESET);
2804			ret = send_abort(ep);
2805			if (ret)
2806				fatal = 1;
2807		} else {
2808
2809			CTR2(KTR_IW_CXGBE, "%s:ced5 %p", __func__, ep);
2810			set_bit(EP_DISC_CLOSE, &ep->com.history);
2811
2812			if (!ep->parent_ep)
2813				ep->com.state = MORIBUND;
2814
2815			CURVNET_SET(ep->com.so->so_vnet);
2816			sodisconnect(ep->com.so);
2817			CURVNET_RESTORE();
2818		}
2819
2820	}
2821
2822	if (fatal) {
2823		set_bit(EP_DISC_FAIL, &ep->com.history);
2824		if (!abrupt) {
2825			STOP_EP_TIMER(ep);
2826			close_complete_upcall(ep, -EIO);
2827		}
2828		if (ep->com.qp) {
2829			struct c4iw_qp_attributes attrs = {0};
2830
2831			attrs.next_state = C4IW_QP_STATE_ERROR;
2832			ret = c4iw_modify_qp(ep->com.dev, ep->com.qp,
2833						C4IW_QP_ATTR_NEXT_STATE,
2834						&attrs, 1);
2835			if (ret) {
2836				CTR2(KTR_IW_CXGBE, "%s:ced7 %p", __func__, ep);
2837				printf("%s - qp <- error failed!\n", __func__);
2838			}
2839		}
2840		release_ep_resources(ep);
2841		ep->com.state = DEAD;
2842		CTR2(KTR_IW_CXGBE, "%s:ced6 %p", __func__, ep);
2843	}
2844	c4iw_put_ep(&ep->com);
2845	CTR2(KTR_IW_CXGBE, "%s:cedE %p", __func__, ep);
2846	return ret;
2847}
2848
2849#ifdef C4IW_EP_REDIRECT
2850int c4iw_ep_redirect(void *ctx, struct dst_entry *old, struct dst_entry *new,
2851		struct l2t_entry *l2t)
2852{
2853	struct c4iw_ep *ep = ctx;
2854
2855	if (ep->dst != old)
2856		return 0;
2857
2858	PDBG("%s ep %p redirect to dst %p l2t %p\n", __func__, ep, new,
2859			l2t);
2860	dst_hold(new);
2861	cxgb4_l2t_release(ep->l2t);
2862	ep->l2t = l2t;
2863	dst_release(old);
2864	ep->dst = new;
2865	return 1;
2866}
2867#endif
2868
2869
2870
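/*
 * Endpoint timer callback: mark the endpoint as timed out and queue it on
 * the deferred-work list, so the timeout is handled from the taskqueue
 * rather than in the timer callback itself.
 */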
2871static void ep_timeout(unsigned long arg)
2872{
2873	struct c4iw_ep *ep = (struct c4iw_ep *)arg;
2874
2875	if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) {
2876
2877		/*
2878		 * Only insert if it is not already on the list.
2879		 */
2880		if (!(ep->com.ep_events & C4IW_EVENT_TIMEOUT)) {
2881			CTR2(KTR_IW_CXGBE, "%s:et1 %p", __func__, ep);
2882			add_ep_to_req_list(ep, C4IW_EVENT_TIMEOUT);
2883		}
2884	}
2885}
2886
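/*
 * FW6_TYPE_WR_RPL handler: the first word of the reply carries the
 * completion status and the second carries the address of the c4iw_wr_wait
 * for the work request; wake the waiter with the (negated) status.
 */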
2887static int fw6_wr_rpl(struct adapter *sc, const __be64 *rpl)
2888{
2889	uint64_t val = be64toh(*rpl);
2890	int ret;
2891	struct c4iw_wr_wait *wr_waitp;
2892
2893	ret = (int)((val >> 8) & 0xff);
2894	wr_waitp = (struct c4iw_wr_wait *)rpl[1];
2895	CTR3(KTR_IW_CXGBE, "%s wr_waitp %p ret %u", __func__, wr_waitp, ret);
2896	if (wr_waitp)
2897		c4iw_wake_up(wr_waitp, ret ? -ret : 0);
2898
2899	return (0);
2900}
2901
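/*
 * FW6_TYPE_CQE handler: copy the error CQE into a list entry, queue it on
 * err_cqe_list, and kick the taskqueue so it is processed outside the
 * interrupt path.
 */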
2902static int fw6_cqe_handler(struct adapter *sc, const __be64 *rpl)
2903{
2904	struct cqe_list_entry *cle;
2905	unsigned long flag;
2906
2907	cle = malloc(sizeof(*cle), M_CXGBE, M_NOWAIT);
	if (cle == NULL)
		return (0);	/* No memory; the error CQE is dropped. */
2908	cle->rhp = sc->iwarp_softc;
2909	cle->err_cqe = *(const struct t4_cqe *)(&rpl[0]);
2910
2911	spin_lock_irqsave(&err_cqe_lock, flag);
2912	list_add_tail(&cle->entry, &err_cqe_list);
2913	queue_work(c4iw_taskq, &c4iw_task);
2914	spin_unlock_irqrestore(&err_cqe_lock, flag);
2915
2916	return (0);
2917}
2918
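/*
 * Handle an incoming RDMA TERMINATE: move the associated QP to the
 * TERMINATE state, or just warn if the endpoint has no QP attached.
 */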
2919static int
2920process_terminate(struct c4iw_ep *ep)
2921{
2922	struct c4iw_qp_attributes attrs = {0};
2923
2924	CTR2(KTR_IW_CXGBE, "%s:tB %p", __func__, ep);
2925
2926	if (ep && ep->com.qp) {
2927
2928		printk(KERN_WARNING MOD "TERM received tid %u qpid %u\n",
2929				ep->hwtid, ep->com.qp->wq.sq.qid);
2930		attrs.next_state = C4IW_QP_STATE_TERMINATE;
2931		c4iw_modify_qp(ep->com.dev, ep->com.qp, C4IW_QP_ATTR_NEXT_STATE, &attrs,
2932				1);
2933	} else
2934		printk(KERN_WARNING MOD "TERM received tid %u no ep/qp\n",
2935								ep->hwtid);
2936	CTR2(KTR_IW_CXGBE, "%s:tE %p", __func__, ep);
2937
2938	return 0;
2939}
2940
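/*
 * Module init for the CM: register the CPL and firmware message handlers
 * with the TOE layer, initialize the deferred-work lists and locks, and
 * create the single-threaded taskqueue that runs all deferred CM work.
 */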
2941int __init c4iw_cm_init(void)
2942{
2943
2944	t4_register_cpl_handler(CPL_RDMA_TERMINATE, terminate);
2945	t4_register_fw_msg_handler(FW6_TYPE_WR_RPL, fw6_wr_rpl);
2946	t4_register_fw_msg_handler(FW6_TYPE_CQE, fw6_cqe_handler);
2947	t4_register_an_handler(c4iw_ev_handler);
2948
2949	TAILQ_INIT(&req_list);
2950	spin_lock_init(&req_lock);
2951	INIT_LIST_HEAD(&err_cqe_list);
2952	spin_lock_init(&err_cqe_lock);
2953
2954	INIT_WORK(&c4iw_task, process_req);
2955
2956	c4iw_taskq = create_singlethread_workqueue("iw_cxgbe");
2957	if (!c4iw_taskq)
2958		return -ENOMEM;
2959
2960	return 0;
2961}
2962
2963void __exit c4iw_cm_term(void)
2964{
2965	WARN_ON(!TAILQ_EMPTY(&req_list));
2966	WARN_ON(!list_empty(&err_cqe_list));
2967	flush_workqueue(c4iw_taskq);
2968	destroy_workqueue(c4iw_taskq);
2969
2970	t4_register_cpl_handler(CPL_RDMA_TERMINATE, NULL);
2971	t4_register_fw_msg_handler(FW6_TYPE_WR_RPL, NULL);
2972	t4_register_fw_msg_handler(FW6_TYPE_CQE, NULL);
2973	t4_register_an_handler(NULL);
2974}
2975#endif
2976