rdsib_cm.c revision 4703:bb31c50bb3ab
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25/*
26 * Copyright (c) 2005 SilverStorm Technologies, Inc. All rights reserved.
27 *
28 * This software is available to you under a choice of one of two
29 * licenses.  You may choose to be licensed under the terms of the GNU
30 * General Public License (GPL) Version 2, available from the file
31 * COPYING in the main directory of this source tree, or the
32 * OpenIB.org BSD license below:
33 *
34 *     Redistribution and use in source and binary forms, with or
35 *     without modification, are permitted provided that the following
36 *     conditions are met:
37 *
38 *	- Redistributions of source code must retain the above
39 *	  copyright notice, this list of conditions and the following
40 *	  disclaimer.
41 *
42 *	- Redistributions in binary form must reproduce the above
43 *	  copyright notice, this list of conditions and the following
44 *	  disclaimer in the documentation and/or other materials
45 *	  provided with the distribution.
46 *
47 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
48 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
49 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
50 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
51 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
52 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
53 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
54 * SOFTWARE.
55 *
56 */
57/*
58 * Sun elects to include this software in Sun product
59 * under the OpenIB BSD license.
60 *
61 *
62 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
63 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
64 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
65 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
66 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
67 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
68 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
69 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
70 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
71 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
72 * POSSIBILITY OF SUCH DAMAGE.
73 */
74
75#pragma ident	"%Z%%M%	%I%	%E% SMI"
76
77#include <sys/ib/clients/rds/rdsib_cm.h>
78#include <sys/ib/clients/rds/rdsib_ib.h>
79#include <sys/ib/clients/rds/rdsib_buf.h>
80#include <sys/ib/clients/rds/rdsib_ep.h>
81
82/*
83 * This file contains CM related work:
84 *
85 * Service registration/deregistration
86 * Path lookup
87 * CM connection callbacks
88 * CM active and passive connection establishment
89 * Connection failover
90 */
91
92/*
93 * Handle an incoming CM REQ
94 */
95/* ARGSUSED */
96static ibt_cm_status_t
97rds_handle_cm_req(rds_state_t *statep, ibt_cm_event_t *evp,
98    ibt_cm_return_args_t *rargsp, void *rcmp, ibt_priv_data_len_t rcmp_len)
99{
100	ibt_cm_req_rcv_t	*reqp;
101	ib_gid_t		lgid, rgid;
102	rds_cm_private_data_t	cmp;
103	rds_session_t		*sp;
104	rds_ep_t		*ep;
105	ibt_channel_hdl_t	chanhdl;
106	int			ret;
107
108	RDS_DPRINTF2("rds_handle_cm_req", "Enter");
109
110	reqp = &evp->cm_event.req;
111	rgid = reqp->req_prim_addr.av_dgid; /* requester gid */
112	lgid = reqp->req_prim_addr.av_sgid; /* receiver gid */
113
114	RDS_DPRINTF2(LABEL, "REQ Received: From: %llx:%llx To: %llx:%llx",
115	    rgid.gid_prefix, rgid.gid_guid, lgid.gid_prefix, lgid.gid_guid);
116
117	/* validate service id */
118	if (reqp->req_service_id == RDS_SERVICE_ID) {
119		RDS_DPRINTF0(LABEL, "Version Mismatch: Remote system "
120		    "(GUID: 0x%llx) is running an older version of RDS",
121		    rgid.gid_guid);
122		return (IBT_CM_REJECT);
123	}
124
125	/*
126	 * CM private data brings IP information
127	 * Private data received is a stream of bytes and may not be properly
128	 * aligned. So, bcopy the data onto the stack before accessing it.
129	 */
130	bcopy((uint8_t *)evp->cm_priv_data, &cmp,
131	    sizeof (rds_cm_private_data_t));
132
133	RDS_DPRINTF2(LABEL, "REQ Received: From IP: 0x%x To IP: 0x%x type: %d",
134	    cmp.cmp_localip, cmp.cmp_remip, cmp.cmp_eptype);
135
136	if (cmp.cmp_version != RDS_VERSION) {
137		RDS_DPRINTF0(LABEL, "Version Mismatch: Local version: %d "
138		    "Remote version: %d", RDS_VERSION, cmp.cmp_version);
139		return (IBT_CM_REJECT);
140	}
141
142	if (cmp.cmp_arch != RDS_THIS_ARCH) {
143		RDS_DPRINTF2(LABEL, "ARCH does not match (%d != %d)",
144		    cmp.cmp_arch, RDS_THIS_ARCH);
145		return (IBT_CM_REJECT);
146	}
147
148	if ((cmp.cmp_eptype != RDS_EP_TYPE_CTRL) &&
149	    (cmp.cmp_eptype != RDS_EP_TYPE_DATA)) {
150		RDS_DPRINTF2(LABEL, "Unknown Channel type: %d", cmp.cmp_eptype);
151		return (IBT_CM_REJECT);
152	}
153
154	/* user_buffer_size should be same on all nodes */
155	if (cmp.cmp_user_buffer_size != UserBufferSize) {
156		RDS_DPRINTF2(LABEL,
157		    "UserBufferSize Mismatch, this node: %d remote node: %d",
158		    UserBufferSize, cmp.cmp_user_buffer_size);
159		return (IBT_CM_REJECT);
160	}
161
162	/*
163	 * RDS needs more time to process a failover REQ so send an MRA.
164	 * Otherwise, the remote may retry the REQ and fail the connection.
165	 */
166	if ((cmp.cmp_failover) && (cmp.cmp_eptype == RDS_EP_TYPE_DATA)) {
167		RDS_DPRINTF2("rds_handle_cm_req", "Session Failover, send MRA");
168		(void) ibt_cm_delay(IBT_CM_DELAY_REQ, evp->cm_session_id,
169		    10000000 /* 10 sec */, NULL, 0);
170	}
171
172	/* Is there a session to the destination node? */
173	rw_enter(&statep->rds_sessionlock, RW_READER);
174	sp = rds_session_lkup(statep, cmp.cmp_localip, rgid.gid_guid);
175	rw_exit(&statep->rds_sessionlock);
176
177	if (sp == NULL) {
178		/*
179		 * currently there is no session to the destination
180		 * remote ip in the private data is the local ip and vice
181		 * versa
182		 */
183		sp = rds_session_create(statep, cmp.cmp_remip, cmp.cmp_localip,
184		    reqp, RDS_SESSION_PASSIVE);
185		if (sp == NULL) {
186			/* Check the list anyway. */
187			rw_enter(&statep->rds_sessionlock, RW_READER);
188			sp = rds_session_lkup(statep, cmp.cmp_localip,
189			    rgid.gid_guid);
190			rw_exit(&statep->rds_sessionlock);
191			if (sp == NULL) {
192				/*
193				 * The only way this can fail is due to lack
194				 * of kernel resources
195				 */
196				return (IBT_CM_REJECT);
197			}
198		}
199	}
200
201	rw_enter(&sp->session_lock, RW_WRITER);
202
203	/* catch peer-to-peer case as soon as possible */
204	if ((sp->session_state == RDS_SESSION_STATE_CREATED) ||
205	    (sp->session_state == RDS_SESSION_STATE_INIT)) {
206		/* Check possible peer-to-peer case here */
207		if (sp->session_type != RDS_SESSION_PASSIVE) {
208			RDS_DPRINTF2("rds_handle_cm_req",
209			    "SP(%p) Peer-peer connection handling", sp);
210			if (lgid.gid_guid > rgid.gid_guid) {
211				/* this node is active so reject this request */
212				rw_exit(&sp->session_lock);
213				return (IBT_CM_REJECT);
214			} else {
215				/* this node is passive, change the session */
216				sp->session_type = RDS_SESSION_PASSIVE;
217				sp->session_lgid = lgid;
218				sp->session_rgid = rgid;
219			}
220		}
221	}
222
223	RDS_DPRINTF2(LABEL, "SP(%p) state: %d", sp, sp->session_state);
224
225	switch (sp->session_state) {
226	case RDS_SESSION_STATE_CONNECTED:
227		RDS_DPRINTF2(LABEL, "STALE Session Detected SP(%p)", sp);
228		sp->session_state = RDS_SESSION_STATE_ERROR;
229		RDS_DPRINTF3("rds_handle_cm_req", "SP(%p) State "
230		    "RDS_SESSION_STATE_ERROR", sp);
231
232		/* FALLTHRU */
233	case RDS_SESSION_STATE_ERROR:
234	case RDS_SESSION_STATE_PASSIVE_CLOSING:
235		sp->session_type = RDS_SESSION_PASSIVE;
236		rw_exit(&sp->session_lock);
237
238		rds_session_close(sp, IBT_NOCALLBACKS, 1);
239
240		/* move the session to init state */
241		rw_enter(&sp->session_lock, RW_WRITER);
242		ret = rds_session_reinit(sp, lgid);
243		sp->session_myip = cmp.cmp_remip;
244		sp->session_lgid = lgid;
245		sp->session_rgid = rgid;
246		if (ret != 0) {
247			rds_session_fini(sp);
248			sp->session_state = RDS_SESSION_STATE_FAILED;
249			RDS_DPRINTF3("rds_handle_cm_req", "SP(%p) State "
250			    "RDS_SESSION_STATE_FAILED", sp);
251			rw_exit(&sp->session_lock);
252			return (IBT_CM_REJECT);
253		} else {
254			sp->session_state = RDS_SESSION_STATE_INIT;
255			RDS_DPRINTF3("rds_handle_cm_req", "SP(%p) State "
256			    "RDS_SESSION_STATE_INIT", sp);
257		}
258
259		if (cmp.cmp_eptype == RDS_EP_TYPE_CTRL) {
260			ep = &sp->session_ctrlep;
261		} else {
262			ep = &sp->session_dataep;
263		}
264		break;
265	case RDS_SESSION_STATE_CREATED:
266	case RDS_SESSION_STATE_FAILED:
267	case RDS_SESSION_STATE_FINI:
268		/*
269		 * Initialize both channels, we accept this connection
270		 * only if both channels are initialized
271		 */
272		sp->session_type = RDS_SESSION_PASSIVE;
273		sp->session_lgid = lgid;
274		sp->session_rgid = rgid;
275		sp->session_state = RDS_SESSION_STATE_CREATED;
276		RDS_DPRINTF3("rds_handle_cm_req", "SP(%p) State "
277		    "RDS_SESSION_STATE_CREATED", sp);
278		ret = rds_session_init(sp);
279		if (ret != 0) {
280			/* Seems like there are not enough resources */
281			sp->session_state = RDS_SESSION_STATE_FAILED;
282			RDS_DPRINTF3("rds_handle_cm_req", "SP(%p) State "
283			    "RDS_SESSION_STATE_FAILED", sp);
284			rw_exit(&sp->session_lock);
285			return (IBT_CM_REJECT);
286		}
287		sp->session_state = RDS_SESSION_STATE_INIT;
288		RDS_DPRINTF3("rds_handle_cm_req", "SP(%p) State "
289		    "RDS_SESSION_STATE_INIT", sp);
290
291		/* FALLTHRU */
292	case RDS_SESSION_STATE_INIT:
293		/*
294		 * When re-using an existing session, make sure the
295		 * session is still through the same HCA. Otherwise, the
296		 * memory registrations have to moved to the new HCA.
297		 */
298		if (cmp.cmp_eptype == RDS_EP_TYPE_DATA) {
299			if (sp->session_lgid.gid_guid != lgid.gid_guid) {
300				RDS_DPRINTF2("rds_handle_cm_req",
301				    "Existing Session but different gid "
302				    "existing: 0x%llx, new: 0x%llx, "
303				    "sending an MRA",
304				    sp->session_lgid.gid_guid, lgid.gid_guid);
305				(void) ibt_cm_delay(IBT_CM_DELAY_REQ,
306				    evp->cm_session_id, 10000000 /* 10 sec */,
307				    NULL, 0);
308				ret = rds_session_reinit(sp, lgid);
309				if (ret != 0) {
310					rds_session_fini(sp);
311					sp->session_state =
312					    RDS_SESSION_STATE_FAILED;
313					sp->session_failover = 0;
314					RDS_DPRINTF3("rds_failover_session",
315					    "SP(%p) State "
316					    "RDS_SESSION_STATE_FAILED", sp);
317					rw_exit(&sp->session_lock);
318					return (IBT_CM_REJECT);
319				}
320			}
321			ep = &sp->session_dataep;
322		} else {
323			ep = &sp->session_ctrlep;
324		}
325
326		break;
327	default:
328		RDS_DPRINTF2(LABEL, "ERROR: SP(%p) is in an unexpected "
329		    "state: %d", sp, sp->session_state);
330		rw_exit(&sp->session_lock);
331		return (IBT_CM_REJECT);
332	}
333
334	sp->session_failover = 0; /* reset any previous value */
335	if (cmp.cmp_failover) {
336		RDS_DPRINTF2("rds_handle_cm_req",
337		    "SP(%p) Failover Session (BP %p)", sp, cmp.cmp_last_bufid);
338		sp->session_failover = 1;
339	}
340
341	mutex_enter(&ep->ep_lock);
342	if (ep->ep_state == RDS_EP_STATE_UNCONNECTED) {
343		ep->ep_state = RDS_EP_STATE_PASSIVE_PENDING;
344		sp->session_type = RDS_SESSION_PASSIVE;
345		rw_exit(&sp->session_lock);
346	} else if (ep->ep_state == RDS_EP_STATE_ACTIVE_PENDING) {
347		rw_exit(&sp->session_lock);
348		/*
349		 * Peer to peer connection. There is an active
350		 * connection pending on this ep. The one with
351		 * greater port guid becomes active and the
352		 * other becomes passive.
353		 */
354		RDS_DPRINTF2("rds_handle_cm_req",
355		    "EP(%p) Peer-peer connection handling", ep);
356		if (lgid.gid_guid > rgid.gid_guid) {
357			/* this node is active so reject this request */
358			mutex_exit(&ep->ep_lock);
359			RDS_DPRINTF2(LABEL, "SP(%p) EP(%p): "
360			    "Rejecting passive in favor of active", sp, ep);
361			return (IBT_CM_REJECT);
362		} else {
363			/*
364			 * This session is not the active end, change it
365			 * to passive end.
366			 */
367			ASSERT(sp->session_type == RDS_SESSION_ACTIVE);
368			ep->ep_state = RDS_EP_STATE_PASSIVE_PENDING;
369
370			rw_enter(&sp->session_lock, RW_WRITER);
371			sp->session_type = RDS_SESSION_PASSIVE;
372			sp->session_lgid = lgid;
373			sp->session_rgid = rgid;
374			rw_exit(&sp->session_lock);
375		}
376	} else {
377		rw_exit(&sp->session_lock);
378	}
379
380	ep->ep_lbufid = cmp.cmp_last_bufid;
381	ep->ep_ackwr.wr.rc.rcwr.rdma.rdma_raddr = (ib_vaddr_t)cmp.cmp_ack_addr;
382	ep->ep_ackwr.wr.rc.rcwr.rdma.rdma_rkey = cmp.cmp_ack_rkey;
383	cmp.cmp_last_bufid = ep->ep_rbufid;
384	cmp.cmp_ack_addr = ep->ep_ack_addr;
385	cmp.cmp_ack_rkey = ep->ep_ack_rkey;
386	mutex_exit(&ep->ep_lock);
387
388	/* continue with accepting the connection request for this channel */
389	chanhdl = rds_ep_alloc_rc_channel(ep, reqp->req_prim_hca_port);
390	if (chanhdl == NULL) {
391		mutex_enter(&ep->ep_lock);
392		ep->ep_state = RDS_EP_STATE_UNCONNECTED;
393		mutex_exit(&ep->ep_lock);
394		return (IBT_CM_REJECT);
395	}
396
397	/* pre-post recv buffers in the RQ */
398	rds_post_recv_buf((void *)chanhdl);
399
400	rargsp->cm_ret_len = sizeof (rds_cm_private_data_t);
401	bcopy((uint8_t *)&cmp, rcmp, sizeof (rds_cm_private_data_t));
402	rargsp->cm_ret.rep.cm_channel = chanhdl;
403	rargsp->cm_ret.rep.cm_rdma_ra_out = 4;
404	rargsp->cm_ret.rep.cm_rdma_ra_in = 4;
405	rargsp->cm_ret.rep.cm_rnr_retry_cnt = MinRnrRetry;
406
407	RDS_DPRINTF2("rds_handle_cm_req", "Return: SP(%p) EP(%p) Chan (%p)",
408	    sp, ep, chanhdl);
409
410	return (IBT_CM_ACCEPT);
411}
412
413/*
414 * Handle an incoming CM REP
415 * Pre-post recv buffers for the QP
416 */
417/* ARGSUSED */
418static ibt_cm_status_t
419rds_handle_cm_rep(ibt_cm_event_t *evp, ibt_cm_return_args_t *rargsp,
420    void *rcmp, ibt_priv_data_len_t rcmp_len)
421{
422	rds_ep_t	*ep;
423	rds_cm_private_data_t	cmp;
424
425	RDS_DPRINTF2("rds_handle_cm_rep", "Enter");
426
427	/* pre-post recv buffers in the RQ */
428	rds_post_recv_buf((void *)evp->cm_channel);
429
430	ep = (rds_ep_t *)ibt_get_chan_private(evp->cm_channel);
431	bcopy((uint8_t *)evp->cm_priv_data, &cmp,
432	    sizeof (rds_cm_private_data_t));
433	ep->ep_lbufid = cmp.cmp_last_bufid;
434	ep->ep_ackwr.wr.rc.rcwr.rdma.rdma_raddr = (ib_vaddr_t)cmp.cmp_ack_addr;
435	ep->ep_ackwr.wr.rc.rcwr.rdma.rdma_rkey = cmp.cmp_ack_rkey;
436
437	rargsp->cm_ret_len = 0;
438
439	RDS_DPRINTF2("rds_handle_cm_rep", "Return: lbufid: %p", ep->ep_lbufid);
440
441	return (IBT_CM_ACCEPT);
442}
443
444/*
445 * Handle CONN EST
446 */
447static ibt_cm_status_t
448rds_handle_cm_conn_est(ibt_cm_event_t *evp)
449{
450	rds_session_t	*sp;
451	rds_ep_t	*ep;
452
453	ep = (rds_ep_t *)ibt_get_chan_private(evp->cm_channel);
454
455	RDS_DPRINTF2("rds_handle_cm_conn_est", "EP(%p) State: %d", ep,
456	    ep->ep_state);
457
458	mutex_enter(&ep->ep_lock);
459	ASSERT((ep->ep_state == RDS_EP_STATE_ACTIVE_PENDING) ||
460	    (ep->ep_state == RDS_EP_STATE_PASSIVE_PENDING));
461	ep->ep_state = RDS_EP_STATE_CONNECTED;
462	ep->ep_chanhdl = evp->cm_channel;
463	sp = ep->ep_sp;
464	mutex_exit(&ep->ep_lock);
465
466	(void) rds_session_active(sp);
467
468	RDS_DPRINTF2("rds_handle_cm_conn_est", "Return");
469	return (IBT_CM_ACCEPT);
470}
471
472/*
473 * Handle CONN CLOSED
474 */
475static ibt_cm_status_t
476rds_handle_cm_conn_closed(ibt_cm_event_t *evp)
477{
478	rds_ep_t	*ep;
479	rds_session_t	*sp;
480
481	/* Catch DREQs but ignore DREPs */
482	if (evp->cm_event.closed != IBT_CM_CLOSED_DREQ_RCVD) {
483		RDS_DPRINTF2("rds_handle_cm_conn_closed",
484		    "Ignoring Event: %d received", evp->cm_event.closed);
485		return (IBT_CM_ACCEPT);
486	}
487
488	ep = (rds_ep_t *)ibt_get_chan_private(evp->cm_channel);
489	sp = ep->ep_sp;
490	RDS_DPRINTF2("rds_handle_cm_conn_closed", "EP(%p) Enter", ep);
491
492	mutex_enter(&ep->ep_lock);
493	if (ep->ep_state != RDS_EP_STATE_CONNECTED) {
494		/* Ignore this DREQ */
495		RDS_DPRINTF2("rds_handle_cm_conn_closed",
496		    "EP(%p) not connected, state: %d", ep, ep->ep_state);
497		mutex_exit(&ep->ep_lock);
498		return (IBT_CM_ACCEPT);
499	}
500	ep->ep_state = RDS_EP_STATE_CLOSING;
501	mutex_exit(&ep->ep_lock);
502
503	rw_enter(&sp->session_lock, RW_WRITER);
504	RDS_DPRINTF2("rds_handle_cm_conn_closed", "SP(%p) - state: %d", sp,
505	    sp->session_state);
506
507	switch (sp->session_state) {
508	case RDS_SESSION_STATE_CONNECTED:
509		sp->session_state = RDS_SESSION_STATE_PASSIVE_CLOSING;
510		RDS_DPRINTF3("rds_handle_cm_conn_closed", "SP(%p) State "
511		    "RDS_SESSION_STATE_PASSIVE_CLOSING", sp);
512		break;
513
514	case RDS_SESSION_STATE_PASSIVE_CLOSING:
515		sp->session_state = RDS_SESSION_STATE_CLOSED;
516		RDS_DPRINTF3("rds_handle_cm_conn_closed", "SP(%p) State "
517		    "RDS_SESSION_STATE_CLOSED", sp);
518		rds_passive_session_fini(sp);
519		sp->session_state = RDS_SESSION_STATE_FINI;
520		RDS_DPRINTF3("rds_handle_cm_conn_closed",
521		    "SP(%p) State RDS_SESSION_STATE_FINI", sp);
522		break;
523
524	case RDS_SESSION_STATE_ACTIVE_CLOSING:
525	case RDS_SESSION_STATE_ERROR:
526	case RDS_SESSION_STATE_CLOSED:
527		break;
528
529	case RDS_SESSION_STATE_INIT:
530		sp->session_state = RDS_SESSION_STATE_ERROR;
531		RDS_DPRINTF3("rds_handle_cm_conn_closed", "SP(%p) State "
532		    "RDS_SESSION_STATE_ERROR", sp);
533		rds_passive_session_fini(sp);
534		sp->session_state = RDS_SESSION_STATE_FAILED;
535		RDS_DPRINTF3("rds_handle_cm_conn_closed",
536		    "SP(%p) State RDS_SESSION_STATE_FAILED", sp);
537		break;
538
539	default:
540		RDS_DPRINTF2("rds_handle_cm_conn_closed",
541		    "SP(%p) - Unexpected state: %d", sp, sp->session_state);
542		rds_passive_session_fini(sp);
543		sp->session_state = RDS_SESSION_STATE_FAILED;
544		RDS_DPRINTF3("rds_handle_cm_conn_closed", "SP(%p) State "
545		    "RDS_SESSION_STATE_FAILED", sp);
546	}
547	rw_exit(&sp->session_lock);
548
549	mutex_enter(&ep->ep_lock);
550	ep->ep_state = RDS_EP_STATE_CLOSED;
551	mutex_exit(&ep->ep_lock);
552
553	RDS_DPRINTF2("rds_handle_cm_conn_closed", "SP(%p) Return", sp);
554	return (IBT_CM_ACCEPT);
555}
556
557/*
558 * Handle EVENT FAILURE
559 */
560static ibt_cm_status_t
561rds_handle_cm_event_failure(ibt_cm_event_t *evp)
562{
563	rds_ep_t	*ep;
564	rds_session_t	*sp;
565	int		ret;
566
567	RDS_DPRINTF2("rds_handle_cm_event_failure", "Enter: Chan hdl: 0x%p "
568	    "Code: %d msg: %d reason: %d", evp->cm_channel,
569	    evp->cm_event.failed.cf_code, evp->cm_event.failed.cf_msg,
570	    evp->cm_event.failed.cf_reason);
571
572	if (evp->cm_event.failed.cf_reason == IBT_CM_INVALID_SID) {
573		RDS_DPRINTF0(LABEL,
574		    "Received REJ with reason IBT_CM_INVALID_SID: "
575		    "The remote system could be running an older RDS version");
576	}
577
578	if (evp->cm_channel == NULL) {
579		return (IBT_CM_ACCEPT);
580	}
581
582	ep = (rds_ep_t *)ibt_get_chan_private(evp->cm_channel);
583	sp = ep->ep_sp;
584
585	mutex_enter(&ep->ep_lock);
586	ep->ep_state = RDS_EP_STATE_ERROR;
587	mutex_exit(&ep->ep_lock);
588
589	rw_enter(&sp->session_lock, RW_WRITER);
590	if (sp->session_type == RDS_SESSION_PASSIVE) {
591		RDS_DPRINTF2("rds_handle_cm_event_failure",
592		    "SP(%p) - state: %d", sp, sp->session_state);
593		if ((sp->session_state == RDS_SESSION_STATE_INIT) ||
594		    (sp->session_state == RDS_SESSION_STATE_CONNECTED)) {
595			sp->session_state = RDS_SESSION_STATE_ERROR;
596			RDS_DPRINTF3("rds_handle_cm_event_failure",
597			    "SP(%p) State RDS_SESSION_STATE_ERROR", sp);
598
599			/*
600			 * Store the cm_channel for freeing later
601			 * Active side frees it on ibt_open_rc_channel
602			 * failure
603			 */
604			if (ep->ep_chanhdl == NULL) {
605				ep->ep_chanhdl = evp->cm_channel;
606			}
607			rw_exit(&sp->session_lock);
608
609			/*
610			 * rds_passive_session_fini should not be called
611			 * directly in the CM handler. It will cause a deadlock.
612			 */
613			ret = ddi_taskq_dispatch(rds_taskq,
614			    rds_cleanup_passive_session, (void *)sp,
615			    DDI_NOSLEEP);
616			if (ret != DDI_SUCCESS) {
617				RDS_DPRINTF1("rds_handle_cm_event_failure",
618				    "SP(%p) TaskQ dispatch FAILED:%d", sp, ret);
619			}
620			return (IBT_CM_ACCEPT);
621		}
622	}
623	rw_exit(&sp->session_lock);
624
625	RDS_DPRINTF2("rds_handle_cm_event_failure", "SP(%p) Return", sp);
626	return (IBT_CM_ACCEPT);
627}
628
629/*
630 * CM Handler
631 *
632 * Called by IBCM
633 * The cm_private type differs for active and passive events.
634 */
635ibt_cm_status_t
636rds_cm_handler(void *cm_private, ibt_cm_event_t *eventp,
637    ibt_cm_return_args_t *ret_args, void *ret_priv_data,
638    ibt_priv_data_len_t ret_len_max)
639{
640	ibt_cm_status_t		ret = IBT_CM_ACCEPT;
641
642	RDS_DPRINTF2("rds_cm_handler", "Enter: event: %d", eventp->cm_type);
643
644	switch (eventp->cm_type) {
645	case IBT_CM_EVENT_REQ_RCV:
646		ret = rds_handle_cm_req((rds_state_t *)cm_private, eventp,
647		    ret_args, ret_priv_data, ret_len_max);
648		break;
649	case IBT_CM_EVENT_REP_RCV:
650		ret = rds_handle_cm_rep(eventp, ret_args, ret_priv_data,
651		    ret_len_max);
652		break;
653	case IBT_CM_EVENT_MRA_RCV:
654		/* Not supported */
655		break;
656	case IBT_CM_EVENT_CONN_EST:
657		ret = rds_handle_cm_conn_est(eventp);
658		break;
659	case IBT_CM_EVENT_CONN_CLOSED:
660		ret = rds_handle_cm_conn_closed(eventp);
661		break;
662	case IBT_CM_EVENT_FAILURE:
663		ret = rds_handle_cm_event_failure(eventp);
664		break;
665	case IBT_CM_EVENT_LAP_RCV:
666		/* Not supported */
667		RDS_DPRINTF2(LABEL, "LAP message received");
668		break;
669	case IBT_CM_EVENT_APR_RCV:
670		/* Not supported */
671		RDS_DPRINTF2(LABEL, "APR message received");
672		break;
673	default:
674		break;
675	}
676
677	RDS_DPRINTF2("rds_cm_handler", "Return");
678
679	return (ret);
680}
681
682/* This is based on OFED Linux RDS */
683#define	RDS_PORT_NUM	6556
684
685/*
686 * Register the wellknown service with service id: RDS_SERVICE_ID
687 * Incoming connection requests should arrive on this service id.
688 */
689ibt_srv_hdl_t
690rds_register_service(ibt_clnt_hdl_t rds_ibhdl)
691{
692	ibt_srv_hdl_t	srvhdl;
693	ibt_srv_desc_t	srvdesc;
694	int		ret;
695
696	RDS_DPRINTF2("rds_register_service", "Enter: 0x%p", rds_ibhdl);
697
698	bzero(&srvdesc, sizeof (ibt_srv_desc_t));
699	srvdesc.sd_handler = rds_cm_handler;
700	srvdesc.sd_flags = IBT_SRV_NO_FLAGS;
701
702	/*
703	 * Register the old service id for backward compatibility
704	 * REQs received on this service id would be rejected
705	 */
706	ret = ibt_register_service(rds_ibhdl, &srvdesc, RDS_SERVICE_ID,
707	    1, &rdsib_statep->rds_old_srvhdl, NULL);
708	if (ret != IBT_SUCCESS) {
709		RDS_DPRINTF2(LABEL,
710		    "RDS Service (0x%llx) Registration Failed: %d",
711		    RDS_SERVICE_ID, ret);
712		return (NULL);
713	}
714
715	/*
716	 * This is the new service id as per:
717	 * Annex A11: RDMA IP CM Service
718	 */
719	rdsib_statep->rds_service_id = ibt_get_ip_sid(IPPROTO_TCP,
720	    RDS_PORT_NUM);
721	ret = ibt_register_service(rds_ibhdl, &srvdesc,
722	    rdsib_statep->rds_service_id, 1, &srvhdl, NULL);
723	if (ret != IBT_SUCCESS) {
724		RDS_DPRINTF2(LABEL,
725		    "RDS Service (0x%llx) Registration Failed: %d",
726		    rdsib_statep->rds_service_id, ret);
727		return (NULL);
728	}
729
730	RDS_DPRINTF2("rds_register_service", "Return: 0x%p", srvhdl);
731	return (srvhdl);
732}
733
734/* Bind the RDS service on all ports */
735int
736rds_bind_service(rds_state_t *statep)
737{
738	rds_hca_t	*hcap;
739	ib_gid_t	gid;
740	uint_t		jx, nbinds = 0, nports = 0;
741	int		ret;
742
743	RDS_DPRINTF2("rds_bind_service", "Enter: 0x%p", statep);
744
745	hcap = statep->rds_hcalistp;
746	while (hcap != NULL) {
747		for (jx = 0; jx < hcap->hca_nports; jx++) {
748			nports++;
749			if (hcap->hca_pinfop[jx].p_linkstate !=
750			    IBT_PORT_ACTIVE) {
751				/*
752				 * service bind will be called in the async
753				 * handler when the port comes up
754				 */
755				continue;
756			}
757
758			gid = hcap->hca_pinfop[jx].p_sgid_tbl[0];
759			RDS_DPRINTF5(LABEL, "HCA: 0x%llx Port: %d "
760			    "gid: %llx:%llx", hcap->hca_guid,
761			    hcap->hca_pinfop[jx].p_port_num, gid.gid_prefix,
762			    gid.gid_guid);
763
764			/* pass statep as cm_private */
765			ret = ibt_bind_service(statep->rds_srvhdl, gid,
766			    NULL, statep, NULL);
767			if (ret != IBT_SUCCESS) {
768				RDS_DPRINTF2(LABEL, "Bind service for "
769				    "HCA: 0x%llx Port: %d gid %llx:%llx "
770				    "failed: %d", hcap->hca_guid,
771				    hcap->hca_pinfop[jx].p_port_num,
772				    gid.gid_prefix, gid.gid_guid, ret);
773				continue;
774			}
775
776			nbinds++;
777
778			/* bind the old service, ignore if it fails */
779			ret = ibt_bind_service(statep->rds_old_srvhdl, gid,
780			    NULL, statep, NULL);
781			if (ret != IBT_SUCCESS) {
782				RDS_DPRINTF2(LABEL, "Bind service for "
783				    "HCA: 0x%llx Port: %d gid %llx:%llx "
784				    "failed: %d", hcap->hca_guid,
785				    hcap->hca_pinfop[jx].p_port_num,
786				    gid.gid_prefix, gid.gid_guid, ret);
787			}
788		}
789		hcap = hcap->hca_nextp;
790	}
791
792	RDS_DPRINTF2(LABEL, "RDS Service available on %d/%d ports",
793	    nbinds, nports);
794
795#if 0
796	if (nbinds == 0) {
797		return (-1);
798	}
799#endif
800
801	RDS_DPRINTF2("rds_bind_service", "Return");
802
803	return (0);
804}
805
806/* Open an RC connection */
807int
808rds_open_rc_channel(rds_ep_t *ep, ibt_path_info_t *pinfo,
809    ibt_execution_mode_t mode, ibt_channel_hdl_t *chanhdl)
810{
811	rds_session_t		*sp;
812	ibt_chan_open_args_t	ocargs;
813	ibt_rc_returns_t	ocrets;
814	rds_cm_private_data_t	cmp;
815	uint8_t			hca_port;
816	ibt_channel_hdl_t	hdl;
817	ibt_status_t		ret = 0;
818	ibt_ip_cm_info_t	ipcm_info;
819
820	RDS_DPRINTF2("rds_open_rc_channel", "Enter: EP(%p) mode: %d", ep, mode);
821
822	sp = ep->ep_sp;
823
824	bzero(&ipcm_info, sizeof (ibt_ip_cm_info_t));
825	ipcm_info.src_addr.family = AF_INET;
826	ipcm_info.src_addr.un.ip4addr = sp->session_myip;
827	ipcm_info.dst_addr.family = AF_INET;
828	ipcm_info.dst_addr.un.ip4addr = sp->session_remip;
829	ipcm_info.src_port = 6556; /* based on OFED RDS */
830	ret = ibt_format_ip_private_data(&ipcm_info,
831	    sizeof (rds_cm_private_data_t), &cmp);
832	if (ret != IBT_SUCCESS) {
833		RDS_DPRINTF2(LABEL, "SP(%p) EP(%p) ibt_format_ip_private_data "
834		    "failed: %d", sp, ep, ret);
835		return (-1);
836	}
837
838	hca_port = pinfo->pi_prim_cep_path.cep_hca_port_num;
839
840	hdl = rds_ep_alloc_rc_channel(ep, hca_port);
841	if (hdl == NULL) {
842		return (-1);
843	}
844
845	cmp.cmp_version = RDS_VERSION;
846	cmp.cmp_arch = RDS_THIS_ARCH;
847	cmp.cmp_remip = sp->session_remip;
848	cmp.cmp_localip = sp->session_myip;
849	cmp.cmp_eptype = ep->ep_type;
850	cmp.cmp_failover = sp->session_failover;
851	cmp.cmp_last_bufid = ep->ep_rbufid;
852	cmp.cmp_user_buffer_size = UserBufferSize;
853	cmp.cmp_ack_addr = ep->ep_ack_addr;
854	cmp.cmp_ack_rkey = ep->ep_ack_rkey;
855
856	bzero(&ocargs, sizeof (ibt_chan_open_args_t));
857	bzero(&ocrets, sizeof (ibt_rc_returns_t));
858	ocargs.oc_path = pinfo;
859	ocargs.oc_cm_handler = rds_cm_handler;
860	ocargs.oc_cm_clnt_private = NULL;
861	ocargs.oc_rdma_ra_out = 4;
862	ocargs.oc_rdma_ra_in = 4;
863	ocargs.oc_priv_data_len = sizeof (rds_cm_private_data_t);
864	ocargs.oc_priv_data = &cmp;
865	ocargs.oc_path_retry_cnt = IBPathRetryCount;
866	ocargs.oc_path_rnr_retry_cnt = MinRnrRetry;
867	ret = ibt_open_rc_channel(hdl, IBT_OCHAN_NO_FLAGS,
868	    mode, &ocargs, &ocrets);
869	if (ret != IBT_SUCCESS) {
870		RDS_DPRINTF2(LABEL, "SP(%p) EP(%p) ibt_open_rc_channel "
871		    "failed: %d", sp, ep, ret);
872		(void) ibt_flush_channel(hdl);
873		(void) ibt_free_channel(hdl);
874		/* cleanup stuff allocated in rds_ep_alloc_rc_channel */
875		(void) ibt_free_cq(ep->ep_recvcq);
876		ep->ep_recvcq = NULL;
877		(void) ibt_free_cq(ep->ep_sendcq);
878		ep->ep_sendcq = NULL;
879		return (-1);
880	}
881
882	*chanhdl = hdl;
883
884	RDS_DPRINTF2("rds_open_rc_channel", "Return: EP(%p) Chan: %p", ep,
885	    *chanhdl);
886
887	return (0);
888}
889
890int
891rds_close_rc_channel(ibt_channel_hdl_t chanhdl, ibt_execution_mode_t mode)
892{
893	int	ret;
894
895	RDS_DPRINTF2("rds_close_rc_channel", "Enter: Chan(%p) Mode(%d)",
896	    chanhdl, mode);
897
898	ret = ibt_close_rc_channel(chanhdl, mode, NULL, 0, NULL, NULL, 0);
899
900	RDS_DPRINTF2("rds_close_rc_channel", "Return Chan(%p)", chanhdl);
901
902	return (ret);
903}
904