sdp_cma.c revision 302408
/*
 * Copyright (c) 2006 Mellanox Technologies Ltd.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * $Id$
 */
3418334Speter#include "sdp.h"
3518334Speter
3618334Speter#define SDP_MAJV_MINV 0x22
3718334Speter
3818334SpeterSDP_MODPARAM_SINT(sdp_link_layer_ib_only, 1, "Support only link layer of "
3918334Speter		"type Infiniband");
4018334Speter
4118334Speterenum {
4218334Speter	SDP_HH_SIZE = 76,
4318334Speter	SDP_HAH_SIZE = 180,
4418334Speter};
4518334Speter
/*
 * QP event callback registered through ib_qp_init_attr.event_handler in
 * sdp_init_qp().  It currently does nothing with the event.
 */
static void
sdp_qp_event_handler(struct ib_event *event, void *data)
{
	/* Both arguments are unused. */
	(void)event;
	(void)data;
}
5018334Speter
51132727Skanstatic int
5218334Spetersdp_get_max_dev_sge(struct ib_device *dev)
53169699Skan{
5418334Speter	struct ib_device_attr attr;
55132727Skan	static int max_sges = -1;
5618334Speter
57132727Skan	if (max_sges > 0)
5818334Speter		goto out;
5918334Speter
60169699Skan	ib_query_device(dev, &attr);
61169699Skan
62169699Skan	max_sges = attr.max_sge;
63169699Skan
64169699Skanout:
65169699Skan	return max_sges;
66169699Skan}
67169699Skan
68169699Skanstatic int
69169699Skansdp_init_qp(struct socket *sk, struct rdma_cm_id *id)
7018334Speter{
7118334Speter	struct ib_qp_init_attr qp_init_attr = {
7218334Speter		.event_handler = sdp_qp_event_handler,
73132727Skan		.cap.max_send_wr = SDP_TX_SIZE,
7418334Speter		.cap.max_recv_wr = SDP_RX_SIZE,
75169699Skan        	.sq_sig_type = IB_SIGNAL_REQ_WR,
7618334Speter        	.qp_type = IB_QPT_RC,
7718334Speter	};
7818334Speter	struct ib_device *device = id->device;
7918334Speter	struct sdp_sock *ssk;
8018334Speter	int rc;
8118334Speter
8218334Speter	sdp_dbg(sk, "%s\n", __func__);
8318334Speter
8418334Speter	ssk = sdp_sk(sk);
8518334Speter	ssk->max_sge = sdp_get_max_dev_sge(device);
86169699Skan	sdp_dbg(sk, "Max sges: %d\n", ssk->max_sge);
87169699Skan
8818334Speter	qp_init_attr.cap.max_send_sge = MIN(ssk->max_sge, SDP_MAX_SEND_SGES);
89169699Skan	sdp_dbg(sk, "Setting max send sge to: %d\n",
9018334Speter	    qp_init_attr.cap.max_send_sge);
9118334Speter
9218334Speter	qp_init_attr.cap.max_recv_sge = MIN(ssk->max_sge, SDP_MAX_RECV_SGES);
93169699Skan	sdp_dbg(sk, "Setting max recv sge to: %d\n",
94169699Skan	    qp_init_attr.cap.max_recv_sge);
95169699Skan
96169699Skan	ssk->sdp_dev = ib_get_client_data(device, &sdp_client);
97169699Skan	if (!ssk->sdp_dev) {
98169699Skan		sdp_warn(sk, "SDP not available on device %s\n", device->name);
9918334Speter		rc = -ENODEV;
100169699Skan		goto err_rx;
10118334Speter	}
10218334Speter
10318334Speter	rc = sdp_rx_ring_create(ssk, device);
10418334Speter	if (rc)
10518334Speter		goto err_rx;
10618334Speter
10718334Speter	rc = sdp_tx_ring_create(ssk, device);
10818334Speter	if (rc)
10918334Speter		goto err_tx;
11018334Speter
11118334Speter	qp_init_attr.recv_cq = ssk->rx_ring.cq;
11218334Speter	qp_init_attr.send_cq = ssk->tx_ring.cq;
11318334Speter
11490285Sobrien	rc = rdma_create_qp(id, ssk->sdp_dev->pd, &qp_init_attr);
11590285Sobrien	if (rc) {
11618334Speter		sdp_warn(sk, "Unable to create QP: %d.\n", rc);
11718334Speter		goto err_qp;
11818334Speter	}
11918334Speter	ssk->qp = id->qp;
12018334Speter	ssk->ib_device = device;
12150617Sobrien	ssk->qp_active = 1;
12218334Speter	ssk->context.device = device;
12350617Sobrien
12418334Speter	sdp_dbg(sk, "%s done\n", __func__);
12518334Speter	return 0;
126132727Skan
127132727Skanerr_qp:
12818334Speter	sdp_tx_ring_destroy(ssk);
12950617Sobrienerr_tx:
13018334Speter	sdp_rx_ring_destroy(ssk);
13118334Spetererr_rx:
13218334Speter	return rc;
13318334Speter}
13418334Speter
13518334Speterstatic int
13618334Spetersdp_connect_handler(struct socket *sk, struct rdma_cm_id *id,
13718334Speter    struct rdma_cm_event *event)
13818334Speter{
13918334Speter	struct sockaddr_in *src_addr;
14018334Speter	struct sockaddr_in *dst_addr;
141169699Skan	struct socket *child;
14218334Speter	const struct sdp_hh *h;
14318334Speter	struct sdp_sock *ssk;
14418334Speter	int rc;
14518334Speter
146117404Skan	sdp_dbg(sk, "%s %p -> %p\n", __func__, sdp_sk(sk)->id, id);
147117404Skan
14818334Speter	h = event->param.conn.private_data;
14918334Speter	SDP_DUMP_PACKET(sk, "RX", NULL, &h->bsdh);
15018334Speter
15118334Speter	if (!h->max_adverts)
15218334Speter		return -EINVAL;
15318334Speter
15418334Speter	child = sonewconn(sk, SS_ISCONNECTED);
15518334Speter	if (!child)
156132727Skan		return -ENOMEM;
15718334Speter
15818334Speter	ssk = sdp_sk(child);
15918334Speter	rc = sdp_init_qp(child, id);
16018334Speter	if (rc)
16118334Speter		return rc;
16218334Speter	SDP_WLOCK(ssk);
16318334Speter	id->context = ssk;
16418334Speter	ssk->id = id;
16518334Speter	ssk->socket = child;
16618334Speter	ssk->cred = crhold(child->so_cred);
16718334Speter	dst_addr = (struct sockaddr_in *)&id->route.addr.dst_addr;
16818334Speter	src_addr = (struct sockaddr_in *)&id->route.addr.src_addr;
16918334Speter	ssk->fport = dst_addr->sin_port;
17018334Speter	ssk->faddr = dst_addr->sin_addr.s_addr;
171132727Skan	ssk->lport = src_addr->sin_port;
17218334Speter	ssk->max_bufs = ntohs(h->bsdh.bufs);
17318334Speter	atomic_set(&ssk->tx_ring.credits, ssk->max_bufs);
17418334Speter	ssk->min_bufs = tx_credits(ssk) / 4;
17518334Speter	ssk->xmit_size_goal = ntohl(h->localrcvsz) - sizeof(struct sdp_bsdh);
176169699Skan	sdp_init_buffers(ssk, rcvbuf_initial_size);
17718334Speter	ssk->state = TCPS_SYN_RECEIVED;
17818334Speter	SDP_WUNLOCK(ssk);
179169699Skan
180169699Skan	return 0;
18118334Speter}
18218334Speter
18318334Speterstatic int
184169699Skansdp_response_handler(struct socket *sk, struct rdma_cm_id *id,
185169699Skan    struct rdma_cm_event *event)
186169699Skan{
18718334Speter	const struct sdp_hah *h;
18818334Speter	struct sockaddr_in *dst_addr;
18918334Speter	struct sdp_sock *ssk;
19018334Speter	sdp_dbg(sk, "%s\n", __func__);
19118334Speter
19218334Speter	ssk = sdp_sk(sk);
19318334Speter	SDP_WLOCK(ssk);
19418334Speter	ssk->state = TCPS_ESTABLISHED;
19518334Speter	sdp_set_default_moderation(ssk);
19618334Speter	if (ssk->flags & SDP_DROPPED) {
19718334Speter		SDP_WUNLOCK(ssk);
198169699Skan		return 0;
19918334Speter	}
20018334Speter	if (sk->so_options & SO_KEEPALIVE)
20118334Speter		sdp_start_keepalive_timer(sk);
20218334Speter	h = event->param.conn.private_data;
20318334Speter	SDP_DUMP_PACKET(sk, "RX", NULL, &h->bsdh);
204132727Skan	ssk->max_bufs = ntohs(h->bsdh.bufs);
20518334Speter	atomic_set(&ssk->tx_ring.credits, ssk->max_bufs);
20618334Speter	ssk->min_bufs = tx_credits(ssk) / 4;
20718334Speter	ssk->xmit_size_goal =
20818334Speter		ntohl(h->actrcvsz) - sizeof(struct sdp_bsdh);
20918334Speter	ssk->poll_cq = 1;
21018334Speter
21150617Sobrien	dst_addr = (struct sockaddr_in *)&id->route.addr.dst_addr;
21218334Speter	ssk->fport = dst_addr->sin_port;
21318334Speter	ssk->faddr = dst_addr->sin_addr.s_addr;
21418334Speter	soisconnected(sk);
21518334Speter	SDP_WUNLOCK(ssk);
21618334Speter
21718334Speter	return 0;
21818334Speter}
21918334Speter
22018334Speterstatic int
22118334Spetersdp_connected_handler(struct socket *sk, struct rdma_cm_event *event)
222169699Skan{
22318334Speter	struct sdp_sock *ssk;
22418334Speter
22518334Speter	sdp_dbg(sk, "%s\n", __func__);
22618334Speter
22718334Speter	ssk = sdp_sk(sk);
22818334Speter	SDP_WLOCK(ssk);
22918334Speter	ssk->state = TCPS_ESTABLISHED;
23018334Speter
231169699Skan	sdp_set_default_moderation(ssk);
232169699Skan
23318334Speter	if (sk->so_options & SO_KEEPALIVE)
23418334Speter		sdp_start_keepalive_timer(sk);
235169699Skan
23618334Speter	if ((ssk->flags & SDP_DROPPED) == 0)
23718334Speter		soisconnected(sk);
23818334Speter	SDP_WUNLOCK(ssk);
239169699Skan	return 0;
240169699Skan}
241169699Skan
242169699Skanstatic int
243169699Skansdp_disconnected_handler(struct socket *sk)
244169699Skan{
24518334Speter	struct sdp_sock *ssk;
246169699Skan
24718334Speter	ssk = sdp_sk(sk);
24818334Speter	sdp_dbg(sk, "%s\n", __func__);
24918334Speter
25018334Speter	SDP_WLOCK_ASSERT(ssk);
25118334Speter	if (sdp_sk(sk)->state == TCPS_SYN_RECEIVED) {
25218334Speter		sdp_connected_handler(sk, NULL);
25318334Speter
25418334Speter		if (rcv_nxt(ssk))
25518334Speter			return 0;
25618334Speter	}
25718334Speter
25818334Speter	return -ECONNRESET;
25918334Speter}
26018334Speter
26118334Speterint
26218334Spetersdp_cma_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
26318334Speter{
26418334Speter	struct rdma_conn_param conn_param;
26518334Speter	struct socket *sk;
26618334Speter	struct sdp_sock *ssk;
26718334Speter	struct sdp_hah hah;
26818334Speter	struct sdp_hh hh;
26918334Speter
27018334Speter	int rc = 0;
27118334Speter
27218334Speter	ssk = id->context;
273169699Skan	sk = NULL;
27418334Speter	if (ssk)
275169699Skan		sk = ssk->socket;
276169699Skan	if (!ssk || !sk || !ssk->id) {
27718334Speter		sdp_dbg(sk,
278169699Skan		    "cm_id is being torn down, event %d, ssk %p, sk %p, id %p\n",
27918334Speter		       	event->event, ssk, sk, id);
280169699Skan		return event->event == RDMA_CM_EVENT_CONNECT_REQUEST ?
281169699Skan			-EINVAL : 0;
282169699Skan	}
283169699Skan
284169699Skan	sdp_dbg(sk, "%s event %d id %p\n", __func__, event->event, id);
28518334Speter	switch (event->event) {
28618334Speter	case RDMA_CM_EVENT_ADDR_RESOLVED:
28718334Speter		sdp_dbg(sk, "RDMA_CM_EVENT_ADDR_RESOLVED\n");
28818334Speter
28918334Speter		if (sdp_link_layer_ib_only &&
29018334Speter			rdma_node_get_transport(id->device->node_type) ==
29118334Speter				RDMA_TRANSPORT_IB &&
29218334Speter			rdma_port_get_link_layer(id->device, id->port_num) !=
29390285Sobrien				IB_LINK_LAYER_INFINIBAND) {
294132727Skan			sdp_dbg(sk, "Link layer is: %d. Only IB link layer "
29518334Speter				"is allowed\n",
29618334Speter				rdma_port_get_link_layer(id->device, id->port_num));
29790285Sobrien			rc = -ENETUNREACH;
29890285Sobrien			break;
29990285Sobrien		}
30090285Sobrien
30118334Speter		rc = rdma_resolve_route(id, SDP_ROUTE_TIMEOUT);
30218334Speter		break;
30390285Sobrien	case RDMA_CM_EVENT_ADDR_ERROR:
30490285Sobrien		sdp_dbg(sk, "RDMA_CM_EVENT_ADDR_ERROR\n");
305260014Spfg		rc = -ENETUNREACH;
306260014Spfg		break;
307260014Spfg	case RDMA_CM_EVENT_ROUTE_RESOLVED:
308260014Spfg		sdp_dbg(sk, "RDMA_CM_EVENT_ROUTE_RESOLVED : %p\n", id);
309169699Skan		rc = sdp_init_qp(sk, id);
310169699Skan		if (rc)
31118334Speter			break;
31218334Speter		atomic_set(&sdp_sk(sk)->remote_credits,
31318334Speter				rx_ring_posted(sdp_sk(sk)));
31418334Speter		memset(&hh, 0, sizeof hh);
31518334Speter		hh.bsdh.mid = SDP_MID_HELLO;
31618334Speter		hh.bsdh.len = htonl(sizeof(struct sdp_hh));
31718334Speter		hh.max_adverts = 1;
31818334Speter		hh.ipv_cap = 0x40;
31918334Speter		hh.majv_minv = SDP_MAJV_MINV;
32018334Speter		sdp_init_buffers(sdp_sk(sk), rcvbuf_initial_size);
32118334Speter		hh.bsdh.bufs = htons(rx_ring_posted(sdp_sk(sk)));
32218334Speter		hh.localrcvsz = hh.desremrcvsz = htonl(sdp_sk(sk)->recv_bytes);
32318334Speter		hh.max_adverts = 0x1;
32418334Speter		sdp_sk(sk)->laddr =
32518334Speter			((struct sockaddr_in *)&id->route.addr.src_addr)->sin_addr.s_addr;
32618334Speter		memset(&conn_param, 0, sizeof conn_param);
32718334Speter		conn_param.private_data_len = sizeof hh;
32818334Speter		conn_param.private_data = &hh;
32918334Speter		conn_param.responder_resources = 4 /* TODO */;
330169699Skan		conn_param.initiator_depth = 4 /* TODO */;
331169699Skan		conn_param.retry_count = SDP_RETRY_COUNT;
332169699Skan		SDP_DUMP_PACKET(NULL, "TX", NULL, &hh.bsdh);
333169699Skan		rc = rdma_connect(id, &conn_param);
334169699Skan		break;
335169699Skan	case RDMA_CM_EVENT_ROUTE_ERROR:
336169699Skan		sdp_dbg(sk, "RDMA_CM_EVENT_ROUTE_ERROR : %p\n", id);
337169699Skan		rc = -ETIMEDOUT;
338169699Skan		break;
339169699Skan	case RDMA_CM_EVENT_CONNECT_REQUEST:
340169699Skan		sdp_dbg(sk, "RDMA_CM_EVENT_CONNECT_REQUEST\n");
341169699Skan		rc = sdp_connect_handler(sk, id, event);
342169699Skan		if (rc) {
343169699Skan			sdp_dbg(sk, "Destroying qp\n");
344169699Skan			rdma_reject(id, NULL, 0);
345169699Skan			break;
346169699Skan		}
347169699Skan		ssk = id->context;
348169699Skan		atomic_set(&ssk->remote_credits, rx_ring_posted(ssk));
349169699Skan		memset(&hah, 0, sizeof hah);
350169699Skan		hah.bsdh.mid = SDP_MID_HELLO_ACK;
351169699Skan		hah.bsdh.bufs = htons(rx_ring_posted(ssk));
352169699Skan		hah.bsdh.len = htonl(sizeof(struct sdp_hah));
35390285Sobrien		hah.majv_minv = SDP_MAJV_MINV;
354169699Skan		hah.ext_max_adverts = 1; /* Doesn't seem to be mandated by spec,
35518334Speter					    but just in case */
35618334Speter		hah.actrcvsz = htonl(ssk->recv_bytes);
35718334Speter		memset(&conn_param, 0, sizeof conn_param);
35818334Speter		conn_param.private_data_len = sizeof hah;
35918334Speter		conn_param.private_data = &hah;
360132727Skan		conn_param.responder_resources = 4 /* TODO */;
361132727Skan		conn_param.initiator_depth = 4 /* TODO */;
36218334Speter		conn_param.retry_count = SDP_RETRY_COUNT;
36318334Speter		SDP_DUMP_PACKET(sk, "TX", NULL, &hah.bsdh);
36490285Sobrien		rc = rdma_accept(id, &conn_param);
36590285Sobrien		if (rc) {
36618334Speter			ssk->id = NULL;
36790285Sobrien			id->qp = NULL;
36890285Sobrien			id->context = NULL;
36918334Speter		}
37018334Speter		break;
37190285Sobrien	case RDMA_CM_EVENT_CONNECT_RESPONSE:
37290285Sobrien		sdp_dbg(sk, "RDMA_CM_EVENT_CONNECT_RESPONSE\n");
37390285Sobrien		rc = sdp_response_handler(sk, id, event);
37490285Sobrien		if (rc) {
37590285Sobrien			sdp_dbg(sk, "Destroying qp\n");
37690285Sobrien			rdma_reject(id, NULL, 0);
37718334Speter		} else
37818334Speter			rc = rdma_accept(id, NULL);
379169699Skan		break;
380169699Skan	case RDMA_CM_EVENT_CONNECT_ERROR:
381169699Skan		sdp_dbg(sk, "RDMA_CM_EVENT_CONNECT_ERROR\n");
382169699Skan		rc = -ETIMEDOUT;
383169699Skan		break;
384169699Skan	case RDMA_CM_EVENT_UNREACHABLE:
385169699Skan		sdp_dbg(sk, "RDMA_CM_EVENT_UNREACHABLE\n");
386169699Skan		rc = -ENETUNREACH;
387169699Skan		break;
388169699Skan	case RDMA_CM_EVENT_REJECTED:
389169699Skan		sdp_dbg(sk, "RDMA_CM_EVENT_REJECTED\n");
390169699Skan		rc = -ECONNREFUSED;
391169699Skan		break;
392169699Skan	case RDMA_CM_EVENT_ESTABLISHED:
393169699Skan		sdp_dbg(sk, "RDMA_CM_EVENT_ESTABLISHED\n");
394169699Skan		sdp_sk(sk)->laddr =
395169699Skan			((struct sockaddr_in *)&id->route.addr.src_addr)->sin_addr.s_addr;
396169699Skan		rc = sdp_connected_handler(sk, event);
397169699Skan		break;
398169699Skan	case RDMA_CM_EVENT_DISCONNECTED: /* This means DREQ/DREP received */
399169699Skan		sdp_dbg(sk, "RDMA_CM_EVENT_DISCONNECTED\n");
400169699Skan
401169699Skan		SDP_WLOCK(ssk);
40218334Speter		if (ssk->state == TCPS_LAST_ACK) {
403169699Skan			sdp_cancel_dreq_wait_timeout(ssk);
404169699Skan
405169699Skan			sdp_dbg(sk, "%s: waiting for Infiniband tear down\n",
406169699Skan				__func__);
407169699Skan		}
408259661Spfg		ssk->qp_active = 0;
409169699Skan		SDP_WUNLOCK(ssk);
410169699Skan		rdma_disconnect(id);
411169699Skan		SDP_WLOCK(ssk);
412169699Skan		if (ssk->state != TCPS_TIME_WAIT) {
413169699Skan			if (ssk->state == TCPS_CLOSE_WAIT) {
414169699Skan				sdp_dbg(sk, "IB teardown while in "
415169699Skan					"TCPS_CLOSE_WAIT taking reference to "
416169699Skan					"let close() finish the work\n");
417169699Skan			}
418169699Skan			rc = sdp_disconnected_handler(sk);
419169699Skan			if (rc)
420169699Skan				rc = -EPIPE;
421169699Skan		}
422169699Skan		SDP_WUNLOCK(ssk);
423169699Skan		break;
424169699Skan	case RDMA_CM_EVENT_TIMEWAIT_EXIT:
425169699Skan		sdp_dbg(sk, "RDMA_CM_EVENT_TIMEWAIT_EXIT\n");
426169699Skan		SDP_WLOCK(ssk);
427169699Skan		rc = sdp_disconnected_handler(sk);
428169699Skan		SDP_WUNLOCK(ssk);
42990285Sobrien		break;
430169699Skan	case RDMA_CM_EVENT_DEVICE_REMOVAL:
43118334Speter		sdp_dbg(sk, "RDMA_CM_EVENT_DEVICE_REMOVAL\n");
43218334Speter		rc = -ENETRESET;
43318334Speter		break;
434169699Skan	default:
435169699Skan		printk(KERN_ERR "SDP: Unexpected CMA event: %d\n",
43618334Speter		       event->event);
437169699Skan		rc = -ECONNABORTED;
438169699Skan		break;
439169699Skan	}
440169699Skan
441169699Skan	sdp_dbg(sk, "event %d done. status %d\n", event->event, rc);
442169699Skan
443169699Skan	if (rc) {
444169699Skan		SDP_WLOCK(ssk);
445169699Skan		if (ssk->id == id) {
446258748Spfg			ssk->id = NULL;
447258748Spfg			id->qp = NULL;
448258748Spfg			id->context = NULL;
449258748Spfg			if (sdp_notify(ssk, -rc))
450258748Spfg				SDP_WUNLOCK(ssk);
451258748Spfg		} else
452258748Spfg			SDP_WUNLOCK(ssk);
453258748Spfg	}
454258748Spfg
455169699Skan	return rc;
456169699Skan}
457169699Skan