1/*
2 * Copyright (c) 2006 Mellanox Technologies Ltd.  All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses.  You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 *     Redistribution and use in source and binary forms, with or
11 *     without modification, are permitted provided that the following
12 *     conditions are met:
13 *
14 *      - Redistributions of source code must retain the above
15 *        copyright notice, this list of conditions and the following
16 *        disclaimer.
17 *
18 *      - Redistributions in binary form must reproduce the above
19 *        copyright notice, this list of conditions and the following
20 *        disclaimer in the documentation and/or other materials
21 *        provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 * $Id$
33 */
34#include "sdp.h"
35
/*
 * SDP protocol version carried in the Hello/HelloAck headers.
 * Presumably major/minor packed in nibbles (0x22 = v2.2) -- confirm
 * against the SDP specification.
 */
#define SDP_MAJV_MINV 0x22

/*
 * Tunable: when non-zero, connections over IB devices are refused
 * unless the port's link layer is native InfiniBand (see the
 * RDMA_CM_EVENT_ADDR_RESOLVED handling in sdp_cma_handler()).
 */
SDP_MODPARAM_SINT(sdp_link_layer_ib_only, 1, "Support only link layer of "
		"type Infiniband");

/*
 * NOTE(review): presumably the CM private-data sizes of the SDP Hello
 * (HH) and HelloAck (HAH) messages -- not referenced in this chunk;
 * confirm against the SDP spec and the users of these constants.
 */
enum {
	SDP_HH_SIZE = 76,
	SDP_HAH_SIZE = 180,
};
45
/*
 * QP asynchronous event callback installed via ib_qp_init_attr in
 * sdp_init_qp().  Events are intentionally ignored.
 */
static void
sdp_qp_event_handler(struct ib_event *event, void *data)
{
}
50
51static int
52sdp_get_max_dev_sge(struct ib_device *dev)
53{
54	struct ib_device_attr *device_attr;
55	static int max_sges = -1;
56
57	if (max_sges > 0)
58		goto out;
59
60	device_attr = &dev->attrs;
61	max_sges = device_attr->max_sge;
62
63out:
64	return max_sges;
65}
66
/*
 * Create the RC queue pair (and its rx/tx rings with their completion
 * queues) that will carry SDP traffic for socket 'sk' over cm_id 'id'.
 *
 * Returns 0 on success or a negative errno; on failure any ring that
 * was already created is torn down again via the goto-based unwind.
 */
static int
sdp_init_qp(struct socket *sk, struct rdma_cm_id *id)
{
	struct ib_qp_init_attr qp_init_attr = {
		.event_handler = sdp_qp_event_handler,
		.cap.max_send_wr = SDP_TX_SIZE,
		.cap.max_recv_wr = SDP_RX_SIZE,
        	.sq_sig_type = IB_SIGNAL_REQ_WR,	/* signal only requested send WRs */
        	.qp_type = IB_QPT_RC,			/* reliable connected service */
	};
	struct ib_device *device = id->device;
	struct sdp_sock *ssk;
	int rc;

	sdp_dbg(sk, "%s\n", __func__);

	ssk = sdp_sk(sk);
	/* Clamp the s/g list lengths to what the device supports. */
	ssk->max_sge = sdp_get_max_dev_sge(device);
	sdp_dbg(sk, "Max sges: %d\n", ssk->max_sge);

	qp_init_attr.cap.max_send_sge = MIN(ssk->max_sge, SDP_MAX_SEND_SGES);
	sdp_dbg(sk, "Setting max send sge to: %d\n",
	    qp_init_attr.cap.max_send_sge);

	qp_init_attr.cap.max_recv_sge = MIN(ssk->max_sge, SDP_MAX_RECV_SGES);
	sdp_dbg(sk, "Setting max recv sge to: %d\n",
	    qp_init_attr.cap.max_recv_sge);

	/* Per-device SDP state registered through the sdp_client ib_client. */
	ssk->sdp_dev = ib_get_client_data(device, &sdp_client);
	if (!ssk->sdp_dev) {
		sdp_warn(sk, "SDP not available on device %s\n", device->name);
		rc = -ENODEV;
		goto err_rx;
	}

	rc = sdp_rx_ring_create(ssk, device);
	if (rc)
		goto err_rx;

	rc = sdp_tx_ring_create(ssk, device);
	if (rc)
		goto err_tx;

	/* Attach the completion queues created by the ring setup above. */
	qp_init_attr.recv_cq = ssk->rx_ring.cq;
	qp_init_attr.send_cq = ssk->tx_ring.cq;

	rc = rdma_create_qp(id, ssk->sdp_dev->pd, &qp_init_attr);
	if (rc) {
		sdp_warn(sk, "Unable to create QP: %d.\n", rc);
		goto err_qp;
	}
	ssk->qp = id->qp;
	ssk->ib_device = device;
	ssk->qp_active = 1;
	ssk->context.device = device;

	sdp_dbg(sk, "%s done\n", __func__);
	return 0;

err_qp:
	sdp_tx_ring_destroy(ssk);
err_tx:
	sdp_rx_ring_destroy(ssk);
err_rx:
	return rc;
}
133
/*
 * Handle RDMA_CM_EVENT_CONNECT_REQUEST on a listening socket: parse the
 * peer's SDP Hello (from the CM private data), spawn a child socket,
 * create its QP and seed its flow-control state from the Hello.  The
 * caller (sdp_cma_handler) completes the handshake by sending the
 * HelloAck via rdma_accept().
 *
 * Returns 0 on success or a negative errno; on error the caller rejects
 * the connection.
 */
static int
sdp_connect_handler(struct socket *sk, struct rdma_cm_id *id,
    struct rdma_cm_event *event)
{
	struct sockaddr_in *src_addr;
	struct sockaddr_in *dst_addr;
	struct socket *child;
	const struct sdp_hh *h;
	struct sdp_sock *ssk;
	int rc;

	sdp_dbg(sk, "%s %p -> %p\n", __func__, sdp_sk(sk)->id, id);

	/* The SDP Hello message rides in the CM private data. */
	h = event->param.conn.private_data;
	SDP_DUMP_PACKET(sk, "RX", NULL, &h->bsdh);

	/* Reject a Hello advertising zero max adverts (malformed peer). */
	if (!h->max_adverts)
		return -EINVAL;

	child = sonewconn(sk, SS_ISCONNECTED);
	if (!child)
		return -ENOMEM;

	ssk = sdp_sk(child);
	rc = sdp_init_qp(child, id);
	if (rc)
		/*
		 * NOTE(review): the child from sonewconn() appears to be
		 * leaked on this path -- confirm teardown happens elsewhere.
		 */
		return rc;
	SDP_WLOCK(ssk);
	/* Cross-link the cm_id and the child socket. */
	id->context = ssk;
	ssk->id = id;
	ssk->socket = child;
	ssk->cred = crhold(child->so_cred);
	/* Record the connection's endpoints (IPv4 only). */
	dst_addr = (struct sockaddr_in *)&id->route.addr.dst_addr;
	src_addr = (struct sockaddr_in *)&id->route.addr.src_addr;
	ssk->fport = dst_addr->sin_port;
	ssk->faddr = dst_addr->sin_addr.s_addr;
	ssk->lport = src_addr->sin_port;
	/* Seed tx flow control from the peer's advertised buffer count. */
	ssk->max_bufs = ntohs(h->bsdh.bufs);
	atomic_set(&ssk->tx_ring.credits, ssk->max_bufs);
	ssk->min_bufs = tx_credits(ssk) / 4;
	/* Peer's receive size bounds our sends (minus the BSDH header). */
	ssk->xmit_size_goal = ntohl(h->localrcvsz) - sizeof(struct sdp_bsdh);
	sdp_init_buffers(ssk, rcvbuf_initial_size);
	ssk->state = TCPS_SYN_RECEIVED;
	SDP_WUNLOCK(ssk);

	return 0;
}
181
/*
 * Handle RDMA_CM_EVENT_CONNECT_RESPONSE on an actively connecting
 * socket: parse the peer's HelloAck (from the CM private data), seed
 * flow control and mark the socket connected.  The caller then
 * completes the handshake with rdma_accept(id, NULL).
 */
static int
sdp_response_handler(struct socket *sk, struct rdma_cm_id *id,
    struct rdma_cm_event *event)
{
	const struct sdp_hah *h;
	struct sockaddr_in *dst_addr;
	struct sdp_sock *ssk;
	sdp_dbg(sk, "%s\n", __func__);

	ssk = sdp_sk(sk);
	SDP_WLOCK(ssk);
	/*
	 * NOTE(review): the state is advanced to ESTABLISHED before the
	 * SDP_DROPPED check below -- confirm a dropped socket is not
	 * unintentionally left in ESTABLISHED state.
	 */
	ssk->state = TCPS_ESTABLISHED;
	sdp_set_default_moderation(ssk);
	/* If the socket was dropped meanwhile, skip the remaining setup. */
	if (ssk->flags & SDP_DROPPED) {
		SDP_WUNLOCK(ssk);
		return 0;
	}
	if (sk->so_options & SO_KEEPALIVE)
		sdp_start_keepalive_timer(sk);
	h = event->param.conn.private_data;
	SDP_DUMP_PACKET(sk, "RX", NULL, &h->bsdh);
	/* Seed tx flow control from the peer's advertised buffer count. */
	ssk->max_bufs = ntohs(h->bsdh.bufs);
	atomic_set(&ssk->tx_ring.credits, ssk->max_bufs);
	ssk->min_bufs = tx_credits(ssk) / 4;
	/* Peer's actual receive size bounds our sends (minus BSDH header). */
	ssk->xmit_size_goal =
		ntohl(h->actrcvsz) - sizeof(struct sdp_bsdh);
	ssk->poll_cq = 1;

	/* Record the foreign endpoint (IPv4 only). */
	dst_addr = (struct sockaddr_in *)&id->route.addr.dst_addr;
	ssk->fport = dst_addr->sin_port;
	ssk->faddr = dst_addr->sin_addr.s_addr;
	soisconnected(sk);
	SDP_WUNLOCK(ssk);

	return 0;
}
218
219static int
220sdp_connected_handler(struct socket *sk, struct rdma_cm_event *event)
221{
222	struct sdp_sock *ssk;
223
224	sdp_dbg(sk, "%s\n", __func__);
225
226	ssk = sdp_sk(sk);
227	SDP_WLOCK(ssk);
228	ssk->state = TCPS_ESTABLISHED;
229
230	sdp_set_default_moderation(ssk);
231
232	if (sk->so_options & SO_KEEPALIVE)
233		sdp_start_keepalive_timer(sk);
234
235	if ((ssk->flags & SDP_DROPPED) == 0)
236		soisconnected(sk);
237	SDP_WUNLOCK(ssk);
238	return 0;
239}
240
241static int
242sdp_disconnected_handler(struct socket *sk)
243{
244	struct sdp_sock *ssk;
245
246	ssk = sdp_sk(sk);
247	sdp_dbg(sk, "%s\n", __func__);
248
249	SDP_WLOCK_ASSERT(ssk);
250	if (sdp_sk(sk)->state == TCPS_SYN_RECEIVED) {
251		sdp_connected_handler(sk, NULL);
252
253		if (rcv_nxt(ssk))
254			return 0;
255	}
256
257	return -ECONNRESET;
258}
259
/*
 * Central RDMA CM event dispatcher for SDP sockets.  Invoked by the
 * RDMA CM for every event on a cm_id owned by SDP; drives both the
 * active (resolve -> connect -> response) and passive (connect request
 * -> accept) sides of the SDP hello handshake, plus disconnect and
 * timewait handling.
 *
 * Returns 0 or a negative errno.  A non-zero return severs the
 * ssk <-> cm_id linkage in the common error path at the bottom and
 * notifies the socket with the corresponding error.
 */
int
sdp_cma_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
{
	struct rdma_conn_param conn_param;
	struct socket *sk;
	struct sdp_sock *ssk;
	struct sdp_hah hah;	/* HelloAck sent by the passive side */
	struct sdp_hh hh;	/* Hello sent by the active side */

	int rc = 0;

	ssk = id->context;
	sk = NULL;
	if (ssk)
		sk = ssk->socket;
	/*
	 * If the socket is gone or already detached from this cm_id, the
	 * id is being torn down: only a connect request needs an explicit
	 * failure (so the CM rejects it); everything else is ignored.
	 */
	if (!ssk || !sk || !ssk->id) {
		sdp_dbg(sk,
		    "cm_id is being torn down, event %d, ssk %p, sk %p, id %p\n",
		       	event->event, ssk, sk, id);
		return event->event == RDMA_CM_EVENT_CONNECT_REQUEST ?
			-EINVAL : 0;
	}

	sdp_dbg(sk, "%s event %d id %p\n", __func__, event->event, id);
	switch (event->event) {
	case RDMA_CM_EVENT_ADDR_RESOLVED:
		sdp_dbg(sk, "RDMA_CM_EVENT_ADDR_RESOLVED\n");

		/*
		 * Optionally refuse IB-transport devices whose port runs a
		 * non-InfiniBand link layer, per the sdp_link_layer_ib_only
		 * tunable.
		 */
		if (sdp_link_layer_ib_only &&
			rdma_node_get_transport(id->device->node_type) ==
				RDMA_TRANSPORT_IB &&
			rdma_port_get_link_layer(id->device, id->port_num) !=
				IB_LINK_LAYER_INFINIBAND) {
			sdp_dbg(sk, "Link layer is: %d. Only IB link layer "
				"is allowed\n",
				rdma_port_get_link_layer(id->device, id->port_num));
			rc = -ENETUNREACH;
			break;
		}

		rc = rdma_resolve_route(id, SDP_ROUTE_TIMEOUT);
		break;
	case RDMA_CM_EVENT_ADDR_ERROR:
		sdp_dbg(sk, "RDMA_CM_EVENT_ADDR_ERROR\n");
		rc = -ENETUNREACH;
		break;
	case RDMA_CM_EVENT_ROUTE_RESOLVED:
		/*
		 * Active side: route is known -- create the QP, build the
		 * SDP Hello in the CM private data and initiate the
		 * connection.
		 */
		sdp_dbg(sk, "RDMA_CM_EVENT_ROUTE_RESOLVED : %p\n", id);
		rc = sdp_init_qp(sk, id);
		if (rc)
			break;
		/* Credits we grant the peer = receive buffers we posted. */
		atomic_set(&sdp_sk(sk)->remote_credits,
				rx_ring_posted(sdp_sk(sk)));
		memset(&hh, 0, sizeof hh);
		hh.bsdh.mid = SDP_MID_HELLO;
		hh.bsdh.len = htonl(sizeof(struct sdp_hh));
		hh.max_adverts = 1;
		/*
		 * NOTE(review): 0x40 presumably encodes the IP version
		 * (IPv4) in the ipv_cap field -- confirm against the SDP
		 * spec and name the constant.
		 */
		hh.ipv_cap = 0x40;
		hh.majv_minv = SDP_MAJV_MINV;
		sdp_init_buffers(sdp_sk(sk), rcvbuf_initial_size);
		hh.bsdh.bufs = htons(rx_ring_posted(sdp_sk(sk)));
		hh.localrcvsz = hh.desremrcvsz = htonl(sdp_sk(sk)->recv_bytes);
		/* NOTE(review): redundant -- max_adverts was set to 1 above. */
		hh.max_adverts = 0x1;
		hh.max_adverts = 0x1;
		sdp_sk(sk)->laddr =
			((struct sockaddr_in *)&id->route.addr.src_addr)->sin_addr.s_addr;
		memset(&conn_param, 0, sizeof conn_param);
		conn_param.private_data_len = sizeof hh;
		conn_param.private_data = &hh;
		conn_param.responder_resources = 4 /* TODO */;
		conn_param.initiator_depth = 4 /* TODO */;
		conn_param.retry_count = SDP_RETRY_COUNT;
		SDP_DUMP_PACKET(NULL, "TX", NULL, &hh.bsdh);
		rc = rdma_connect(id, &conn_param);
		break;
	case RDMA_CM_EVENT_ROUTE_ERROR:
		sdp_dbg(sk, "RDMA_CM_EVENT_ROUTE_ERROR : %p\n", id);
		rc = -ETIMEDOUT;
		break;
	case RDMA_CM_EVENT_CONNECT_REQUEST:
		/*
		 * Passive side: create the child socket/QP, then answer
		 * with a HelloAck in the accept's private data.
		 */
		sdp_dbg(sk, "RDMA_CM_EVENT_CONNECT_REQUEST\n");
		rc = sdp_connect_handler(sk, id, event);
		if (rc) {
			sdp_dbg(sk, "Destroying qp\n");
			rdma_reject(id, NULL, 0);
			break;
		}
		/* sdp_connect_handler() pointed id->context at the child. */
		ssk = id->context;
		atomic_set(&ssk->remote_credits, rx_ring_posted(ssk));
		memset(&hah, 0, sizeof hah);
		hah.bsdh.mid = SDP_MID_HELLO_ACK;
		hah.bsdh.bufs = htons(rx_ring_posted(ssk));
		hah.bsdh.len = htonl(sizeof(struct sdp_hah));
		hah.majv_minv = SDP_MAJV_MINV;
		hah.ext_max_adverts = 1; /* Doesn't seem to be mandated by spec,
					    but just in case */
		hah.actrcvsz = htonl(ssk->recv_bytes);
		memset(&conn_param, 0, sizeof conn_param);
		conn_param.private_data_len = sizeof hah;
		conn_param.private_data = &hah;
		conn_param.responder_resources = 4 /* TODO */;
		conn_param.initiator_depth = 4 /* TODO */;
		conn_param.retry_count = SDP_RETRY_COUNT;
		SDP_DUMP_PACKET(sk, "TX", NULL, &hah.bsdh);
		rc = rdma_accept(id, &conn_param);
		if (rc) {
			/* Accept failed: detach the child from this cm_id. */
			ssk->id = NULL;
			id->qp = NULL;
			id->context = NULL;
		}
		break;
	case RDMA_CM_EVENT_CONNECT_RESPONSE:
		sdp_dbg(sk, "RDMA_CM_EVENT_CONNECT_RESPONSE\n");
		rc = sdp_response_handler(sk, id, event);
		if (rc) {
			sdp_dbg(sk, "Destroying qp\n");
			rdma_reject(id, NULL, 0);
		} else
			rc = rdma_accept(id, NULL);
		break;
	case RDMA_CM_EVENT_CONNECT_ERROR:
		sdp_dbg(sk, "RDMA_CM_EVENT_CONNECT_ERROR\n");
		rc = -ETIMEDOUT;
		break;
	case RDMA_CM_EVENT_UNREACHABLE:
		sdp_dbg(sk, "RDMA_CM_EVENT_UNREACHABLE\n");
		rc = -ENETUNREACH;
		break;
	case RDMA_CM_EVENT_REJECTED:
		sdp_dbg(sk, "RDMA_CM_EVENT_REJECTED\n");
		rc = -ECONNREFUSED;
		break;
	case RDMA_CM_EVENT_ESTABLISHED:
		sdp_dbg(sk, "RDMA_CM_EVENT_ESTABLISHED\n");
		sdp_sk(sk)->laddr =
			((struct sockaddr_in *)&id->route.addr.src_addr)->sin_addr.s_addr;
		rc = sdp_connected_handler(sk, event);
		break;
	case RDMA_CM_EVENT_DISCONNECTED: /* This means DREQ/DREP received */
		sdp_dbg(sk, "RDMA_CM_EVENT_DISCONNECTED\n");

		SDP_WLOCK(ssk);
		if (ssk->state == TCPS_LAST_ACK) {
			sdp_cancel_dreq_wait_timeout(ssk);

			sdp_dbg(sk, "%s: waiting for Infiniband tear down\n",
				__func__);
		}
		ssk->qp_active = 0;
		/* Drop the lock around the (potentially blocking) CM call. */
		SDP_WUNLOCK(ssk);
		rdma_disconnect(id);
		SDP_WLOCK(ssk);
		if (ssk->state != TCPS_TIME_WAIT) {
			if (ssk->state == TCPS_CLOSE_WAIT) {
				sdp_dbg(sk, "IB teardown while in "
					"TCPS_CLOSE_WAIT taking reference to "
					"let close() finish the work\n");
			}
			rc = sdp_disconnected_handler(sk);
			if (rc)
				rc = -EPIPE;
		}
		SDP_WUNLOCK(ssk);
		break;
	case RDMA_CM_EVENT_TIMEWAIT_EXIT:
		sdp_dbg(sk, "RDMA_CM_EVENT_TIMEWAIT_EXIT\n");
		SDP_WLOCK(ssk);
		rc = sdp_disconnected_handler(sk);
		SDP_WUNLOCK(ssk);
		break;
	case RDMA_CM_EVENT_DEVICE_REMOVAL:
		sdp_dbg(sk, "RDMA_CM_EVENT_DEVICE_REMOVAL\n");
		rc = -ENETRESET;
		break;
	default:
		printk(KERN_ERR "SDP: Unexpected CMA event: %d\n",
		       event->event);
		rc = -ECONNABORTED;
		break;
	}

	sdp_dbg(sk, "event %d done. status %d\n", event->event, rc);

	/*
	 * Common error path: detach the socket from the cm_id (the CM
	 * will destroy the id on non-zero return) and notify the socket.
	 * sdp_notify() apparently consumes the lock on success, hence the
	 * asymmetric unlock -- NOTE(review): confirm sdp_notify()'s
	 * locking contract.
	 */
	if (rc) {
		SDP_WLOCK(ssk);
		if (ssk->id == id) {
			ssk->id = NULL;
			id->qp = NULL;
			id->context = NULL;
			if (sdp_notify(ssk, -rc))
				SDP_WUNLOCK(ssk);
		} else
			SDP_WUNLOCK(ssk);
	}

	return rc;
}
456