/*-
 * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0
 *
 * Copyright (c) 2006 Mellanox Technologies Ltd.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * $Id$
 */
#include "sdp.h"

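/*
 * SDP protocol version advertised in the Hello/HelloAck headers; the major
 * version is in the high nibble and the minor in the low nibble, so 0x22
 * encodes SDP version 2.2.
 */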
#define SDP_MAJV_MINV 0x22

SDP_MODPARAM_SINT(sdp_link_layer_ib_only, 1, "Support only link layer of "
		"type InfiniBand");

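/*
 * Sizes, in bytes, of the SDP Hello (HH) and HelloAck (HAH) messages that
 * are exchanged as CM private data during connection setup.
 */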
enum {
	SDP_HH_SIZE = 76,
	SDP_HAH_SIZE = 180,
};

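/*
 * QP event callback.  Nothing is done with the event beyond logging it;
 * the socket that owns the QP is passed back in the context pointer.
 */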
static void
sdp_qp_event_handler(struct ib_event *event, void *data)
{
	struct socket *sk = data;

	sdp_dbg(sk, "QP Event: %s (%d)\n", ib_event_msg(event->event),
	    event->event);
}

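/*
 * Return the maximum number of SGEs supported by the device.  The value is
 * cached in a function-local static, which assumes every HCA in the system
 * reports the same limit; the first device queried wins.
 */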
static int
sdp_get_max_dev_sge(struct ib_device *dev)
{
	struct ib_device_attr *device_attr;
	static int max_sges = -1;

	if (max_sges > 0)
		goto out;

	device_attr = &dev->attrs;
	max_sges = device_attr->max_sge;

out:
	return max_sges;
}

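/*
 * Create the receive and transmit completion rings and the RC QP for a
 * socket.  The rings are sized by SDP_RX_SIZE/SDP_TX_SIZE and the SGE
 * limits are clamped to what the device supports; on failure everything
 * built so far is torn down in reverse order.
 */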
static int
sdp_init_qp(struct socket *sk, struct rdma_cm_id *id)
{
	struct ib_qp_init_attr qp_init_attr = {
		.event_handler = sdp_qp_event_handler,
		.qp_context = sk,
		.cap.max_send_wr = SDP_TX_SIZE,
		.cap.max_recv_wr = SDP_RX_SIZE,
		.sq_sig_type = IB_SIGNAL_REQ_WR,
		.qp_type = IB_QPT_RC,
	};
	struct ib_device *device = id->device;
	struct sdp_sock *ssk;
	int rc;

	sdp_dbg(sk, "%s\n", __func__);

	ssk = sdp_sk(sk);
	ssk->max_sge = sdp_get_max_dev_sge(device);
	sdp_dbg(sk, "Max sges: %d\n", ssk->max_sge);

	qp_init_attr.cap.max_send_sge = MIN(ssk->max_sge, SDP_MAX_SEND_SGES);
	sdp_dbg(sk, "Setting max send sge to: %d\n",
	    qp_init_attr.cap.max_send_sge);

	qp_init_attr.cap.max_recv_sge = MIN(ssk->max_sge, SDP_MAX_RECV_SGES);
	sdp_dbg(sk, "Setting max recv sge to: %d\n",
	    qp_init_attr.cap.max_recv_sge);

	ssk->sdp_dev = ib_get_client_data(device, &sdp_client);
	if (!ssk->sdp_dev) {
		sdp_warn(sk, "SDP not available on device %s\n", device->name);
		rc = -ENODEV;
		goto err_rx;
	}

	rc = sdp_rx_ring_create(ssk, device);
	if (rc)
		goto err_rx;

	rc = sdp_tx_ring_create(ssk, device);
	if (rc)
		goto err_tx;

	qp_init_attr.recv_cq = ssk->rx_ring.cq;
	qp_init_attr.send_cq = ssk->tx_ring.cq;

	rc = rdma_create_qp(id, ssk->sdp_dev->pd, &qp_init_attr);
	if (rc) {
		sdp_warn(sk, "Unable to create QP: %d.\n", rc);
		goto err_qp;
	}
	ssk->qp = id->qp;
	ssk->ib_device = device;
	ssk->qp_active = 1;
	ssk->context.device = device;

	sdp_dbg(sk, "%s done\n", __func__);
	return 0;

err_qp:
	sdp_tx_ring_destroy(ssk);
err_tx:
	sdp_rx_ring_destroy(ssk);
err_rx:
	return rc;
}

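/*
 * Passive-side handler for an incoming connection request.  Validates the
 * peer's Hello header, creates a child socket with its own QP, and seeds
 * the child's credits and buffer sizes from the values the peer advertised.
 */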
static int
sdp_connect_handler(struct socket *sk, struct rdma_cm_id *id,
    struct rdma_cm_event *event)
{
	struct sockaddr_in *src_addr;
	struct sockaddr_in *dst_addr;
	struct socket *child;
	const struct sdp_hh *h;
	struct sdp_sock *ssk;
	int rc;

	sdp_dbg(sk, "%s %p -> %p\n", __func__, sdp_sk(sk)->id, id);

	h = event->param.conn.private_data;
	SDP_DUMP_PACKET(sk, "RX", NULL, &h->bsdh);

	if (!h->max_adverts)
		return -EINVAL;

	child = sonewconn(sk, SS_ISCONNECTED);
	if (!child)
		return -ENOMEM;

	ssk = sdp_sk(child);
	rc = sdp_init_qp(child, id);
	if (rc)
		return rc;
	SDP_WLOCK(ssk);
	id->context = ssk;
	ssk->id = id;
	ssk->socket = child;
	ssk->cred = crhold(child->so_cred);
	dst_addr = (struct sockaddr_in *)&id->route.addr.dst_addr;
	src_addr = (struct sockaddr_in *)&id->route.addr.src_addr;
	ssk->fport = dst_addr->sin_port;
	ssk->faddr = dst_addr->sin_addr.s_addr;
	ssk->lport = src_addr->sin_port;
	ssk->max_bufs = ntohs(h->bsdh.bufs);
	atomic_set(&ssk->tx_ring.credits, ssk->max_bufs);
	ssk->min_bufs = tx_credits(ssk) / 4;
	ssk->xmit_size_goal = ntohl(h->localrcvsz) - sizeof(struct sdp_bsdh);
	sdp_init_buffers(ssk, rcvbuf_initial_size);
	ssk->state = TCPS_SYN_RECEIVED;
	SDP_WUNLOCK(ssk);

	return 0;
}

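/*
 * Active-side handler for the peer's HelloAck.  Moves the socket to
 * ESTABLISHED, records the advertised credits and receive size, and marks
 * the connection up unless it was dropped while the handshake was in
 * flight.
 */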
static int
sdp_response_handler(struct socket *sk, struct rdma_cm_id *id,
    struct rdma_cm_event *event)
{
	const struct sdp_hah *h;
	struct sockaddr_in *dst_addr;
	struct sdp_sock *ssk;

	sdp_dbg(sk, "%s\n", __func__);

	ssk = sdp_sk(sk);
	SDP_WLOCK(ssk);
	ssk->state = TCPS_ESTABLISHED;
	sdp_set_default_moderation(ssk);
	if (ssk->flags & SDP_DROPPED) {
		SDP_WUNLOCK(ssk);
		return 0;
	}
	if (sk->so_options & SO_KEEPALIVE)
		sdp_start_keepalive_timer(sk);
	h = event->param.conn.private_data;
	SDP_DUMP_PACKET(sk, "RX", NULL, &h->bsdh);
	ssk->max_bufs = ntohs(h->bsdh.bufs);
	atomic_set(&ssk->tx_ring.credits, ssk->max_bufs);
	ssk->min_bufs = tx_credits(ssk) / 4;
	ssk->xmit_size_goal =
		ntohl(h->actrcvsz) - sizeof(struct sdp_bsdh);
	ssk->poll_cq = 1;

	dst_addr = (struct sockaddr_in *)&id->route.addr.dst_addr;
	ssk->fport = dst_addr->sin_port;
	ssk->faddr = dst_addr->sin_addr.s_addr;
	soisconnected(sk);
	SDP_WUNLOCK(ssk);

	return 0;
}

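/*
 * Move the socket to ESTABLISHED.  Called when the CM reports the
 * connection up, and from the disconnect path to complete a half-open
 * (SYN_RECEIVED) connection before tearing it down.
 */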
static int
sdp_connected_handler(struct socket *sk, struct rdma_cm_event *event)
{
	struct sdp_sock *ssk;

	sdp_dbg(sk, "%s\n", __func__);

	ssk = sdp_sk(sk);
	SDP_WLOCK(ssk);
	ssk->state = TCPS_ESTABLISHED;

	sdp_set_default_moderation(ssk);

	if (sk->so_options & SO_KEEPALIVE)
		sdp_start_keepalive_timer(sk);

	if ((ssk->flags & SDP_DROPPED) == 0)
		soisconnected(sk);
	SDP_WUNLOCK(ssk);
	return 0;
}

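/*
 * Handle a CM disconnect.  A connection still in SYN_RECEIVED is first
 * completed; if nothing was ever received (rcv_nxt is zero) the caller is
 * told to reset the connection via the -ECONNRESET return.
 */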
static int
sdp_disconnected_handler(struct socket *sk)
{
	struct sdp_sock *ssk;

	ssk = sdp_sk(sk);
	sdp_dbg(sk, "%s\n", __func__);

	SDP_WLOCK_ASSERT(ssk);
	if (ssk->state == TCPS_SYN_RECEIVED) {
		sdp_connected_handler(sk, NULL);

		if (rcv_nxt(ssk))
			return 0;
	}

	return -ECONNRESET;
}

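/*
 * Central RDMA CM event dispatcher for SDP sockets: drives address and
 * route resolution on the active side, exchanges the Hello/HelloAck
 * private data, and maps CM teardown events onto socket state changes.
 * On error the socket is detached from the cm_id before the non-zero
 * return lets the CM destroy it.
 */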
int
sdp_cma_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
{
	struct rdma_conn_param conn_param;
	struct socket *sk;
	struct sdp_sock *ssk;
	struct sdp_hah hah;
	struct sdp_hh hh;
	int rc = 0;

	ssk = id->context;
	sk = NULL;
	if (ssk)
		sk = ssk->socket;
	if (!ssk || !sk || !ssk->id) {
		sdp_dbg(sk,
		    "cm_id is being torn down, event %d, ssk %p, sk %p, id %p\n",
		    event->event, ssk, sk, id);
		return event->event == RDMA_CM_EVENT_CONNECT_REQUEST ?
		    -EINVAL : 0;
	}

	sdp_dbg(sk, "%s event %d id %p\n", __func__, event->event, id);
	switch (event->event) {
	case RDMA_CM_EVENT_ADDR_RESOLVED:
		sdp_dbg(sk, "RDMA_CM_EVENT_ADDR_RESOLVED\n");

		if (sdp_link_layer_ib_only &&
		    rdma_node_get_transport(id->device->node_type) ==
		    RDMA_TRANSPORT_IB &&
		    rdma_port_get_link_layer(id->device, id->port_num) !=
		    IB_LINK_LAYER_INFINIBAND) {
			sdp_dbg(sk, "Link layer is: %d. Only IB link layer "
			    "is allowed\n",
			    rdma_port_get_link_layer(id->device, id->port_num));
			rc = -ENETUNREACH;
			break;
		}

		rc = rdma_resolve_route(id, SDP_ROUTE_TIMEOUT);
		break;
	case RDMA_CM_EVENT_ADDR_ERROR:
		sdp_dbg(sk, "RDMA_CM_EVENT_ADDR_ERROR\n");
		rc = -ENETUNREACH;
		break;
	case RDMA_CM_EVENT_ROUTE_RESOLVED:
		sdp_dbg(sk, "RDMA_CM_EVENT_ROUTE_RESOLVED : %p\n", id);
		rc = sdp_init_qp(sk, id);
		if (rc)
			break;
		atomic_set(&sdp_sk(sk)->remote_credits,
		    rx_ring_posted(sdp_sk(sk)));
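		/*
		 * Build the Hello message that rides in the REQ private
		 * data: advertise one credit per posted receive and our
		 * initial receive buffer size.
		 */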
		memset(&hh, 0, sizeof hh);
		hh.bsdh.mid = SDP_MID_HELLO;
		hh.bsdh.len = htonl(sizeof(struct sdp_hh));
		hh.max_adverts = 1;
		hh.ipv_cap = 0x40;	/* IPv4 */
		hh.majv_minv = SDP_MAJV_MINV;
		sdp_init_buffers(sdp_sk(sk), rcvbuf_initial_size);
		hh.bsdh.bufs = htons(rx_ring_posted(sdp_sk(sk)));
		hh.localrcvsz = hh.desremrcvsz = htonl(sdp_sk(sk)->recv_bytes);
		sdp_sk(sk)->laddr =
		    ((struct sockaddr_in *)&id->route.addr.src_addr)->sin_addr.s_addr;
		memset(&conn_param, 0, sizeof conn_param);
		conn_param.private_data_len = sizeof hh;
		conn_param.private_data = &hh;
		conn_param.responder_resources = 4 /* TODO */;
		conn_param.initiator_depth = 4 /* TODO */;
		conn_param.retry_count = SDP_RETRY_COUNT;
		SDP_DUMP_PACKET(NULL, "TX", NULL, &hh.bsdh);
		rc = rdma_connect(id, &conn_param);
		break;
	case RDMA_CM_EVENT_ROUTE_ERROR:
		sdp_dbg(sk, "RDMA_CM_EVENT_ROUTE_ERROR : %p\n", id);
		rc = -ETIMEDOUT;
		break;
	case RDMA_CM_EVENT_CONNECT_REQUEST:
		sdp_dbg(sk, "RDMA_CM_EVENT_CONNECT_REQUEST\n");
		rc = sdp_connect_handler(sk, id, event);
		if (rc) {
			sdp_dbg(sk, "Destroying qp\n");
			rdma_reject(id, NULL, 0);
			break;
		}
		ssk = id->context;
		atomic_set(&ssk->remote_credits, rx_ring_posted(ssk));
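		/*
		 * Accept the connection with a HelloAck in the REP private
		 * data, echoing our credit count and actual receive size.
		 */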
		memset(&hah, 0, sizeof hah);
		hah.bsdh.mid = SDP_MID_HELLO_ACK;
		hah.bsdh.bufs = htons(rx_ring_posted(ssk));
		hah.bsdh.len = htonl(sizeof(struct sdp_hah));
		hah.majv_minv = SDP_MAJV_MINV;
		hah.ext_max_adverts = 1; /* Doesn't seem to be mandated by spec,
					    but just in case */
		hah.actrcvsz = htonl(ssk->recv_bytes);
		memset(&conn_param, 0, sizeof conn_param);
		conn_param.private_data_len = sizeof hah;
		conn_param.private_data = &hah;
		conn_param.responder_resources = 4 /* TODO */;
		conn_param.initiator_depth = 4 /* TODO */;
		conn_param.retry_count = SDP_RETRY_COUNT;
		SDP_DUMP_PACKET(sk, "TX", NULL, &hah.bsdh);
		rc = rdma_accept(id, &conn_param);
		if (rc) {
			ssk->id = NULL;
			id->qp = NULL;
			id->context = NULL;
		}
		break;
	case RDMA_CM_EVENT_CONNECT_RESPONSE:
		sdp_dbg(sk, "RDMA_CM_EVENT_CONNECT_RESPONSE\n");
		rc = sdp_response_handler(sk, id, event);
		if (rc) {
			sdp_dbg(sk, "Destroying qp\n");
			rdma_reject(id, NULL, 0);
		} else
			rc = rdma_accept(id, NULL);
		break;
	case RDMA_CM_EVENT_CONNECT_ERROR:
		sdp_dbg(sk, "RDMA_CM_EVENT_CONNECT_ERROR\n");
		rc = -ETIMEDOUT;
		break;
	case RDMA_CM_EVENT_UNREACHABLE:
		sdp_dbg(sk, "RDMA_CM_EVENT_UNREACHABLE\n");
		rc = -ENETUNREACH;
		break;
	case RDMA_CM_EVENT_REJECTED:
		sdp_dbg(sk, "RDMA_CM_EVENT_REJECTED\n");
		rc = -ECONNREFUSED;
		break;
	case RDMA_CM_EVENT_ESTABLISHED:
		sdp_dbg(sk, "RDMA_CM_EVENT_ESTABLISHED\n");
		sdp_sk(sk)->laddr =
		    ((struct sockaddr_in *)&id->route.addr.src_addr)->sin_addr.s_addr;
		rc = sdp_connected_handler(sk, event);
		break;
	case RDMA_CM_EVENT_DISCONNECTED: /* This means DREQ/DREP received */
		sdp_dbg(sk, "RDMA_CM_EVENT_DISCONNECTED\n");

		SDP_WLOCK(ssk);
		if (ssk->state == TCPS_LAST_ACK) {
			sdp_cancel_dreq_wait_timeout(ssk);

			sdp_dbg(sk, "%s: waiting for InfiniBand tear down\n",
			    __func__);
		}
		ssk->qp_active = 0;
		SDP_WUNLOCK(ssk);
		rdma_disconnect(id);
		SDP_WLOCK(ssk);
		if (ssk->state != TCPS_TIME_WAIT) {
			if (ssk->state == TCPS_CLOSE_WAIT) {
				sdp_dbg(sk, "IB teardown while in "
				    "TCPS_CLOSE_WAIT taking reference to "
				    "let close() finish the work\n");
			}
			rc = sdp_disconnected_handler(sk);
			if (rc)
				rc = -EPIPE;
		}
		SDP_WUNLOCK(ssk);
		break;
	case RDMA_CM_EVENT_TIMEWAIT_EXIT:
		sdp_dbg(sk, "RDMA_CM_EVENT_TIMEWAIT_EXIT\n");
		SDP_WLOCK(ssk);
		rc = sdp_disconnected_handler(sk);
		SDP_WUNLOCK(ssk);
		break;
	case RDMA_CM_EVENT_DEVICE_REMOVAL:
		sdp_dbg(sk, "RDMA_CM_EVENT_DEVICE_REMOVAL\n");
		rc = -ENETRESET;
		break;
	default:
		printk(KERN_ERR "SDP: Unexpected CMA event: %d\n",
		    event->event);
		rc = -ECONNABORTED;
		break;
	}

	sdp_dbg(sk, "event %s (%d) done. status %d\n",
	    rdma_event_msg(event->event), event->event, rc);

	if (rc) {
		SDP_WLOCK(ssk);
		if (ssk->id == id) {
			ssk->id = NULL;
			id->qp = NULL;
			id->context = NULL;
			if (sdp_notify(ssk, -rc))
				SDP_WUNLOCK(ssk);
		} else
			SDP_WUNLOCK(ssk);
	}

	return rc;
}