1/* 2 * Copyright (c) 2006 Mellanox Technologies Ltd. All rights reserved. 3 * 4 * This software is available to you under a choice of one of two 5 * licenses. You may choose to be licensed under the terms of the GNU 6 * General Public License (GPL) Version 2, available from the file 7 * COPYING in the main directory of this source tree, or the 8 * OpenIB.org BSD license below: 9 * 10 * Redistribution and use in source and binary forms, with or 11 * without modification, are permitted provided that the following 12 * conditions are met: 13 * 14 * - Redistributions of source code must retain the above 15 * copyright notice, this list of conditions and the following 16 * disclaimer. 17 * 18 * - Redistributions in binary form must reproduce the above 19 * copyright notice, this list of conditions and the following 20 * disclaimer in the documentation and/or other materials 21 * provided with the distribution. 22 * 23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 * SOFTWARE. 31 * 32 * $Id$ 33 */ 34#include "sdp.h" 35 36#define SDP_MAJV_MINV 0x22 37 38SDP_MODPARAM_SINT(sdp_link_layer_ib_only, 1, "Support only link layer of " 39 "type Infiniband"); 40 41enum { 42 SDP_HH_SIZE = 76, 43 SDP_HAH_SIZE = 180, 44}; 45 46static void 47sdp_qp_event_handler(struct ib_event *event, void *data) 48{ 49} 50 51static int 52sdp_get_max_dev_sge(struct ib_device *dev) 53{ 54 struct ib_device_attr *device_attr; 55 static int max_sges = -1; 56 57 if (max_sges > 0) 58 goto out; 59 60 device_attr = &dev->attrs; 61 max_sges = device_attr->max_sge; 62 63out: 64 return max_sges; 65} 66 67static int 68sdp_init_qp(struct socket *sk, struct rdma_cm_id *id) 69{ 70 struct ib_qp_init_attr qp_init_attr = { 71 .event_handler = sdp_qp_event_handler, 72 .cap.max_send_wr = SDP_TX_SIZE, 73 .cap.max_recv_wr = SDP_RX_SIZE, 74 .sq_sig_type = IB_SIGNAL_REQ_WR, 75 .qp_type = IB_QPT_RC, 76 }; 77 struct ib_device *device = id->device; 78 struct sdp_sock *ssk; 79 int rc; 80 81 sdp_dbg(sk, "%s\n", __func__); 82 83 ssk = sdp_sk(sk); 84 ssk->max_sge = sdp_get_max_dev_sge(device); 85 sdp_dbg(sk, "Max sges: %d\n", ssk->max_sge); 86 87 qp_init_attr.cap.max_send_sge = MIN(ssk->max_sge, SDP_MAX_SEND_SGES); 88 sdp_dbg(sk, "Setting max send sge to: %d\n", 89 qp_init_attr.cap.max_send_sge); 90 91 qp_init_attr.cap.max_recv_sge = MIN(ssk->max_sge, SDP_MAX_RECV_SGES); 92 sdp_dbg(sk, "Setting max recv sge to: %d\n", 93 qp_init_attr.cap.max_recv_sge); 94 95 ssk->sdp_dev = ib_get_client_data(device, &sdp_client); 96 if (!ssk->sdp_dev) { 97 sdp_warn(sk, "SDP not available on device %s\n", device->name); 98 rc = -ENODEV; 99 goto err_rx; 100 } 101 102 rc = sdp_rx_ring_create(ssk, device); 103 if (rc) 104 goto err_rx; 105 106 rc = sdp_tx_ring_create(ssk, device); 107 if (rc) 108 goto err_tx; 109 110 qp_init_attr.recv_cq = ssk->rx_ring.cq; 111 qp_init_attr.send_cq = ssk->tx_ring.cq; 112 113 rc = rdma_create_qp(id, ssk->sdp_dev->pd, &qp_init_attr); 114 if (rc) { 115 sdp_warn(sk, "Unable to create QP: %d.\n", rc); 116 goto err_qp; 117 } 118 ssk->qp = id->qp; 119 ssk->ib_device = device; 120 ssk->qp_active = 1; 121 ssk->context.device = device; 122 123 sdp_dbg(sk, "%s done\n", __func__); 124 return 0; 125 126err_qp: 127 sdp_tx_ring_destroy(ssk); 128err_tx: 129 sdp_rx_ring_destroy(ssk); 130err_rx: 131 return rc; 132} 133 134static int 135sdp_connect_handler(struct socket *sk, struct rdma_cm_id *id, 136 struct rdma_cm_event *event) 137{ 138 struct sockaddr_in *src_addr; 139 struct sockaddr_in *dst_addr; 140 struct socket *child; 141 const struct sdp_hh *h; 142 struct sdp_sock *ssk; 143 int rc; 144 145 sdp_dbg(sk, "%s %p -> %p\n", __func__, sdp_sk(sk)->id, id); 146 147 h = event->param.conn.private_data; 148 SDP_DUMP_PACKET(sk, "RX", NULL, &h->bsdh); 149 150 if (!h->max_adverts) 151 return -EINVAL; 152 153 child = sonewconn(sk, SS_ISCONNECTED); 154 if (!child) 155 return -ENOMEM; 156 157 ssk = sdp_sk(child); 158 rc = sdp_init_qp(child, id); 159 if (rc) 160 return rc; 161 SDP_WLOCK(ssk); 162 id->context = ssk; 163 ssk->id = id; 164 ssk->socket = child; 165 ssk->cred = crhold(child->so_cred); 166 dst_addr = (struct sockaddr_in *)&id->route.addr.dst_addr; 167 src_addr = (struct sockaddr_in *)&id->route.addr.src_addr; 168 ssk->fport = dst_addr->sin_port; 169 ssk->faddr = dst_addr->sin_addr.s_addr; 170 ssk->lport = src_addr->sin_port; 171 ssk->max_bufs = ntohs(h->bsdh.bufs); 172 atomic_set(&ssk->tx_ring.credits, ssk->max_bufs); 173 ssk->min_bufs = tx_credits(ssk) / 4; 174 ssk->xmit_size_goal = ntohl(h->localrcvsz) - sizeof(struct sdp_bsdh); 175 sdp_init_buffers(ssk, rcvbuf_initial_size); 176 ssk->state = TCPS_SYN_RECEIVED; 177 SDP_WUNLOCK(ssk); 178 179 return 0; 180} 181 182static int 183sdp_response_handler(struct socket *sk, struct rdma_cm_id *id, 184 struct rdma_cm_event *event) 185{ 186 const struct sdp_hah *h; 187 struct sockaddr_in *dst_addr; 188 struct sdp_sock *ssk; 189 sdp_dbg(sk, "%s\n", __func__); 190 191 ssk = sdp_sk(sk); 192 SDP_WLOCK(ssk); 193 ssk->state = TCPS_ESTABLISHED; 194 sdp_set_default_moderation(ssk); 195 if (ssk->flags & SDP_DROPPED) { 196 SDP_WUNLOCK(ssk); 197 return 0; 198 } 199 if (sk->so_options & SO_KEEPALIVE) 200 sdp_start_keepalive_timer(sk); 201 h = event->param.conn.private_data; 202 SDP_DUMP_PACKET(sk, "RX", NULL, &h->bsdh); 203 ssk->max_bufs = ntohs(h->bsdh.bufs); 204 atomic_set(&ssk->tx_ring.credits, ssk->max_bufs); 205 ssk->min_bufs = tx_credits(ssk) / 4; 206 ssk->xmit_size_goal = 207 ntohl(h->actrcvsz) - sizeof(struct sdp_bsdh); 208 ssk->poll_cq = 1; 209 210 dst_addr = (struct sockaddr_in *)&id->route.addr.dst_addr; 211 ssk->fport = dst_addr->sin_port; 212 ssk->faddr = dst_addr->sin_addr.s_addr; 213 soisconnected(sk); 214 SDP_WUNLOCK(ssk); 215 216 return 0; 217} 218 219static int 220sdp_connected_handler(struct socket *sk, struct rdma_cm_event *event) 221{ 222 struct sdp_sock *ssk; 223 224 sdp_dbg(sk, "%s\n", __func__); 225 226 ssk = sdp_sk(sk); 227 SDP_WLOCK(ssk); 228 ssk->state = TCPS_ESTABLISHED; 229 230 sdp_set_default_moderation(ssk); 231 232 if (sk->so_options & SO_KEEPALIVE) 233 sdp_start_keepalive_timer(sk); 234 235 if ((ssk->flags & SDP_DROPPED) == 0) 236 soisconnected(sk); 237 SDP_WUNLOCK(ssk); 238 return 0; 239} 240 241static int 242sdp_disconnected_handler(struct socket *sk) 243{ 244 struct sdp_sock *ssk; 245 246 ssk = sdp_sk(sk); 247 sdp_dbg(sk, "%s\n", __func__); 248 249 SDP_WLOCK_ASSERT(ssk); 250 if (sdp_sk(sk)->state == TCPS_SYN_RECEIVED) { 251 sdp_connected_handler(sk, NULL); 252 253 if (rcv_nxt(ssk)) 254 return 0; 255 } 256 257 return -ECONNRESET; 258} 259 260int 261sdp_cma_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) 262{ 263 struct rdma_conn_param conn_param; 264 struct socket *sk; 265 struct sdp_sock *ssk; 266 struct sdp_hah hah; 267 struct sdp_hh hh; 268 269 int rc = 0; 270 271 ssk = id->context; 272 sk = NULL; 273 if (ssk) 274 sk = ssk->socket; 275 if (!ssk || !sk || !ssk->id) { 276 sdp_dbg(sk, 277 "cm_id is being torn down, event %d, ssk %p, sk %p, id %p\n", 278 event->event, ssk, sk, id); 279 return event->event == RDMA_CM_EVENT_CONNECT_REQUEST ? 280 -EINVAL : 0; 281 } 282 283 sdp_dbg(sk, "%s event %d id %p\n", __func__, event->event, id); 284 switch (event->event) { 285 case RDMA_CM_EVENT_ADDR_RESOLVED: 286 sdp_dbg(sk, "RDMA_CM_EVENT_ADDR_RESOLVED\n"); 287 288 if (sdp_link_layer_ib_only && 289 rdma_node_get_transport(id->device->node_type) == 290 RDMA_TRANSPORT_IB && 291 rdma_port_get_link_layer(id->device, id->port_num) != 292 IB_LINK_LAYER_INFINIBAND) { 293 sdp_dbg(sk, "Link layer is: %d. Only IB link layer " 294 "is allowed\n", 295 rdma_port_get_link_layer(id->device, id->port_num)); 296 rc = -ENETUNREACH; 297 break; 298 } 299 300 rc = rdma_resolve_route(id, SDP_ROUTE_TIMEOUT); 301 break; 302 case RDMA_CM_EVENT_ADDR_ERROR: 303 sdp_dbg(sk, "RDMA_CM_EVENT_ADDR_ERROR\n"); 304 rc = -ENETUNREACH; 305 break; 306 case RDMA_CM_EVENT_ROUTE_RESOLVED: 307 sdp_dbg(sk, "RDMA_CM_EVENT_ROUTE_RESOLVED : %p\n", id); 308 rc = sdp_init_qp(sk, id); 309 if (rc) 310 break; 311 atomic_set(&sdp_sk(sk)->remote_credits, 312 rx_ring_posted(sdp_sk(sk))); 313 memset(&hh, 0, sizeof hh); 314 hh.bsdh.mid = SDP_MID_HELLO; 315 hh.bsdh.len = htonl(sizeof(struct sdp_hh)); 316 hh.max_adverts = 1; 317 hh.ipv_cap = 0x40; 318 hh.majv_minv = SDP_MAJV_MINV; 319 sdp_init_buffers(sdp_sk(sk), rcvbuf_initial_size); 320 hh.bsdh.bufs = htons(rx_ring_posted(sdp_sk(sk))); 321 hh.localrcvsz = hh.desremrcvsz = htonl(sdp_sk(sk)->recv_bytes); 322 hh.max_adverts = 0x1; 323 sdp_sk(sk)->laddr = 324 ((struct sockaddr_in *)&id->route.addr.src_addr)->sin_addr.s_addr; 325 memset(&conn_param, 0, sizeof conn_param); 326 conn_param.private_data_len = sizeof hh; 327 conn_param.private_data = &hh; 328 conn_param.responder_resources = 4 /* TODO */; 329 conn_param.initiator_depth = 4 /* TODO */; 330 conn_param.retry_count = SDP_RETRY_COUNT; 331 SDP_DUMP_PACKET(NULL, "TX", NULL, &hh.bsdh); 332 rc = rdma_connect(id, &conn_param); 333 break; 334 case RDMA_CM_EVENT_ROUTE_ERROR: 335 sdp_dbg(sk, "RDMA_CM_EVENT_ROUTE_ERROR : %p\n", id); 336 rc = -ETIMEDOUT; 337 break; 338 case RDMA_CM_EVENT_CONNECT_REQUEST: 339 sdp_dbg(sk, "RDMA_CM_EVENT_CONNECT_REQUEST\n"); 340 rc = sdp_connect_handler(sk, id, event); 341 if (rc) { 342 sdp_dbg(sk, "Destroying qp\n"); 343 rdma_reject(id, NULL, 0); 344 break; 345 } 346 ssk = id->context; 347 atomic_set(&ssk->remote_credits, rx_ring_posted(ssk)); 348 memset(&hah, 0, sizeof hah); 349 hah.bsdh.mid = SDP_MID_HELLO_ACK; 350 hah.bsdh.bufs = htons(rx_ring_posted(ssk)); 351 hah.bsdh.len = htonl(sizeof(struct sdp_hah)); 352 hah.majv_minv = SDP_MAJV_MINV; 353 hah.ext_max_adverts = 1; /* Doesn't seem to be mandated by spec, 354 but just in case */ 355 hah.actrcvsz = htonl(ssk->recv_bytes); 356 memset(&conn_param, 0, sizeof conn_param); 357 conn_param.private_data_len = sizeof hah; 358 conn_param.private_data = &hah; 359 conn_param.responder_resources = 4 /* TODO */; 360 conn_param.initiator_depth = 4 /* TODO */; 361 conn_param.retry_count = SDP_RETRY_COUNT; 362 SDP_DUMP_PACKET(sk, "TX", NULL, &hah.bsdh); 363 rc = rdma_accept(id, &conn_param); 364 if (rc) { 365 ssk->id = NULL; 366 id->qp = NULL; 367 id->context = NULL; 368 } 369 break; 370 case RDMA_CM_EVENT_CONNECT_RESPONSE: 371 sdp_dbg(sk, "RDMA_CM_EVENT_CONNECT_RESPONSE\n"); 372 rc = sdp_response_handler(sk, id, event); 373 if (rc) { 374 sdp_dbg(sk, "Destroying qp\n"); 375 rdma_reject(id, NULL, 0); 376 } else 377 rc = rdma_accept(id, NULL); 378 break; 379 case RDMA_CM_EVENT_CONNECT_ERROR: 380 sdp_dbg(sk, "RDMA_CM_EVENT_CONNECT_ERROR\n"); 381 rc = -ETIMEDOUT; 382 break; 383 case RDMA_CM_EVENT_UNREACHABLE: 384 sdp_dbg(sk, "RDMA_CM_EVENT_UNREACHABLE\n"); 385 rc = -ENETUNREACH; 386 break; 387 case RDMA_CM_EVENT_REJECTED: 388 sdp_dbg(sk, "RDMA_CM_EVENT_REJECTED\n"); 389 rc = -ECONNREFUSED; 390 break; 391 case RDMA_CM_EVENT_ESTABLISHED: 392 sdp_dbg(sk, "RDMA_CM_EVENT_ESTABLISHED\n"); 393 sdp_sk(sk)->laddr = 394 ((struct sockaddr_in *)&id->route.addr.src_addr)->sin_addr.s_addr; 395 rc = sdp_connected_handler(sk, event); 396 break; 397 case RDMA_CM_EVENT_DISCONNECTED: /* This means DREQ/DREP received */ 398 sdp_dbg(sk, "RDMA_CM_EVENT_DISCONNECTED\n"); 399 400 SDP_WLOCK(ssk); 401 if (ssk->state == TCPS_LAST_ACK) { 402 sdp_cancel_dreq_wait_timeout(ssk); 403 404 sdp_dbg(sk, "%s: waiting for Infiniband tear down\n", 405 __func__); 406 } 407 ssk->qp_active = 0; 408 SDP_WUNLOCK(ssk); 409 rdma_disconnect(id); 410 SDP_WLOCK(ssk); 411 if (ssk->state != TCPS_TIME_WAIT) { 412 if (ssk->state == TCPS_CLOSE_WAIT) { 413 sdp_dbg(sk, "IB teardown while in " 414 "TCPS_CLOSE_WAIT taking reference to " 415 "let close() finish the work\n"); 416 } 417 rc = sdp_disconnected_handler(sk); 418 if (rc) 419 rc = -EPIPE; 420 } 421 SDP_WUNLOCK(ssk); 422 break; 423 case RDMA_CM_EVENT_TIMEWAIT_EXIT: 424 sdp_dbg(sk, "RDMA_CM_EVENT_TIMEWAIT_EXIT\n"); 425 SDP_WLOCK(ssk); 426 rc = sdp_disconnected_handler(sk); 427 SDP_WUNLOCK(ssk); 428 break; 429 case RDMA_CM_EVENT_DEVICE_REMOVAL: 430 sdp_dbg(sk, "RDMA_CM_EVENT_DEVICE_REMOVAL\n"); 431 rc = -ENETRESET; 432 break; 433 default: 434 printk(KERN_ERR "SDP: Unexpected CMA event: %d\n", 435 event->event); 436 rc = -ECONNABORTED; 437 break; 438 } 439 440 sdp_dbg(sk, "event %d done. status %d\n", event->event, rc); 441 442 if (rc) { 443 SDP_WLOCK(ssk); 444 if (ssk->id == id) { 445 ssk->id = NULL; 446 id->qp = NULL; 447 id->context = NULL; 448 if (sdp_notify(ssk, -rc)) 449 SDP_WUNLOCK(ssk); 450 } else 451 SDP_WUNLOCK(ssk); 452 } 453 454 return rc; 455} 456