ipoib_cm.c revision 353183
1219820Sjeff/* 2219820Sjeff * Copyright (c) 2006 Mellanox Technologies. All rights reserved 3219820Sjeff * 4219820Sjeff * This software is available to you under a choice of one of two 5219820Sjeff * licenses. You may choose to be licensed under the terms of the GNU 6219820Sjeff * General Public License (GPL) Version 2, available from the file 7219820Sjeff * COPYING in the main directory of this source tree, or the 8219820Sjeff * OpenIB.org BSD license below: 9219820Sjeff * 10219820Sjeff * Redistribution and use in source and binary forms, with or 11219820Sjeff * without modification, are permitted provided that the following 12219820Sjeff * conditions are met: 13219820Sjeff * 14219820Sjeff * - Redistributions of source code must retain the above 15219820Sjeff * copyright notice, this list of conditions and the following 16219820Sjeff * disclaimer. 17219820Sjeff * 18219820Sjeff * - Redistributions in binary form must reproduce the above 19219820Sjeff * copyright notice, this list of conditions and the following 20219820Sjeff * disclaimer in the documentation and/or other materials 21219820Sjeff * provided with the distribution. 22219820Sjeff * 23219820Sjeff * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24219820Sjeff * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25219820Sjeff * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26219820Sjeff * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27219820Sjeff * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28219820Sjeff * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29219820Sjeff * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30219820Sjeff * SOFTWARE. 
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "ipoib.h"

#ifdef CONFIG_INFINIBAND_IPOIB_CM

#include <netinet/ip.h>
#include <netinet/ip_icmp.h>
#include <netinet/icmp6.h>

#include <rdma/ib_cm.h>
#include <rdma/ib_cache.h>
#include <linux/delay.h>

/* Cap on connected-mode QPs per interface when no SRQ is available. */
int ipoib_max_conn_qp = 128;

module_param_named(max_nonsrq_conn_qp, ipoib_max_conn_qp, int, 0444);
MODULE_PARM_DESC(max_nonsrq_conn_qp,
		 "Max number of connected-mode QPs per interface "
		 "(applied only if shared receive queue is not available)");

#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG_DATA
static int data_debug_level;

module_param_named(cm_data_debug_level, data_debug_level, int, 0644);
MODULE_PARM_DESC(cm_data_debug_level,
		 "Enable data path debug tracing for connected mode if > 0");
#endif

/* Prefix OR'ed with our UD QP number to form the CM service ID
 * (see ipoib_cm_dev_open()). */
#define IPOIB_CM_IETF_ID 0x1000000000000000ULL

/* Timers (in ticks) driving refresh/expiry of passive-side connections. */
#define IPOIB_CM_RX_UPDATE_TIME (256 * HZ)
#define IPOIB_CM_RX_TIMEOUT (2 * 256 * HZ)
#define IPOIB_CM_RX_DELAY (3 * 256 * HZ)
#define IPOIB_CM_RX_UPDATE_MASK (0x3)

/* Attribute block used to force a QP into the error state. */
static struct ib_qp_attr ipoib_cm_err_attr = {
	.qp_state = IB_QPS_ERR
};

#define IPOIB_CM_RX_DRAIN_WRID 0xffffffff

/* Dummy send WR posted on an errored rx QP; its flush completion tells
 * us that QP's receive queue has fully drained. */
static struct ib_send_wr ipoib_cm_rx_drain_wr = {
	.wr_id = IPOIB_CM_RX_DRAIN_WRID,
	.opcode = IB_WR_SEND,
};

static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
			       struct ib_cm_event *event);

/*
 * Unmap the DMA addresses of a connected-mode receive buffer.  CM rx
 * buffers share the leading layout of struct ipoib_rx_buf, so this
 * forwards to the common helper.
 */
static void ipoib_cm_dma_unmap_rx(struct ipoib_dev_priv *priv, struct ipoib_cm_rx_buf *rx_req)
{

	ipoib_dma_unmap_rx(priv, (struct ipoib_rx_buf *)rx_req);

}

/*
 * (Re)post receive buffer 'id' on the shared receive queue, building
 * one scatter entry per mbuf in the chain.  On failure the buffer is
 * unmapped and freed and the ring slot is cleared.
 */
static int ipoib_cm_post_receive_srq(struct ipoib_dev_priv *priv, int id)
{
	struct ib_recv_wr *bad_wr;
	struct ipoib_rx_buf *rx_req;
	struct mbuf *m;
	int ret;
	int i;

	rx_req = (struct ipoib_rx_buf *)&priv->cm.srq_ring[id];
	for (m = rx_req->mb, i = 0; m != NULL; m = m->m_next, i++) {
		priv->cm.rx_sge[i].addr = rx_req->mapping[i];
		priv->cm.rx_sge[i].length = m->m_len;
	}

	priv->cm.rx_wr.num_sge = i;
	/* Tag the WR id so the completion handler can demux CM receives. */
	priv->cm.rx_wr.wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV;

	ret = ib_post_srq_recv(priv->cm.srq, &priv->cm.rx_wr, &bad_wr);
	if (unlikely(ret)) {
		ipoib_warn(priv, "post srq failed for buf %d (%d)\n", id, ret);
		ipoib_dma_unmap_rx(priv, rx_req);
		m_freem(priv->cm.srq_ring[id].mb);
		priv->cm.srq_ring[id].mb = NULL;
	}

	return ret;
}

/*
 * (Re)post receive buffer 'id' on a per-connection (non-SRQ) QP.  The
 * caller supplies the WR and scatter list to fill in: a stack-local
 * scratch set during ring init, priv->cm's set on the fast path.
 */
static int ipoib_cm_post_receive_nonsrq(struct ipoib_dev_priv *priv,
					struct ipoib_cm_rx *rx,
					struct ib_recv_wr *wr,
					struct ib_sge *sge, int id)
{
	struct ipoib_rx_buf *rx_req;
	struct ib_recv_wr *bad_wr;
	struct mbuf *m;
	int ret;
	int i;

	rx_req = (struct ipoib_rx_buf *)&rx->rx_ring[id];
	for (m = rx_req->mb, i = 0; m != NULL; m = m->m_next, i++) {
		sge[i].addr = rx_req->mapping[i];
		sge[i].length =
m->m_len;
	}

	wr->num_sge = i;
	wr->wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV;

	ret = ib_post_recv(rx->qp, wr, &bad_wr);
	if (unlikely(ret)) {
		ipoib_warn(priv, "post recv failed for buf %d (%d)\n", id, ret);
		ipoib_dma_unmap_rx(priv, rx_req);
		m_freem(rx->rx_ring[id].mb);
		rx->rx_ring[id].mb = NULL;
	}

	return ret;
}

/*
 * Allocate and DMA-map an mbuf chain sized for the connected-mode MTU
 * and attach it to the given ring slot.  Returns the mbuf or NULL.
 */
static struct mbuf *
ipoib_cm_alloc_rx_mb(struct ipoib_dev_priv *priv, struct ipoib_cm_rx_buf *rx_req)
{
	return ipoib_alloc_map_mb(priv, (struct ipoib_rx_buf *)rx_req,
	    priv->cm.max_cm_mtu);
}

/*
 * Unmap and free every buffer still held in a CM receive ring, then
 * free the ring array itself.
 */
static void ipoib_cm_free_rx_ring(struct ipoib_dev_priv *priv,
				  struct ipoib_cm_rx_buf *rx_ring)
{
	int i;

	for (i = 0; i < ipoib_recvq_size; ++i)
		if (rx_ring[i].mb) {
			ipoib_cm_dma_unmap_rx(priv, &rx_ring[i]);
			m_freem(rx_ring[i].mb);
		}

	kfree(rx_ring);
}

/*
 * Advance the rx drain state machine: post the drain WR on one QP from
 * the flush list so its flush completion marks the end of pending
 * receives.  Called with priv->lock held (see callers).
 */
static void ipoib_cm_start_rx_drain(struct ipoib_dev_priv *priv)
{
	struct ib_send_wr *bad_wr;
	struct ipoib_cm_rx *p;

	/* We only reserved 1 extra slot in CQ for drain WRs, so
	 * make sure we have at most 1 outstanding WR. */
	if (list_empty(&priv->cm.rx_flush_list) ||
	    !list_empty(&priv->cm.rx_drain_list))
		return;

	/*
	 * QPs on flush list are error state. This way, a "flush
	 * error" WC will be immediately generated for each WR we post.
	 */
	p = list_entry(priv->cm.rx_flush_list.next, typeof(*p), list);
	if (ib_post_send(p->qp, &ipoib_cm_rx_drain_wr, &bad_wr))
		ipoib_warn(priv, "failed to post drain wr\n");

	list_splice_init(&priv->cm.rx_flush_list, &priv->cm.rx_drain_list);
}

/*
 * QP async event handler for passive-side (rx) connections: on
 * IB_EVENT_QP_LAST_WQE_REACHED the connection is moved to the flush
 * list and the drain machinery is kicked.
 */
static void ipoib_cm_rx_event_handler(struct ib_event *event, void *ctx)
{
	struct ipoib_cm_rx *p = ctx;
	struct ipoib_dev_priv *priv = p->priv;
	unsigned long flags;

	if (event->event != IB_EVENT_QP_LAST_WQE_REACHED)
		return;

	spin_lock_irqsave(&priv->lock, flags);
	list_move(&p->list, &priv->cm.rx_flush_list);
	p->state = IPOIB_CM_RX_FLUSH;
	ipoib_cm_start_rx_drain(priv);
	spin_unlock_irqrestore(&priv->lock, flags);
}

/*
 * Create the RC QP for a passive-side connection.  The send side is
 * sized for the single drain WR only; receive resources come from the
 * SRQ when available, otherwise from a per-connection ring.
 */
static struct ib_qp *ipoib_cm_create_rx_qp(struct ipoib_dev_priv *priv,
					   struct ipoib_cm_rx *p)
{
	struct ib_qp_init_attr attr = {
		.event_handler = ipoib_cm_rx_event_handler,
		.send_cq = priv->recv_cq, /* For drain WR */
		.recv_cq = priv->recv_cq,
		.srq = priv->cm.srq,
		.cap.max_send_wr = 1, /* For drain WR */
		.cap.max_send_sge = 1,
		.sq_sig_type = IB_SIGNAL_ALL_WR,
		.qp_type = IB_QPT_RC,
		.qp_context = p,
	};

	if (!ipoib_cm_has_srq(priv)) {
		attr.cap.max_recv_wr = ipoib_recvq_size;
		attr.cap.max_recv_sge = priv->cm.num_frags;
	}

	return ib_create_qp(priv->pd, &attr);
}

/*
 * Walk a freshly created rx QP through INIT -> RTR -> RTS using the
 * attributes supplied by the CM.  The RTS step is best-effort (see the
 * firmware workaround comment below).
 */
static int ipoib_cm_modify_rx_qp(struct ipoib_dev_priv *priv,
				 struct ib_cm_id *cm_id, struct ib_qp *qp,
				 unsigned psn)
{
	struct ib_qp_attr qp_attr;
	int qp_attr_mask, ret;

	qp_attr.qp_state = IB_QPS_INIT;
	ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to init QP attr for INIT: %d\n", ret);
		return ret;
	}
	ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to modify QP to INIT: %d\n", ret);
		return ret;
	}
	qp_attr.qp_state = IB_QPS_RTR;
	ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to init QP attr for RTR: %d\n", ret);
		return ret;
	}
	/* Receive-queue PSN is the one we advertise in the REP. */
	qp_attr.rq_psn = psn;
	ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to modify QP to RTR: %d\n", ret);
		return ret;
	}

	/*
	 * Current Mellanox HCA firmware won't generate completions
	 * with error for drain WRs unless the QP has been moved to
	 * RTS first. This work-around leaves a window where a QP has
	 * moved to error asynchronously, but this will eventually get
	 * fixed in firmware, so let's not error out if modify QP
	 * fails.
	 */
	qp_attr.qp_state = IB_QPS_RTS;
	ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to init QP attr for RTS: %d\n", ret);
		return 0;
	}
	ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to modify QP to RTS: %d\n", ret);
		return 0;
	}

	return 0;
}

/*
 * Initialize a receive WR and its scatter list for the non-SRQ path.
 * Every possible fragment gets the PD's lkey; num_sge starts at 1 and
 * is recomputed for the actual mbuf chain each time a buffer is posted.
 */
static void ipoib_cm_init_rx_wr(struct ipoib_dev_priv *priv,
				struct ib_recv_wr *wr,
				struct ib_sge *sge)
{
	int i;

	for (i = 0; i < IPOIB_CM_RX_SG; i++)
		sge[i].lkey = priv->pd->local_dma_lkey;

	wr->next = NULL;
	wr->sg_list = sge;
	wr->num_sge = 1;
}

/*
 * Set up the per-connection receive ring for a new passive connection
 * when no SRQ is available: allocate the ring, enforce the
 * ipoib_max_conn_qp limit (rejecting with IB_CM_REJ_NO_QP when over),
 * then allocate and post every receive buffer.
 */
static int ipoib_cm_nonsrq_init_rx(struct ipoib_dev_priv *priv,
				   struct ib_cm_id *cm_id, struct ipoib_cm_rx *rx)
{
	struct {
		struct ib_recv_wr wr;
		struct ib_sge sge[IPOIB_CM_RX_SG];
	} *t;
	int ret;
	int i;

	rx->rx_ring = kzalloc(ipoib_recvq_size * sizeof *rx->rx_ring, GFP_KERNEL);
	if (!rx->rx_ring) {
		printk(KERN_WARNING "%s: failed to allocate CM non-SRQ ring (%d entries)\n",
		       priv->ca->name, ipoib_recvq_size);
		return -ENOMEM;
	}

	/* NOTE(review): redundant — kzalloc already zeroed the ring. */
	memset(rx->rx_ring, 0, ipoib_recvq_size * sizeof *rx->rx_ring);

	t = kmalloc(sizeof *t, GFP_KERNEL);
	if (!t) {
		ret = -ENOMEM;
		goto err_free;
	}

	ipoib_cm_init_rx_wr(priv,
	    &t->wr, t->sge);

	spin_lock_irq(&priv->lock);

	/* Enforce the global cap on non-SRQ connected QPs. */
	if (priv->cm.nonsrq_conn_qp >= ipoib_max_conn_qp) {
		spin_unlock_irq(&priv->lock);
		ib_send_cm_rej(cm_id, IB_CM_REJ_NO_QP, NULL, 0, NULL, 0);
		ret = -EINVAL;
		goto err_free;
	} else
		++priv->cm.nonsrq_conn_qp;

	spin_unlock_irq(&priv->lock);

	for (i = 0; i < ipoib_recvq_size; ++i) {
		if (!ipoib_cm_alloc_rx_mb(priv, &rx->rx_ring[i])) {
			ipoib_warn(priv, "failed to allocate receive buffer %d\n", i);
			ret = -ENOMEM;
			goto err_count;
		}
		ret = ipoib_cm_post_receive_nonsrq(priv, rx, &t->wr, t->sge, i);
		if (ret) {
			ipoib_warn(priv, "ipoib_cm_post_receive_nonsrq "
				   "failed for buf %d\n", i);
			ret = -EIO;
			goto err_count;
		}
	}

	rx->recv_count = ipoib_recvq_size;

	kfree(t);

	return 0;

err_count:
	/* Undo the nonsrq_conn_qp accounting taken above. */
	spin_lock_irq(&priv->lock);
	--priv->cm.nonsrq_conn_qp;
	spin_unlock_irq(&priv->lock);

err_free:
	kfree(t);
	/* Releases any buffers already allocated into the ring. */
	ipoib_cm_free_rx_ring(priv, rx->rx_ring);

	return ret;
}

/*
 * Send the CM REP for a new passive connection, advertising our UD QP
 * number and the connected-mode MTU in the private data.
 */
static int ipoib_cm_send_rep(struct ipoib_dev_priv *priv, struct ib_cm_id *cm_id,
			     struct ib_qp *qp, struct ib_cm_req_event_param *req,
			     unsigned psn)
{
	struct ipoib_cm_data data = {};
	struct ib_cm_rep_param rep = {};

	data.qpn = cpu_to_be32(priv->qp->qp_num);
	data.mtu =
cpu_to_be32(priv->cm.max_cm_mtu);

	rep.private_data = &data;
	rep.private_data_len = sizeof data;
	rep.flow_control = 0;
	rep.rnr_retry_count = req->rnr_retry_count;
	rep.srq = ipoib_cm_has_srq(priv);
	rep.qp_num = qp->qp_num;
	rep.starting_psn = psn;
	return ib_send_cm_rep(cm_id, &rep);
}

/*
 * Handle an incoming CM REQ: create and bring up the rx QP, set up
 * receive buffers (non-SRQ case), schedule the stale-connection task,
 * and answer with a REP.  On REP failure the QP is moved to the error
 * state so the connection gets drained and reaped.
 */
static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
{
	struct ipoib_dev_priv *priv = cm_id->context;
	struct ipoib_cm_rx *p;
	unsigned psn;
	int ret;

	ipoib_dbg(priv, "REQ arrived\n");
	p = kzalloc(sizeof *p, GFP_KERNEL);
	if (!p)
		return -ENOMEM;
	p->priv = priv;
	p->id = cm_id;
	/* From here on this cm_id's context is the connection, not priv. */
	cm_id->context = p;
	p->state = IPOIB_CM_RX_LIVE;
	p->jiffies = jiffies;
	INIT_LIST_HEAD(&p->list);

	p->qp = ipoib_cm_create_rx_qp(priv, p);
	if (IS_ERR(p->qp)) {
		ret = PTR_ERR(p->qp);
		goto err_qp;
	}

	/* Random 24-bit starting PSN, advertised in the REP below. */
	psn = random() & 0xffffff;
	ret = ipoib_cm_modify_rx_qp(priv, cm_id, p->qp, psn);
	if (ret)
		goto err_modify;

	if (!ipoib_cm_has_srq(priv)) {
		ret = ipoib_cm_nonsrq_init_rx(priv, cm_id, p);
		if (ret)
			goto err_modify;
	}

	spin_lock_irq(&priv->lock);
	queue_delayed_work(ipoib_workqueue,
			   &priv->cm.stale_task, IPOIB_CM_RX_DELAY);
	/* Add this entry to passive ids list head, but do not re-add it
	 * if IB_EVENT_QP_LAST_WQE_REACHED has moved it to flush list.
	 */
	p->jiffies = jiffies;
	if (p->state == IPOIB_CM_RX_LIVE)
		list_move(&p->list, &priv->cm.passive_ids);
	spin_unlock_irq(&priv->lock);

	ret = ipoib_cm_send_rep(priv, cm_id, p->qp, &event->param.req_rcvd, psn);
	if (ret) {
		ipoib_warn(priv, "failed to send REP: %d\n", ret);
		if (ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE))
			ipoib_warn(priv, "unable to move qp to error state\n");
	}
	return 0;

err_modify:
	ib_destroy_qp(p->qp);
err_qp:
	kfree(p);
	return ret;
}

/*
 * Passive-side CM event dispatcher.  REQs establish new connections;
 * DREQ (after sending the DREP) and REJ push the QP into the error
 * state so its receives drain and the connection is reaped.  All other
 * events are ignored.
 */
static int ipoib_cm_rx_handler(struct ib_cm_id *cm_id,
			       struct ib_cm_event *event)
{
	struct ipoib_cm_rx *p;
	struct ipoib_dev_priv *priv;

	switch (event->event) {
	case IB_CM_REQ_RECEIVED:
		return ipoib_cm_req_handler(cm_id, event);
	case IB_CM_DREQ_RECEIVED:
		p = cm_id->context;
		ib_send_cm_drep(cm_id, NULL, 0);
		/* Fall through */
	case IB_CM_REJ_RECEIVED:
		p = cm_id->context;
		priv = p->priv;
		if (ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE))
			ipoib_warn(priv, "unable to move qp to error state\n");
		/* Fall through */
	default:
		return 0;
	}
}

/*
 * Receive completion handler for connected mode: demultiplexes drain-WR
 * completions, handles receive errors, replenishes ring buffers and
 * hands completed packets to the network stack.
 */
void ipoib_cm_handle_rx_wc(struct ipoib_dev_priv *priv, struct ib_wc *wc)
{
	struct ipoib_cm_rx_buf saverx;
	struct ipoib_cm_rx_buf *rx_ring;
	unsigned int wr_id = wc->wr_id & ~(IPOIB_OP_CM | IPOIB_OP_RECV);
	struct ifnet *dev = priv->dev;
	struct mbuf *mb, *newmb;
	struct ipoib_cm_rx *p;
	int has_srq;
	u_short proto;

	CURVNET_SET_QUIET(dev->if_vnet);

	ipoib_dbg_data(priv, "cm recv completion: id %d, status: %d\n",
		       wr_id, wc->status);

	if (unlikely(wr_id >= ipoib_recvq_size)) {
		/* Out-of-range ids are either the drain WR flushing back
		 * (advance the reap pipeline) or a stray completion. */
		if (wr_id == (IPOIB_CM_RX_DRAIN_WRID & ~(IPOIB_OP_CM | IPOIB_OP_RECV))) {
			spin_lock(&priv->lock);
			list_splice_init(&priv->cm.rx_drain_list, &priv->cm.rx_reap_list);
			ipoib_cm_start_rx_drain(priv);
			if (priv->cm.id != NULL)
				queue_work(ipoib_workqueue,
					   &priv->cm.rx_reap_task);
			spin_unlock(&priv->lock);
		} else
			ipoib_warn(priv, "cm recv completion event with wrid %d (> %d)\n",
				   wr_id, ipoib_recvq_size);
		goto done;
	}

	p = wc->qp->qp_context;

	has_srq = ipoib_cm_has_srq(priv);
	rx_ring = has_srq ?
priv->cm.srq_ring : p->rx_ring;

	mb = rx_ring[wr_id].mb;

	if (unlikely(wc->status != IB_WC_SUCCESS)) {
		ipoib_dbg(priv, "cm recv error "
			  "(status=%d, wrid=%d vend_err %x)\n",
			  wc->status, wr_id, wc->vendor_err);
		if_inc_counter(dev, IFCOUNTER_IERRORS, 1);
		if (has_srq)
			goto repost;
		else {
			/* Last outstanding receive on a non-SRQ QP:
			 * queue the connection for reaping. */
			if (!--p->recv_count) {
				spin_lock(&priv->lock);
				list_move(&p->list, &priv->cm.rx_reap_list);
				queue_work(ipoib_workqueue, &priv->cm.rx_reap_task);
				spin_unlock(&priv->lock);
			}
			goto done;
		}
	}

	/* Periodically refresh this connection's position on the
	 * passive_ids list so the stale task does not expire it. */
	if (unlikely(!(wr_id & IPOIB_CM_RX_UPDATE_MASK))) {
		if (p && time_after_eq(jiffies, p->jiffies + IPOIB_CM_RX_UPDATE_TIME)) {
			p->jiffies = jiffies;
			/* Move this entry to list head, but do not re-add it
			 * if it has been moved out of list. */
			if (p->state == IPOIB_CM_RX_LIVE)
				list_move(&p->list, &priv->cm.passive_ids);
		}
	}

	memcpy(&saverx, &rx_ring[wr_id], sizeof(saverx));
	newmb = ipoib_cm_alloc_rx_mb(priv, &rx_ring[wr_id]);
	if (unlikely(!newmb)) {
		/*
		 * If we can't allocate a new RX buffer, dump
		 * this packet and reuse the old buffer.
		 */
		ipoib_dbg(priv, "failed to allocate receive buffer %d\n", wr_id);
		if_inc_counter(dev, IFCOUNTER_IERRORS, 1);
		memcpy(&rx_ring[wr_id], &saverx, sizeof(saverx));
		goto repost;
	}

	ipoib_cm_dma_unmap_rx(priv, &saverx);

	ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n",
		       wc->byte_len, wc->slid);

	ipoib_dma_mb(priv, mb, wc->byte_len);

	if_inc_counter(dev, IFCOUNTER_IPACKETS, 1);
	if_inc_counter(dev, IFCOUNTER_IBYTES, mb->m_pkthdr.len);

	mb->m_pkthdr.rcvif = dev;
	/* First bytes carry the IPoIB ethertype; strip the encap header
	 * before demuxing to the protocol input path. */
	proto = *mtod(mb, uint16_t *);
	m_adj(mb, IPOIB_ENCAP_LEN);

	IPOIB_MTAP_PROTO(dev, mb, proto);
	ipoib_demux(dev, mb, ntohs(proto));

repost:
	if (has_srq) {
		if (unlikely(ipoib_cm_post_receive_srq(priv, wr_id)))
			ipoib_warn(priv, "ipoib_cm_post_receive_srq failed "
				   "for buf %d\n", wr_id);
	} else {
		if (unlikely(ipoib_cm_post_receive_nonsrq(priv, p,
							  &priv->cm.rx_wr,
							  priv->cm.rx_sge,
							  wr_id))) {
			--p->recv_count;
			ipoib_warn(priv, "ipoib_cm_post_receive_nonsrq failed "
				   "for buf %d\n", wr_id);
		}
	}
done:
	CURVNET_RESTORE();
	return;
}

/*
 * Post one send WR for a connected-mode transmit, one scatter entry per
 * mbuf in the chain.  Returns the ib_post_send() status.
 */
static inline int post_send(struct ipoib_dev_priv *priv,
			    struct ipoib_cm_tx *tx,
			    struct ipoib_cm_tx_buf *tx_req,
			    unsigned int wr_id)
{
	struct ib_send_wr *bad_wr;
	struct mbuf *mb = tx_req->mb;
	u64 *mapping =
tx_req->mapping;
	struct mbuf *m;
	int i;

	for (m = mb, i = 0; m != NULL; m = m->m_next, i++) {
		priv->tx_sge[i].addr = mapping[i];
		priv->tx_sge[i].length = m->m_len;
	}
	priv->tx_wr.wr.num_sge = i;
	priv->tx_wr.wr.wr_id = wr_id | IPOIB_OP_CM;
	priv->tx_wr.wr.opcode = IB_WR_SEND;

	return ib_post_send(tx->qp, &priv->tx_wr.wr, &bad_wr);
}

/*
 * Transmit one mbuf on a connected-mode connection: drop packets larger
 * than the connection MTU, map the chain for DMA and post it, stopping
 * the interface queue when the tx ring fills.
 */
void ipoib_cm_send(struct ipoib_dev_priv *priv, struct mbuf *mb, struct ipoib_cm_tx *tx)
{
	struct ipoib_cm_tx_buf *tx_req;
	struct ifnet *dev = priv->dev;

	/* Reap completed sends before risking a send CQ overrun. */
	if (unlikely(priv->tx_outstanding > MAX_SEND_CQE)) {
		while (ipoib_poll_tx(priv, false))
			; /* nothing */
	}

	m_adj(mb, sizeof(struct ipoib_pseudoheader));
	if (unlikely(mb->m_pkthdr.len > tx->mtu)) {
		ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n",
			   mb->m_pkthdr.len, tx->mtu);
		if_inc_counter(dev, IFCOUNTER_OERRORS, 1);
		ipoib_cm_mb_too_long(priv, mb, IPOIB_CM_MTU(tx->mtu));
		return;
	}

	ipoib_dbg_data(priv, "sending packet: head 0x%x length %d connection 0x%x\n",
		       tx->tx_head, mb->m_pkthdr.len, tx->qp->qp_num);


	/*
	 * We put the mb into the tx_ring _before_ we call post_send()
	 * because it's entirely possible that the completion handler will
	 * run before we execute anything after the post_send(). That
	 * means we have to make sure everything is properly recorded and
	 * our state is consistent before we call post_send().
	 */
	tx_req = &tx->tx_ring[tx->tx_head & (ipoib_sendq_size - 1)];
	tx_req->mb = mb;
	if (unlikely(ipoib_dma_map_tx(priv->ca, (struct ipoib_tx_buf *)tx_req,
	    priv->cm.num_frags))) {
		if_inc_counter(dev, IFCOUNTER_OERRORS, 1);
		if (tx_req->mb)
			m_freem(tx_req->mb);
		return;
	}

	if (unlikely(post_send(priv, tx, tx_req, tx->tx_head & (ipoib_sendq_size - 1)))) {
		ipoib_warn(priv, "post_send failed\n");
		if_inc_counter(dev, IFCOUNTER_OERRORS, 1);
		ipoib_dma_unmap_tx(priv->ca, (struct ipoib_tx_buf *)tx_req);
		m_freem(mb);
	} else {
		++tx->tx_head;

		if (++priv->tx_outstanding == ipoib_sendq_size) {
			ipoib_dbg(priv, "TX ring 0x%x full, stopping kernel net queue\n",
				  tx->qp->qp_num);
			if (ib_req_notify_cq(priv->send_cq, IB_CQ_NEXT_COMP))
				ipoib_warn(priv, "request notify on send CQ failed\n");
			/* Ring full: stall the interface until completions drain. */
			dev->if_drv_flags |= IFF_DRV_OACTIVE;
		}
	}

}

/*
 * Send completion handler for connected mode: unmap and free the sent
 * mbuf, restart the stalled interface queue once the ring half-drains,
 * and tear the connection down on fatal (non-flush) send errors.
 */
void ipoib_cm_handle_tx_wc(struct ipoib_dev_priv *priv, struct ib_wc *wc)
{
	struct ipoib_cm_tx *tx = wc->qp->qp_context;
	unsigned int wr_id = wc->wr_id & ~IPOIB_OP_CM;
	struct ifnet *dev = priv->dev;
	struct ipoib_cm_tx_buf *tx_req;

	ipoib_dbg_data(priv, "cm send completion: id %d, status: %d\n",
		       wr_id, wc->status);

	if (unlikely(wr_id >= ipoib_sendq_size)) {
		ipoib_warn(priv, "cm send completion event with wrid %d (> %d)\n",
			   wr_id, ipoib_sendq_size);
		return;
	}

	tx_req =
&tx->tx_ring[wr_id];

	ipoib_dma_unmap_tx(priv->ca, (struct ipoib_tx_buf *)tx_req);

	/* FIXME: is this right? Shouldn't we only increment on success? */
	if_inc_counter(dev, IFCOUNTER_OPACKETS, 1);

	m_freem(tx_req->mb);

	++tx->tx_tail;
	if (unlikely(--priv->tx_outstanding == ipoib_sendq_size >> 1) &&
	    (dev->if_drv_flags & IFF_DRV_OACTIVE) != 0 &&
	    test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
		dev->if_drv_flags &= ~IFF_DRV_OACTIVE;

	if (wc->status != IB_WC_SUCCESS &&
	    wc->status != IB_WC_WR_FLUSH_ERR) {
		struct ipoib_path *path;

		ipoib_dbg(priv, "failed cm send event "
			  "(status=%d, wrid=%d vend_err %x)\n",
			  wc->status, wr_id, wc->vendor_err);

		path = tx->path;

		/* Drop the cached path so a later transmit re-resolves it. */
		if (path) {
			path->cm = NULL;
			rb_erase(&path->rb_node, &priv->path_tree);
			list_del(&path->list);
		}

		if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
			list_move(&tx->list, &priv->cm.reap_list);
			queue_work(ipoib_workqueue, &priv->cm.reap_task);
		}

		clear_bit(IPOIB_FLAG_OPER_UP, &tx->flags);
	}

}

/*
 * Bring up the passive (listen) side of connected mode: create the CM
 * id and listen on the service id formed from IPOIB_CM_IETF_ID and our
 * UD QP number.  Returns 0 silently when the link address does not
 * support CM.
 */
int ipoib_cm_dev_open(struct ipoib_dev_priv *priv)
{
	int ret;

	if (!IPOIB_CM_SUPPORTED(IF_LLADDR(priv->dev)))
		return 0;

	priv->cm.id = ib_create_cm_id(priv->ca, ipoib_cm_rx_handler, priv);
	if (IS_ERR(priv->cm.id)) {
		printk(KERN_WARNING "%s: failed to create CM ID\n", priv->ca->name);
		ret = PTR_ERR(priv->cm.id);
		goto err_cm;
	}

	ret = ib_cm_listen(priv->cm.id, cpu_to_be64(IPOIB_CM_IETF_ID | priv->qp->qp_num), 0);
	if (ret) {
		printk(KERN_WARNING "%s: failed to listen on ID 0x%llx\n", priv->ca->name,
		       IPOIB_CM_IETF_ID | priv->qp->qp_num);
		goto err_listen;
	}

	return 0;

err_listen:
	ib_destroy_cm_id(priv->cm.id);
err_cm:
	priv->cm.id = NULL;
	return ret;
}

/*
 * Destroy every rx connection queued on the reap list: its CM id, its
 * QP, and — in the non-SRQ case — its private receive ring plus the
 * nonsrq_conn_qp accounting.
 */
static void ipoib_cm_free_rx_reap_list(struct ipoib_dev_priv *priv)
{
	struct ipoib_cm_rx *rx, *n;
	LIST_HEAD(list);

	/* Detach the whole reap list under the lock, then work on it
	 * without holding the lock. */
	spin_lock_irq(&priv->lock);
	list_splice_init(&priv->cm.rx_reap_list, &list);
	spin_unlock_irq(&priv->lock);

	list_for_each_entry_safe(rx, n, &list, list) {
		ib_destroy_cm_id(rx->id);
		ib_destroy_qp(rx->qp);
		if (!ipoib_cm_has_srq(priv)) {
			ipoib_cm_free_rx_ring(priv, rx->rx_ring);
			spin_lock_irq(&priv->lock);
			--priv->cm.nonsrq_conn_qp;
			spin_unlock_irq(&priv->lock);
		}
		kfree(rx);
	}
}

/*
 * Tear down the passive side: stop listening, push every live rx QP
 * into the error state, then wait (bounded to ~5 s) for their receive
 * queues to drain before reaping everything.
 */
void ipoib_cm_dev_stop(struct ipoib_dev_priv *priv)
{
	struct ipoib_cm_rx *p;
	unsigned long begin;
	int ret;

	if (!IPOIB_CM_SUPPORTED(IF_LLADDR(priv->dev)) || !priv->cm.id)
		return;

	ib_destroy_cm_id(priv->cm.id);
	priv->cm.id = NULL;

	cancel_work_sync(&priv->cm.rx_reap_task);

	spin_lock_irq(&priv->lock);
	while (!list_empty(&priv->cm.passive_ids)) {
		p = list_entry(priv->cm.passive_ids.next, typeof(*p), list);
		list_move(&p->list, &priv->cm.rx_error_list);
		p->state = IPOIB_CM_RX_ERROR;
		/* The lock is dropped across ib_modify_qp(); the list is
		 * re-checked after it is reacquired. */
		spin_unlock_irq(&priv->lock);
		ret = ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE);
		if (ret)
			ipoib_warn(priv, "unable to move qp to error state: %d\n", ret);
		spin_lock_irq(&priv->lock);
	}

	/* Wait for all RX to be drained */
	begin = jiffies;

	while (!list_empty(&priv->cm.rx_error_list) ||
	       !list_empty(&priv->cm.rx_flush_list) ||
	       !list_empty(&priv->cm.rx_drain_list)) {
		if (time_after(jiffies, begin + 5 * HZ)) {
			ipoib_warn(priv, "RX drain timing out\n");

			/*
			 * assume the HW is wedged and just free up everything.
819219820Sjeff */ 820219820Sjeff list_splice_init(&priv->cm.rx_flush_list, 821219820Sjeff &priv->cm.rx_reap_list); 822219820Sjeff list_splice_init(&priv->cm.rx_error_list, 823219820Sjeff &priv->cm.rx_reap_list); 824219820Sjeff list_splice_init(&priv->cm.rx_drain_list, 825219820Sjeff &priv->cm.rx_reap_list); 826219820Sjeff break; 827219820Sjeff } 828219820Sjeff spin_unlock_irq(&priv->lock); 829219820Sjeff msleep(1); 830219820Sjeff ipoib_drain_cq(priv); 831219820Sjeff spin_lock_irq(&priv->lock); 832219820Sjeff } 833219820Sjeff 834219820Sjeff spin_unlock_irq(&priv->lock); 835219820Sjeff 836219820Sjeff ipoib_cm_free_rx_reap_list(priv); 837219820Sjeff 838331769Shselasky cancel_delayed_work_sync(&priv->cm.stale_task); 839219820Sjeff} 840219820Sjeff 841219820Sjeffstatic int ipoib_cm_rep_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) 842219820Sjeff{ 843219820Sjeff struct ipoib_cm_tx *p = cm_id->context; 844219820Sjeff struct ipoib_dev_priv *priv = p->priv; 845219820Sjeff struct ipoib_cm_data *data = event->private_data; 846219820Sjeff struct ifqueue mbqueue; 847219820Sjeff struct ib_qp_attr qp_attr; 848219820Sjeff int qp_attr_mask, ret; 849219820Sjeff struct mbuf *mb; 850219820Sjeff 851219820Sjeff ipoib_dbg(priv, "cm rep handler\n"); 852219820Sjeff p->mtu = be32_to_cpu(data->mtu); 853219820Sjeff 854219820Sjeff if (p->mtu <= IPOIB_ENCAP_LEN) { 855219820Sjeff ipoib_warn(priv, "Rejecting connection: mtu %d <= %d\n", 856219820Sjeff p->mtu, IPOIB_ENCAP_LEN); 857219820Sjeff return -EINVAL; 858219820Sjeff } 859219820Sjeff 860219820Sjeff qp_attr.qp_state = IB_QPS_RTR; 861219820Sjeff ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask); 862219820Sjeff if (ret) { 863219820Sjeff ipoib_warn(priv, "failed to init QP attr for RTR: %d\n", ret); 864219820Sjeff return ret; 865219820Sjeff } 866219820Sjeff 867219820Sjeff qp_attr.rq_psn = 0 /* FIXME */; 868219820Sjeff ret = ib_modify_qp(p->qp, &qp_attr, qp_attr_mask); 869219820Sjeff if (ret) { 870219820Sjeff ipoib_warn(priv, 
"failed to modify QP to RTR: %d\n", ret); 871219820Sjeff return ret; 872219820Sjeff } 873219820Sjeff 874219820Sjeff qp_attr.qp_state = IB_QPS_RTS; 875219820Sjeff ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask); 876219820Sjeff if (ret) { 877219820Sjeff ipoib_warn(priv, "failed to init QP attr for RTS: %d\n", ret); 878219820Sjeff return ret; 879219820Sjeff } 880219820Sjeff ret = ib_modify_qp(p->qp, &qp_attr, qp_attr_mask); 881219820Sjeff if (ret) { 882219820Sjeff ipoib_warn(priv, "failed to modify QP to RTS: %d\n", ret); 883219820Sjeff return ret; 884219820Sjeff } 885219820Sjeff 886219820Sjeff bzero(&mbqueue, sizeof(mbqueue)); 887219820Sjeff 888219820Sjeff spin_lock_irq(&priv->lock); 889219820Sjeff set_bit(IPOIB_FLAG_OPER_UP, &p->flags); 890219820Sjeff if (p->path) 891219820Sjeff for (;;) { 892219820Sjeff _IF_DEQUEUE(&p->path->queue, mb); 893219820Sjeff if (mb == NULL) 894219820Sjeff break; 895219820Sjeff _IF_ENQUEUE(&mbqueue, mb); 896219820Sjeff } 897219820Sjeff spin_unlock_irq(&priv->lock); 898219820Sjeff 899219820Sjeff for (;;) { 900219820Sjeff struct ifnet *dev = p->priv->dev; 901219820Sjeff _IF_DEQUEUE(&mbqueue, mb); 902219820Sjeff if (mb == NULL) 903219820Sjeff break; 904219820Sjeff mb->m_pkthdr.rcvif = dev; 905219820Sjeff if (dev->if_transmit(dev, mb)) 906219820Sjeff ipoib_warn(priv, "dev_queue_xmit failed " 907219820Sjeff "to requeue packet\n"); 908219820Sjeff } 909219820Sjeff 910219820Sjeff ret = ib_send_cm_rtu(cm_id, NULL, 0); 911219820Sjeff if (ret) { 912219820Sjeff ipoib_warn(priv, "failed to send RTU: %d\n", ret); 913219820Sjeff return ret; 914219820Sjeff } 915219820Sjeff return 0; 916219820Sjeff} 917219820Sjeff 918219820Sjeffstatic struct ib_qp *ipoib_cm_create_tx_qp(struct ipoib_dev_priv *priv, 919219820Sjeff struct ipoib_cm_tx *tx) 920219820Sjeff{ 921219820Sjeff struct ib_qp_init_attr attr = { 922219820Sjeff .send_cq = priv->send_cq, 923219820Sjeff .recv_cq = priv->recv_cq, 924219820Sjeff .srq = priv->cm.srq, 925219820Sjeff .cap.max_send_wr = 
ipoib_sendq_size, 926219820Sjeff .cap.max_send_sge = priv->cm.num_frags, 927219820Sjeff .sq_sig_type = IB_SIGNAL_ALL_WR, 928219820Sjeff .qp_type = IB_QPT_RC, 929219820Sjeff .qp_context = tx 930219820Sjeff }; 931219820Sjeff 932219820Sjeff return ib_create_qp(priv->pd, &attr); 933219820Sjeff} 934219820Sjeff 935219820Sjeffstatic int ipoib_cm_send_req(struct ipoib_dev_priv *priv, 936219820Sjeff struct ib_cm_id *id, struct ib_qp *qp, 937219820Sjeff u32 qpn, 938219820Sjeff struct ib_sa_path_rec *pathrec) 939219820Sjeff{ 940219820Sjeff struct ipoib_cm_data data = {}; 941219820Sjeff struct ib_cm_req_param req = {}; 942219820Sjeff 943219820Sjeff ipoib_dbg(priv, "cm send req\n"); 944219820Sjeff 945219820Sjeff data.qpn = cpu_to_be32(priv->qp->qp_num); 946219820Sjeff data.mtu = cpu_to_be32(priv->cm.max_cm_mtu); 947219820Sjeff 948219820Sjeff req.primary_path = pathrec; 949219820Sjeff req.alternate_path = NULL; 950219820Sjeff req.service_id = cpu_to_be64(IPOIB_CM_IETF_ID | qpn); 951219820Sjeff req.qp_num = qp->qp_num; 952219820Sjeff req.qp_type = qp->qp_type; 953219820Sjeff req.private_data = &data; 954219820Sjeff req.private_data_len = sizeof data; 955219820Sjeff req.flow_control = 0; 956219820Sjeff 957219820Sjeff req.starting_psn = 0; /* FIXME */ 958219820Sjeff 959219820Sjeff /* 960219820Sjeff * Pick some arbitrary defaults here; we could make these 961219820Sjeff * module parameters if anyone cared about setting them. 
962219820Sjeff */ 963219820Sjeff req.responder_resources = 4; 964219820Sjeff req.remote_cm_response_timeout = 20; 965219820Sjeff req.local_cm_response_timeout = 20; 966219820Sjeff req.retry_count = 0; /* RFC draft warns against retries */ 967219820Sjeff req.rnr_retry_count = 0; /* RFC draft warns against retries */ 968219820Sjeff req.max_cm_retries = 15; 969219820Sjeff req.srq = ipoib_cm_has_srq(priv); 970219820Sjeff return ib_send_cm_req(id, &req); 971219820Sjeff} 972219820Sjeff 973219820Sjeffstatic int ipoib_cm_modify_tx_init(struct ipoib_dev_priv *priv, 974219820Sjeff struct ib_cm_id *cm_id, struct ib_qp *qp) 975219820Sjeff{ 976219820Sjeff struct ib_qp_attr qp_attr; 977219820Sjeff int qp_attr_mask, ret; 978219820Sjeff ret = ib_find_pkey(priv->ca, priv->port, priv->pkey, &qp_attr.pkey_index); 979219820Sjeff if (ret) { 980219820Sjeff ipoib_warn(priv, "pkey 0x%x not found: %d\n", priv->pkey, ret); 981219820Sjeff return ret; 982219820Sjeff } 983219820Sjeff 984219820Sjeff qp_attr.qp_state = IB_QPS_INIT; 985219820Sjeff qp_attr.qp_access_flags = IB_ACCESS_LOCAL_WRITE; 986219820Sjeff qp_attr.port_num = priv->port; 987219820Sjeff qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX | IB_QP_PORT; 988219820Sjeff 989219820Sjeff ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask); 990219820Sjeff if (ret) { 991219820Sjeff ipoib_warn(priv, "failed to modify tx QP to INIT: %d\n", ret); 992219820Sjeff return ret; 993219820Sjeff } 994219820Sjeff return 0; 995219820Sjeff} 996219820Sjeff 997219820Sjeffstatic int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn, 998219820Sjeff struct ib_sa_path_rec *pathrec) 999219820Sjeff{ 1000219820Sjeff struct ipoib_dev_priv *priv = p->priv; 1001219820Sjeff int ret; 1002219820Sjeff 1003219820Sjeff p->tx_ring = kzalloc(ipoib_sendq_size * sizeof *p->tx_ring, GFP_KERNEL); 1004219820Sjeff if (!p->tx_ring) { 1005219820Sjeff ipoib_warn(priv, "failed to allocate tx ring\n"); 1006219820Sjeff ret = -ENOMEM; 1007219820Sjeff goto err_tx; 
1008219820Sjeff } 1009219820Sjeff memset(p->tx_ring, 0, ipoib_sendq_size * sizeof *p->tx_ring); 1010219820Sjeff 1011219820Sjeff p->qp = ipoib_cm_create_tx_qp(p->priv, p); 1012219820Sjeff if (IS_ERR(p->qp)) { 1013219820Sjeff ret = PTR_ERR(p->qp); 1014219820Sjeff ipoib_warn(priv, "failed to allocate tx qp: %d\n", ret); 1015219820Sjeff goto err_qp; 1016219820Sjeff } 1017219820Sjeff 1018219820Sjeff p->id = ib_create_cm_id(priv->ca, ipoib_cm_tx_handler, p); 1019219820Sjeff if (IS_ERR(p->id)) { 1020219820Sjeff ret = PTR_ERR(p->id); 1021219820Sjeff ipoib_warn(priv, "failed to create tx cm id: %d\n", ret); 1022219820Sjeff goto err_id; 1023219820Sjeff } 1024219820Sjeff 1025219820Sjeff ret = ipoib_cm_modify_tx_init(p->priv, p->id, p->qp); 1026219820Sjeff if (ret) { 1027219820Sjeff ipoib_warn(priv, "failed to modify tx qp to rtr: %d\n", ret); 1028219820Sjeff goto err_modify; 1029219820Sjeff } 1030219820Sjeff 1031219820Sjeff ret = ipoib_cm_send_req(p->priv, p->id, p->qp, qpn, pathrec); 1032219820Sjeff if (ret) { 1033219820Sjeff ipoib_warn(priv, "failed to send cm req: %d\n", ret); 1034219820Sjeff goto err_send_cm; 1035219820Sjeff } 1036219820Sjeff 1037219820Sjeff ipoib_dbg(priv, "Request connection 0x%x for gid %pI6 qpn 0x%x\n", 1038219820Sjeff p->qp->qp_num, pathrec->dgid.raw, qpn); 1039219820Sjeff 1040219820Sjeff return 0; 1041219820Sjeff 1042219820Sjefferr_send_cm: 1043219820Sjefferr_modify: 1044219820Sjeff ib_destroy_cm_id(p->id); 1045219820Sjefferr_id: 1046219820Sjeff p->id = NULL; 1047219820Sjeff ib_destroy_qp(p->qp); 1048219820Sjefferr_qp: 1049219820Sjeff p->qp = NULL; 1050219820Sjeff kfree(p->tx_ring); 1051219820Sjefferr_tx: 1052219820Sjeff return ret; 1053219820Sjeff} 1054219820Sjeff 1055219820Sjeffstatic void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p) 1056219820Sjeff{ 1057219820Sjeff struct ipoib_dev_priv *priv = p->priv; 1058219820Sjeff struct ifnet *dev = priv->dev; 1059219820Sjeff struct ipoib_cm_tx_buf *tx_req; 1060219820Sjeff unsigned long begin; 1061219820Sjeff 
1062219820Sjeff ipoib_dbg(priv, "Destroy active connection 0x%x head 0x%x tail 0x%x\n", 1063219820Sjeff p->qp ? p->qp->qp_num : 0, p->tx_head, p->tx_tail); 1064219820Sjeff 1065219820Sjeff if (p->path) 1066219820Sjeff ipoib_path_free(priv, p->path); 1067219820Sjeff 1068219820Sjeff if (p->id) 1069219820Sjeff ib_destroy_cm_id(p->id); 1070219820Sjeff 1071219820Sjeff if (p->tx_ring) { 1072219820Sjeff /* Wait for all sends to complete */ 1073219820Sjeff begin = jiffies; 1074219820Sjeff while ((int) p->tx_tail - (int) p->tx_head < 0) { 1075219820Sjeff if (time_after(jiffies, begin + 5 * HZ)) { 1076219820Sjeff ipoib_warn(priv, "timing out; %d sends not completed\n", 1077219820Sjeff p->tx_head - p->tx_tail); 1078219820Sjeff goto timeout; 1079219820Sjeff } 1080219820Sjeff 1081219820Sjeff msleep(1); 1082219820Sjeff } 1083219820Sjeff } 1084219820Sjeff 1085219820Sjefftimeout: 1086219820Sjeff 1087219820Sjeff while ((int) p->tx_tail - (int) p->tx_head < 0) { 1088219820Sjeff tx_req = &p->tx_ring[p->tx_tail & (ipoib_sendq_size - 1)]; 1089219820Sjeff ipoib_dma_unmap_tx(priv->ca, (struct ipoib_tx_buf *)tx_req); 1090219820Sjeff m_freem(tx_req->mb); 1091219820Sjeff ++p->tx_tail; 1092219820Sjeff if (unlikely(--priv->tx_outstanding == ipoib_sendq_size >> 1) && 1093219820Sjeff (dev->if_drv_flags & IFF_DRV_OACTIVE) != 0 && 1094219820Sjeff test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) 1095219820Sjeff dev->if_drv_flags &= ~IFF_DRV_OACTIVE; 1096219820Sjeff } 1097219820Sjeff 1098219820Sjeff if (p->qp) 1099219820Sjeff ib_destroy_qp(p->qp); 1100219820Sjeff 1101219820Sjeff kfree(p->tx_ring); 1102219820Sjeff kfree(p); 1103219820Sjeff} 1104219820Sjeff 1105219820Sjeffstatic int ipoib_cm_tx_handler(struct ib_cm_id *cm_id, 1106219820Sjeff struct ib_cm_event *event) 1107219820Sjeff{ 1108219820Sjeff struct ipoib_cm_tx *tx = cm_id->context; 1109219820Sjeff struct ipoib_dev_priv *priv = tx->priv; 1110219820Sjeff struct ipoib_path *path; 1111219820Sjeff unsigned long flags; 1112219820Sjeff int ret; 
1113219820Sjeff 1114219820Sjeff switch (event->event) { 1115219820Sjeff case IB_CM_DREQ_RECEIVED: 1116219820Sjeff ipoib_dbg(priv, "DREQ received.\n"); 1117219820Sjeff ib_send_cm_drep(cm_id, NULL, 0); 1118219820Sjeff break; 1119219820Sjeff case IB_CM_REP_RECEIVED: 1120219820Sjeff ipoib_dbg(priv, "REP received.\n"); 1121219820Sjeff ret = ipoib_cm_rep_handler(cm_id, event); 1122219820Sjeff if (ret) 1123219820Sjeff ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED, 1124219820Sjeff NULL, 0, NULL, 0); 1125219820Sjeff break; 1126219820Sjeff case IB_CM_REQ_ERROR: 1127219820Sjeff case IB_CM_REJ_RECEIVED: 1128219820Sjeff case IB_CM_TIMEWAIT_EXIT: 1129219820Sjeff ipoib_dbg(priv, "CM error %d.\n", event->event); 1130219820Sjeff spin_lock_irqsave(&priv->lock, flags); 1131219820Sjeff path = tx->path; 1132219820Sjeff 1133219820Sjeff if (path) { 1134219820Sjeff path->cm = NULL; 1135219820Sjeff tx->path = NULL; 1136219820Sjeff rb_erase(&path->rb_node, &priv->path_tree); 1137219820Sjeff list_del(&path->list); 1138219820Sjeff } 1139219820Sjeff 1140219820Sjeff if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) { 1141219820Sjeff list_move(&tx->list, &priv->cm.reap_list); 1142219820Sjeff queue_work(ipoib_workqueue, &priv->cm.reap_task); 1143219820Sjeff } 1144219820Sjeff 1145219820Sjeff spin_unlock_irqrestore(&priv->lock, flags); 1146219820Sjeff if (path) 1147219820Sjeff ipoib_path_free(tx->priv, path); 1148219820Sjeff break; 1149219820Sjeff default: 1150219820Sjeff break; 1151219820Sjeff } 1152219820Sjeff 1153219820Sjeff return 0; 1154219820Sjeff} 1155219820Sjeff 1156219820Sjeffstruct ipoib_cm_tx *ipoib_cm_create_tx(struct ipoib_dev_priv *priv, 1157219820Sjeff struct ipoib_path *path) 1158219820Sjeff{ 1159219820Sjeff struct ipoib_cm_tx *tx; 1160219820Sjeff 1161219820Sjeff tx = kzalloc(sizeof *tx, GFP_ATOMIC); 1162219820Sjeff if (!tx) 1163219820Sjeff return NULL; 1164219820Sjeff 1165219820Sjeff ipoib_dbg(priv, "Creating cm tx\n"); 1166219820Sjeff path->cm = tx; 1167219820Sjeff 
tx->path = path; 1168219820Sjeff tx->priv = priv; 1169219820Sjeff list_add(&tx->list, &priv->cm.start_list); 1170219820Sjeff set_bit(IPOIB_FLAG_INITIALIZED, &tx->flags); 1171219820Sjeff queue_work(ipoib_workqueue, &priv->cm.start_task); 1172219820Sjeff return tx; 1173219820Sjeff} 1174219820Sjeff 1175219820Sjeffvoid ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx) 1176219820Sjeff{ 1177219820Sjeff struct ipoib_dev_priv *priv = tx->priv; 1178219820Sjeff if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) { 1179219820Sjeff spin_lock(&priv->lock); 1180219820Sjeff list_move(&tx->list, &priv->cm.reap_list); 1181219820Sjeff spin_unlock(&priv->lock); 1182219820Sjeff queue_work(ipoib_workqueue, &priv->cm.reap_task); 1183219820Sjeff ipoib_dbg(priv, "Reap connection for gid %pI6\n", 1184219820Sjeff tx->path->pathrec.dgid.raw); 1185219820Sjeff tx->path = NULL; 1186219820Sjeff } 1187219820Sjeff} 1188219820Sjeff 1189219820Sjeffstatic void ipoib_cm_tx_start(struct work_struct *work) 1190219820Sjeff{ 1191219820Sjeff struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, 1192219820Sjeff cm.start_task); 1193219820Sjeff struct ipoib_path *path; 1194219820Sjeff struct ipoib_cm_tx *p; 1195219820Sjeff unsigned long flags; 1196219820Sjeff int ret; 1197219820Sjeff 1198219820Sjeff struct ib_sa_path_rec pathrec; 1199219820Sjeff u32 qpn; 1200219820Sjeff 1201219820Sjeff ipoib_dbg(priv, "cm start task\n"); 1202219820Sjeff spin_lock_irqsave(&priv->lock, flags); 1203219820Sjeff 1204219820Sjeff while (!list_empty(&priv->cm.start_list)) { 1205219820Sjeff p = list_entry(priv->cm.start_list.next, typeof(*p), list); 1206219820Sjeff list_del_init(&p->list); 1207219820Sjeff path = p->path; 1208219820Sjeff qpn = IPOIB_QPN(path->hwaddr); 1209219820Sjeff memcpy(&pathrec, &p->path->pathrec, sizeof pathrec); 1210219820Sjeff 1211219820Sjeff spin_unlock_irqrestore(&priv->lock, flags); 1212219820Sjeff 1213219820Sjeff ret = ipoib_cm_tx_init(p, qpn, &pathrec); 1214219820Sjeff 1215219820Sjeff 
spin_lock_irqsave(&priv->lock, flags); 1216219820Sjeff 1217219820Sjeff if (ret) { 1218219820Sjeff path = p->path; 1219219820Sjeff if (path) { 1220219820Sjeff path->cm = NULL; 1221219820Sjeff rb_erase(&path->rb_node, &priv->path_tree); 1222219820Sjeff list_del(&path->list); 1223219820Sjeff ipoib_path_free(priv, path); 1224219820Sjeff } 1225219820Sjeff list_del(&p->list); 1226219820Sjeff kfree(p); 1227219820Sjeff } 1228219820Sjeff } 1229219820Sjeff 1230219820Sjeff spin_unlock_irqrestore(&priv->lock, flags); 1231219820Sjeff} 1232219820Sjeff 1233219820Sjeffstatic void ipoib_cm_tx_reap(struct work_struct *work) 1234219820Sjeff{ 1235219820Sjeff struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, 1236219820Sjeff cm.reap_task); 1237219820Sjeff struct ipoib_cm_tx *p; 1238219820Sjeff unsigned long flags; 1239219820Sjeff 1240219820Sjeff spin_lock_irqsave(&priv->lock, flags); 1241219820Sjeff 1242219820Sjeff while (!list_empty(&priv->cm.reap_list)) { 1243219820Sjeff p = list_entry(priv->cm.reap_list.next, typeof(*p), list); 1244219820Sjeff list_del(&p->list); 1245219820Sjeff spin_unlock_irqrestore(&priv->lock, flags); 1246219820Sjeff ipoib_cm_tx_destroy(p); 1247219820Sjeff spin_lock_irqsave(&priv->lock, flags); 1248219820Sjeff } 1249219820Sjeff 1250219820Sjeff spin_unlock_irqrestore(&priv->lock, flags); 1251219820Sjeff} 1252219820Sjeff 1253219820Sjeffstatic void ipoib_cm_mb_reap(struct work_struct *work) 1254219820Sjeff{ 1255219820Sjeff struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, 1256219820Sjeff cm.mb_task); 1257219820Sjeff struct mbuf *mb; 1258219820Sjeff unsigned long flags; 1259234183Sjhb#if defined(INET) || defined(INET6) 1260219820Sjeff unsigned mtu = priv->mcast_mtu; 1261234183Sjhb#endif 1262219820Sjeff uint16_t proto; 1263219820Sjeff 1264219820Sjeff spin_lock_irqsave(&priv->lock, flags); 1265219820Sjeff 1266219820Sjeff for (;;) { 1267219820Sjeff IF_DEQUEUE(&priv->cm.mb_queue, mb); 1268219820Sjeff if (mb == NULL) 
1269219820Sjeff break; 1270219820Sjeff spin_unlock_irqrestore(&priv->lock, flags); 1271219820Sjeff 1272219820Sjeff proto = htons(*mtod(mb, uint16_t *)); 1273219820Sjeff m_adj(mb, IPOIB_ENCAP_LEN); 1274234183Sjhb switch (proto) { 1275234183Sjhb#if defined(INET) 1276234183Sjhb case ETHERTYPE_IP: 1277219820Sjeff icmp_error(mb, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 0, mtu); 1278234183Sjhb break; 1279234183Sjhb#endif 1280219820Sjeff#if defined(INET6) 1281234183Sjhb case ETHERTYPE_IPV6: 1282219820Sjeff icmp6_error(mb, ICMP6_PACKET_TOO_BIG, 0, mtu); 1283234183Sjhb break; 1284219820Sjeff#endif 1285234183Sjhb default: 1286219820Sjeff m_freem(mb); 1287234183Sjhb } 1288219820Sjeff 1289219820Sjeff spin_lock_irqsave(&priv->lock, flags); 1290219820Sjeff } 1291219820Sjeff 1292219820Sjeff spin_unlock_irqrestore(&priv->lock, flags); 1293219820Sjeff} 1294219820Sjeff 1295219820Sjeffvoid 1296219820Sjeffipoib_cm_mb_too_long(struct ipoib_dev_priv *priv, struct mbuf *mb, unsigned int mtu) 1297219820Sjeff{ 1298219820Sjeff int e = priv->cm.mb_queue.ifq_len; 1299219820Sjeff 1300219820Sjeff IF_ENQUEUE(&priv->cm.mb_queue, mb); 1301219820Sjeff if (e == 0) 1302219820Sjeff queue_work(ipoib_workqueue, &priv->cm.mb_task); 1303219820Sjeff} 1304219820Sjeff 1305219820Sjeffstatic void ipoib_cm_rx_reap(struct work_struct *work) 1306219820Sjeff{ 1307219820Sjeff ipoib_cm_free_rx_reap_list(container_of(work, struct ipoib_dev_priv, 1308219820Sjeff cm.rx_reap_task)); 1309219820Sjeff} 1310219820Sjeff 1311219820Sjeffstatic void ipoib_cm_stale_task(struct work_struct *work) 1312219820Sjeff{ 1313219820Sjeff struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, 1314219820Sjeff cm.stale_task.work); 1315219820Sjeff struct ipoib_cm_rx *p; 1316219820Sjeff int ret; 1317219820Sjeff 1318219820Sjeff spin_lock_irq(&priv->lock); 1319219820Sjeff while (!list_empty(&priv->cm.passive_ids)) { 1320219820Sjeff /* List is sorted by LRU, start from tail, 1321219820Sjeff * stop when we see a recently used entry */ 
1322219820Sjeff p = list_entry(priv->cm.passive_ids.prev, typeof(*p), list); 1323219820Sjeff if (time_before_eq(jiffies, p->jiffies + IPOIB_CM_RX_TIMEOUT)) 1324219820Sjeff break; 1325219820Sjeff list_move(&p->list, &priv->cm.rx_error_list); 1326219820Sjeff p->state = IPOIB_CM_RX_ERROR; 1327219820Sjeff spin_unlock_irq(&priv->lock); 1328219820Sjeff ret = ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE); 1329219820Sjeff if (ret) 1330219820Sjeff ipoib_warn(priv, "unable to move qp to error state: %d\n", ret); 1331219820Sjeff spin_lock_irq(&priv->lock); 1332219820Sjeff } 1333219820Sjeff 1334219820Sjeff if (!list_empty(&priv->cm.passive_ids)) 1335219820Sjeff queue_delayed_work(ipoib_workqueue, 1336219820Sjeff &priv->cm.stale_task, IPOIB_CM_RX_DELAY); 1337219820Sjeff spin_unlock_irq(&priv->lock); 1338219820Sjeff} 1339219820Sjeff 1340219820Sjeff 1341219820Sjeffstatic void ipoib_cm_create_srq(struct ipoib_dev_priv *priv, int max_sge) 1342219820Sjeff{ 1343219820Sjeff struct ib_srq_init_attr srq_init_attr = { 1344219820Sjeff .attr = { 1345219820Sjeff .max_wr = ipoib_recvq_size, 1346219820Sjeff .max_sge = max_sge 1347219820Sjeff } 1348219820Sjeff }; 1349219820Sjeff 1350219820Sjeff priv->cm.srq = ib_create_srq(priv->pd, &srq_init_attr); 1351219820Sjeff if (IS_ERR(priv->cm.srq)) { 1352219820Sjeff if (PTR_ERR(priv->cm.srq) != -ENOSYS) 1353219820Sjeff printk(KERN_WARNING "%s: failed to allocate SRQ, error %ld\n", 1354219820Sjeff priv->ca->name, PTR_ERR(priv->cm.srq)); 1355219820Sjeff priv->cm.srq = NULL; 1356219820Sjeff return; 1357219820Sjeff } 1358219820Sjeff 1359219820Sjeff priv->cm.srq_ring = kzalloc(ipoib_recvq_size * sizeof *priv->cm.srq_ring, GFP_KERNEL); 1360219820Sjeff if (!priv->cm.srq_ring) { 1361219820Sjeff printk(KERN_WARNING "%s: failed to allocate CM SRQ ring (%d entries)\n", 1362219820Sjeff priv->ca->name, ipoib_recvq_size); 1363219820Sjeff ib_destroy_srq(priv->cm.srq); 1364219820Sjeff priv->cm.srq = NULL; 1365219820Sjeff return; 1366219820Sjeff } 
1367219820Sjeff 1368219820Sjeff memset(priv->cm.srq_ring, 0, ipoib_recvq_size * sizeof *priv->cm.srq_ring); 1369219820Sjeff} 1370219820Sjeff 1371219820Sjeffint ipoib_cm_dev_init(struct ipoib_dev_priv *priv) 1372219820Sjeff{ 1373219820Sjeff struct ifnet *dev = priv->dev; 1374331769Shselasky int i; 1375331769Shselasky int max_srq_sge; 1376219820Sjeff 1377219820Sjeff INIT_LIST_HEAD(&priv->cm.passive_ids); 1378219820Sjeff INIT_LIST_HEAD(&priv->cm.reap_list); 1379219820Sjeff INIT_LIST_HEAD(&priv->cm.start_list); 1380219820Sjeff INIT_LIST_HEAD(&priv->cm.rx_error_list); 1381219820Sjeff INIT_LIST_HEAD(&priv->cm.rx_flush_list); 1382219820Sjeff INIT_LIST_HEAD(&priv->cm.rx_drain_list); 1383219820Sjeff INIT_LIST_HEAD(&priv->cm.rx_reap_list); 1384219820Sjeff INIT_WORK(&priv->cm.start_task, ipoib_cm_tx_start); 1385219820Sjeff INIT_WORK(&priv->cm.reap_task, ipoib_cm_tx_reap); 1386219820Sjeff INIT_WORK(&priv->cm.mb_task, ipoib_cm_mb_reap); 1387219820Sjeff INIT_WORK(&priv->cm.rx_reap_task, ipoib_cm_rx_reap); 1388219820Sjeff INIT_DELAYED_WORK(&priv->cm.stale_task, ipoib_cm_stale_task); 1389219820Sjeff 1390219820Sjeff bzero(&priv->cm.mb_queue, sizeof(priv->cm.mb_queue)); 1391219820Sjeff mtx_init(&priv->cm.mb_queue.ifq_mtx, 1392219820Sjeff dev->if_xname, "if send queue", MTX_DEF); 1393219820Sjeff 1394331769Shselasky max_srq_sge = priv->ca->attrs.max_srq_sge; 1395219820Sjeff 1396331769Shselasky ipoib_dbg(priv, "max_srq_sge=%d\n", max_srq_sge); 1397219820Sjeff 1398331769Shselasky max_srq_sge = min_t(int, IPOIB_CM_RX_SG, max_srq_sge); 1399331769Shselasky ipoib_cm_create_srq(priv, max_srq_sge); 1400219820Sjeff if (ipoib_cm_has_srq(priv)) { 1401331769Shselasky priv->cm.max_cm_mtu = max_srq_sge * MJUMPAGESIZE; 1402331769Shselasky priv->cm.num_frags = max_srq_sge; 1403219820Sjeff ipoib_dbg(priv, "max_cm_mtu = 0x%x, num_frags=%d\n", 1404219820Sjeff priv->cm.max_cm_mtu, priv->cm.num_frags); 1405219820Sjeff } else { 1406219820Sjeff priv->cm.max_cm_mtu = IPOIB_CM_MAX_MTU; 1407219820Sjeff 
priv->cm.num_frags = IPOIB_CM_RX_SG; 1408219820Sjeff } 1409219820Sjeff 1410219820Sjeff ipoib_cm_init_rx_wr(priv, &priv->cm.rx_wr, priv->cm.rx_sge); 1411219820Sjeff 1412219820Sjeff if (ipoib_cm_has_srq(priv)) { 1413219820Sjeff for (i = 0; i < ipoib_recvq_size; ++i) { 1414219820Sjeff if (!ipoib_cm_alloc_rx_mb(priv, &priv->cm.srq_ring[i])) { 1415219820Sjeff ipoib_warn(priv, "failed to allocate " 1416219820Sjeff "receive buffer %d\n", i); 1417219820Sjeff ipoib_cm_dev_cleanup(priv); 1418219820Sjeff return -ENOMEM; 1419219820Sjeff } 1420219820Sjeff 1421219820Sjeff if (ipoib_cm_post_receive_srq(priv, i)) { 1422219820Sjeff ipoib_warn(priv, "ipoib_cm_post_receive_srq " 1423219820Sjeff "failed for buf %d\n", i); 1424219820Sjeff ipoib_cm_dev_cleanup(priv); 1425219820Sjeff return -EIO; 1426219820Sjeff } 1427219820Sjeff } 1428219820Sjeff } 1429219820Sjeff 1430219820Sjeff IF_LLADDR(priv->dev)[0] = IPOIB_FLAGS_RC; 1431219820Sjeff return 0; 1432219820Sjeff} 1433219820Sjeff 1434219820Sjeffvoid ipoib_cm_dev_cleanup(struct ipoib_dev_priv *priv) 1435219820Sjeff{ 1436219820Sjeff int ret; 1437219820Sjeff 1438219820Sjeff if (!priv->cm.srq) 1439219820Sjeff return; 1440219820Sjeff 1441219820Sjeff ipoib_dbg(priv, "Cleanup ipoib connected mode.\n"); 1442219820Sjeff 1443219820Sjeff ret = ib_destroy_srq(priv->cm.srq); 1444219820Sjeff if (ret) 1445219820Sjeff ipoib_warn(priv, "ib_destroy_srq failed: %d\n", ret); 1446219820Sjeff 1447219820Sjeff priv->cm.srq = NULL; 1448219820Sjeff if (!priv->cm.srq_ring) 1449219820Sjeff return; 1450219820Sjeff 1451219820Sjeff ipoib_cm_free_rx_ring(priv, priv->cm.srq_ring); 1452219820Sjeff priv->cm.srq_ring = NULL; 1453219820Sjeff 1454219820Sjeff mtx_destroy(&priv->cm.mb_queue.ifq_mtx); 1455219820Sjeff} 1456219820Sjeff 1457219820Sjeff#endif /* CONFIG_INFINIBAND_IPOIB_CM */ 1458