/* ipoib_cm.c — FreeBSD SVN revision 298458 */
1219820Sjeff/* 2219820Sjeff * Copyright (c) 2006 Mellanox Technologies. All rights reserved 3219820Sjeff * 4219820Sjeff * This software is available to you under a choice of one of two 5219820Sjeff * licenses. You may choose to be licensed under the terms of the GNU 6219820Sjeff * General Public License (GPL) Version 2, available from the file 7219820Sjeff * COPYING in the main directory of this source tree, or the 8219820Sjeff * OpenIB.org BSD license below: 9219820Sjeff * 10219820Sjeff * Redistribution and use in source and binary forms, with or 11219820Sjeff * without modification, are permitted provided that the following 12219820Sjeff * conditions are met: 13219820Sjeff * 14219820Sjeff * - Redistributions of source code must retain the above 15219820Sjeff * copyright notice, this list of conditions and the following 16219820Sjeff * disclaimer. 17219820Sjeff * 18219820Sjeff * - Redistributions in binary form must reproduce the above 19219820Sjeff * copyright notice, this list of conditions and the following 20219820Sjeff * disclaimer in the documentation and/or other materials 21219820Sjeff * provided with the distribution. 22219820Sjeff * 23219820Sjeff * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24219820Sjeff * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25219820Sjeff * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26219820Sjeff * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27219820Sjeff * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28219820Sjeff * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29219820Sjeff * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30219820Sjeff * SOFTWARE. 
31219820Sjeff */ 32219820Sjeff 33219820Sjeff#include "ipoib.h" 34219820Sjeff 35219820Sjeff#ifdef CONFIG_INFINIBAND_IPOIB_CM 36219820Sjeff 37219820Sjeff#include <netinet/ip.h> 38219820Sjeff#include <netinet/ip_icmp.h> 39219820Sjeff#include <netinet/icmp6.h> 40219820Sjeff 41219820Sjeff#include <rdma/ib_cm.h> 42219820Sjeff#include <rdma/ib_cache.h> 43219820Sjeff#include <linux/delay.h> 44219820Sjeff 45219820Sjeffint ipoib_max_conn_qp = 128; 46219820Sjeff 47219820Sjeffmodule_param_named(max_nonsrq_conn_qp, ipoib_max_conn_qp, int, 0444); 48219820SjeffMODULE_PARM_DESC(max_nonsrq_conn_qp, 49219820Sjeff "Max number of connected-mode QPs per interface " 50219820Sjeff "(applied only if shared receive queue is not available)"); 51219820Sjeff 52219820Sjeff#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG_DATA 53219820Sjeffstatic int data_debug_level; 54219820Sjeff 55219820Sjeffmodule_param_named(cm_data_debug_level, data_debug_level, int, 0644); 56219820SjeffMODULE_PARM_DESC(cm_data_debug_level, 57219820Sjeff "Enable data path debug tracing for connected mode if > 0"); 58219820Sjeff#endif 59219820Sjeff 60219820Sjeff#define IPOIB_CM_IETF_ID 0x1000000000000000ULL 61219820Sjeff 62219820Sjeff#define IPOIB_CM_RX_UPDATE_TIME (256 * HZ) 63219820Sjeff#define IPOIB_CM_RX_TIMEOUT (2 * 256 * HZ) 64219820Sjeff#define IPOIB_CM_RX_DELAY (3 * 256 * HZ) 65219820Sjeff#define IPOIB_CM_RX_UPDATE_MASK (0x3) 66219820Sjeff 67219820Sjeffstatic struct ib_qp_attr ipoib_cm_err_attr = { 68219820Sjeff .qp_state = IB_QPS_ERR 69219820Sjeff}; 70219820Sjeff 71219820Sjeff#define IPOIB_CM_RX_DRAIN_WRID 0xffffffff 72219820Sjeff 73219820Sjeffstatic struct ib_send_wr ipoib_cm_rx_drain_wr = { 74219820Sjeff .wr_id = IPOIB_CM_RX_DRAIN_WRID, 75219820Sjeff .opcode = IB_WR_SEND, 76219820Sjeff}; 77219820Sjeff 78219820Sjeffstatic int ipoib_cm_tx_handler(struct ib_cm_id *cm_id, 79219820Sjeff struct ib_cm_event *event); 80219820Sjeff 81219820Sjeffstatic void ipoib_cm_dma_unmap_rx(struct ipoib_dev_priv *priv, struct ipoib_cm_rx_buf 
*rx_req) 82219820Sjeff{ 83219820Sjeff 84219820Sjeff ipoib_dma_unmap_rx(priv, (struct ipoib_rx_buf *)rx_req); 85219820Sjeff 86219820Sjeff} 87219820Sjeff 88219820Sjeffstatic int ipoib_cm_post_receive_srq(struct ipoib_dev_priv *priv, int id) 89219820Sjeff{ 90219820Sjeff struct ib_recv_wr *bad_wr; 91219820Sjeff struct ipoib_rx_buf *rx_req; 92219820Sjeff struct mbuf *m; 93219820Sjeff int ret; 94219820Sjeff int i; 95219820Sjeff 96219820Sjeff rx_req = (struct ipoib_rx_buf *)&priv->cm.srq_ring[id]; 97219820Sjeff for (m = rx_req->mb, i = 0; m != NULL; m = m->m_next, i++) { 98219820Sjeff priv->cm.rx_sge[i].addr = rx_req->mapping[i]; 99219820Sjeff priv->cm.rx_sge[i].length = m->m_len; 100219820Sjeff } 101219820Sjeff 102219820Sjeff priv->cm.rx_wr.num_sge = i; 103219820Sjeff priv->cm.rx_wr.wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV; 104219820Sjeff 105219820Sjeff ret = ib_post_srq_recv(priv->cm.srq, &priv->cm.rx_wr, &bad_wr); 106219820Sjeff if (unlikely(ret)) { 107219820Sjeff ipoib_warn(priv, "post srq failed for buf %d (%d)\n", id, ret); 108219820Sjeff ipoib_dma_unmap_rx(priv, rx_req); 109219820Sjeff m_freem(priv->cm.srq_ring[id].mb); 110219820Sjeff priv->cm.srq_ring[id].mb = NULL; 111219820Sjeff } 112219820Sjeff 113219820Sjeff return ret; 114219820Sjeff} 115219820Sjeff 116219820Sjeffstatic int ipoib_cm_post_receive_nonsrq(struct ipoib_dev_priv *priv, 117219820Sjeff struct ipoib_cm_rx *rx, 118219820Sjeff struct ib_recv_wr *wr, 119219820Sjeff struct ib_sge *sge, int id) 120219820Sjeff{ 121219820Sjeff struct ipoib_rx_buf *rx_req; 122219820Sjeff struct ib_recv_wr *bad_wr; 123219820Sjeff struct mbuf *m; 124219820Sjeff int ret; 125219820Sjeff int i; 126219820Sjeff 127219820Sjeff rx_req = (struct ipoib_rx_buf *)&rx->rx_ring[id]; 128219820Sjeff for (m = rx_req->mb, i = 0; m != NULL; m = m->m_next, i++) { 129219820Sjeff sge[i].addr = rx_req->mapping[i]; 130219820Sjeff sge[i].length = m->m_len; 131219820Sjeff } 132219820Sjeff 133219820Sjeff wr->num_sge = i; 134219820Sjeff wr->wr_id = id | 
IPOIB_OP_CM | IPOIB_OP_RECV; 135219820Sjeff 136219820Sjeff ret = ib_post_recv(rx->qp, wr, &bad_wr); 137219820Sjeff if (unlikely(ret)) { 138219820Sjeff ipoib_warn(priv, "post recv failed for buf %d (%d)\n", id, ret); 139219820Sjeff ipoib_dma_unmap_rx(priv, rx_req); 140219820Sjeff m_freem(rx->rx_ring[id].mb); 141219820Sjeff rx->rx_ring[id].mb = NULL; 142219820Sjeff } 143219820Sjeff 144219820Sjeff return ret; 145219820Sjeff} 146219820Sjeff 147219820Sjeffstatic struct mbuf * 148219820Sjeffipoib_cm_alloc_rx_mb(struct ipoib_dev_priv *priv, struct ipoib_cm_rx_buf *rx_req) 149219820Sjeff{ 150219820Sjeff return ipoib_alloc_map_mb(priv, (struct ipoib_rx_buf *)rx_req, 151219820Sjeff priv->cm.max_cm_mtu); 152219820Sjeff} 153219820Sjeff 154219820Sjeffstatic void ipoib_cm_free_rx_ring(struct ipoib_dev_priv *priv, 155219820Sjeff struct ipoib_cm_rx_buf *rx_ring) 156219820Sjeff{ 157219820Sjeff int i; 158219820Sjeff 159219820Sjeff for (i = 0; i < ipoib_recvq_size; ++i) 160219820Sjeff if (rx_ring[i].mb) { 161219820Sjeff ipoib_cm_dma_unmap_rx(priv, &rx_ring[i]); 162219820Sjeff m_freem(rx_ring[i].mb); 163219820Sjeff } 164219820Sjeff 165219820Sjeff kfree(rx_ring); 166219820Sjeff} 167219820Sjeff 168219820Sjeffstatic void ipoib_cm_start_rx_drain(struct ipoib_dev_priv *priv) 169219820Sjeff{ 170219820Sjeff struct ib_send_wr *bad_wr; 171219820Sjeff struct ipoib_cm_rx *p; 172219820Sjeff 173219820Sjeff /* We only reserved 1 extra slot in CQ for drain WRs, so 174219820Sjeff * make sure we have at most 1 outstanding WR. */ 175219820Sjeff if (list_empty(&priv->cm.rx_flush_list) || 176219820Sjeff !list_empty(&priv->cm.rx_drain_list)) 177219820Sjeff return; 178219820Sjeff 179219820Sjeff /* 180219820Sjeff * QPs on flush list are error state. This way, a "flush 181219820Sjeff * error" WC will be immediately generated for each WR we post. 
182219820Sjeff */ 183219820Sjeff p = list_entry(priv->cm.rx_flush_list.next, typeof(*p), list); 184219820Sjeff if (ib_post_send(p->qp, &ipoib_cm_rx_drain_wr, &bad_wr)) 185219820Sjeff ipoib_warn(priv, "failed to post drain wr\n"); 186219820Sjeff 187219820Sjeff list_splice_init(&priv->cm.rx_flush_list, &priv->cm.rx_drain_list); 188219820Sjeff} 189219820Sjeff 190219820Sjeffstatic void ipoib_cm_rx_event_handler(struct ib_event *event, void *ctx) 191219820Sjeff{ 192219820Sjeff struct ipoib_cm_rx *p = ctx; 193219820Sjeff struct ipoib_dev_priv *priv = p->priv; 194219820Sjeff unsigned long flags; 195219820Sjeff 196219820Sjeff if (event->event != IB_EVENT_QP_LAST_WQE_REACHED) 197219820Sjeff return; 198219820Sjeff 199219820Sjeff spin_lock_irqsave(&priv->lock, flags); 200219820Sjeff list_move(&p->list, &priv->cm.rx_flush_list); 201219820Sjeff p->state = IPOIB_CM_RX_FLUSH; 202219820Sjeff ipoib_cm_start_rx_drain(priv); 203219820Sjeff spin_unlock_irqrestore(&priv->lock, flags); 204219820Sjeff} 205219820Sjeff 206219820Sjeffstatic struct ib_qp *ipoib_cm_create_rx_qp(struct ipoib_dev_priv *priv, 207219820Sjeff struct ipoib_cm_rx *p) 208219820Sjeff{ 209219820Sjeff struct ib_qp_init_attr attr = { 210219820Sjeff .event_handler = ipoib_cm_rx_event_handler, 211219820Sjeff .send_cq = priv->recv_cq, /* For drain WR */ 212219820Sjeff .recv_cq = priv->recv_cq, 213219820Sjeff .srq = priv->cm.srq, 214219820Sjeff .cap.max_send_wr = 1, /* For drain WR */ 215219820Sjeff .cap.max_send_sge = 1, 216219820Sjeff .sq_sig_type = IB_SIGNAL_ALL_WR, 217219820Sjeff .qp_type = IB_QPT_RC, 218219820Sjeff .qp_context = p, 219219820Sjeff }; 220219820Sjeff 221219820Sjeff if (!ipoib_cm_has_srq(priv)) { 222219820Sjeff attr.cap.max_recv_wr = ipoib_recvq_size; 223219820Sjeff attr.cap.max_recv_sge = priv->cm.num_frags; 224219820Sjeff } 225219820Sjeff 226219820Sjeff return ib_create_qp(priv->pd, &attr); 227219820Sjeff} 228219820Sjeff 229219820Sjeffstatic int ipoib_cm_modify_rx_qp(struct ipoib_dev_priv *priv, 
230219820Sjeff struct ib_cm_id *cm_id, struct ib_qp *qp, 231219820Sjeff unsigned psn) 232219820Sjeff{ 233219820Sjeff struct ib_qp_attr qp_attr; 234219820Sjeff int qp_attr_mask, ret; 235219820Sjeff 236219820Sjeff qp_attr.qp_state = IB_QPS_INIT; 237219820Sjeff ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask); 238219820Sjeff if (ret) { 239219820Sjeff ipoib_warn(priv, "failed to init QP attr for INIT: %d\n", ret); 240219820Sjeff return ret; 241219820Sjeff } 242219820Sjeff ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask); 243219820Sjeff if (ret) { 244219820Sjeff ipoib_warn(priv, "failed to modify QP to INIT: %d\n", ret); 245219820Sjeff return ret; 246219820Sjeff } 247219820Sjeff qp_attr.qp_state = IB_QPS_RTR; 248219820Sjeff ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask); 249219820Sjeff if (ret) { 250219820Sjeff ipoib_warn(priv, "failed to init QP attr for RTR: %d\n", ret); 251219820Sjeff return ret; 252219820Sjeff } 253219820Sjeff qp_attr.rq_psn = psn; 254219820Sjeff ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask); 255219820Sjeff if (ret) { 256219820Sjeff ipoib_warn(priv, "failed to modify QP to RTR: %d\n", ret); 257219820Sjeff return ret; 258219820Sjeff } 259219820Sjeff 260219820Sjeff /* 261219820Sjeff * Current Mellanox HCA firmware won't generate completions 262219820Sjeff * with error for drain WRs unless the QP has been moved to 263219820Sjeff * RTS first. This work-around leaves a window where a QP has 264219820Sjeff * moved to error asynchronously, but this will eventually get 265219820Sjeff * fixed in firmware, so let's not error out if modify QP 266219820Sjeff * fails. 
267219820Sjeff */ 268219820Sjeff qp_attr.qp_state = IB_QPS_RTS; 269219820Sjeff ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask); 270219820Sjeff if (ret) { 271219820Sjeff ipoib_warn(priv, "failed to init QP attr for RTS: %d\n", ret); 272219820Sjeff return 0; 273219820Sjeff } 274219820Sjeff ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask); 275219820Sjeff if (ret) { 276219820Sjeff ipoib_warn(priv, "failed to modify QP to RTS: %d\n", ret); 277219820Sjeff return 0; 278219820Sjeff } 279219820Sjeff 280219820Sjeff return 0; 281219820Sjeff} 282219820Sjeff 283219820Sjeffstatic void ipoib_cm_init_rx_wr(struct ipoib_dev_priv *priv, 284219820Sjeff struct ib_recv_wr *wr, 285219820Sjeff struct ib_sge *sge) 286219820Sjeff{ 287219820Sjeff int i; 288219820Sjeff 289219820Sjeff for (i = 0; i < IPOIB_CM_RX_SG; i++) 290219820Sjeff sge[i].lkey = priv->mr->lkey; 291219820Sjeff 292219820Sjeff wr->next = NULL; 293219820Sjeff wr->sg_list = sge; 294219820Sjeff wr->num_sge = 1; 295219820Sjeff} 296219820Sjeff 297219820Sjeffstatic int ipoib_cm_nonsrq_init_rx(struct ipoib_dev_priv *priv, 298219820Sjeff struct ib_cm_id *cm_id, struct ipoib_cm_rx *rx) 299219820Sjeff{ 300219820Sjeff struct { 301219820Sjeff struct ib_recv_wr wr; 302219820Sjeff struct ib_sge sge[IPOIB_CM_RX_SG]; 303219820Sjeff } *t; 304219820Sjeff int ret; 305219820Sjeff int i; 306219820Sjeff 307219820Sjeff rx->rx_ring = kzalloc(ipoib_recvq_size * sizeof *rx->rx_ring, GFP_KERNEL); 308219820Sjeff if (!rx->rx_ring) { 309219820Sjeff printk(KERN_WARNING "%s: failed to allocate CM non-SRQ ring (%d entries)\n", 310219820Sjeff priv->ca->name, ipoib_recvq_size); 311219820Sjeff return -ENOMEM; 312219820Sjeff } 313219820Sjeff 314219820Sjeff memset(rx->rx_ring, 0, ipoib_recvq_size * sizeof *rx->rx_ring); 315219820Sjeff 316219820Sjeff t = kmalloc(sizeof *t, GFP_KERNEL); 317219820Sjeff if (!t) { 318219820Sjeff ret = -ENOMEM; 319219820Sjeff goto err_free; 320219820Sjeff } 321219820Sjeff 322219820Sjeff ipoib_cm_init_rx_wr(priv, &t->wr, 
t->sge); 323219820Sjeff 324219820Sjeff spin_lock_irq(&priv->lock); 325219820Sjeff 326219820Sjeff if (priv->cm.nonsrq_conn_qp >= ipoib_max_conn_qp) { 327219820Sjeff spin_unlock_irq(&priv->lock); 328219820Sjeff ib_send_cm_rej(cm_id, IB_CM_REJ_NO_QP, NULL, 0, NULL, 0); 329219820Sjeff ret = -EINVAL; 330219820Sjeff goto err_free; 331219820Sjeff } else 332219820Sjeff ++priv->cm.nonsrq_conn_qp; 333219820Sjeff 334219820Sjeff spin_unlock_irq(&priv->lock); 335219820Sjeff 336219820Sjeff for (i = 0; i < ipoib_recvq_size; ++i) { 337219820Sjeff if (!ipoib_cm_alloc_rx_mb(priv, &rx->rx_ring[i])) { 338219820Sjeff ipoib_warn(priv, "failed to allocate receive buffer %d\n", i); 339219820Sjeff ret = -ENOMEM; 340219820Sjeff goto err_count; 341219820Sjeff } 342219820Sjeff ret = ipoib_cm_post_receive_nonsrq(priv, rx, &t->wr, t->sge, i); 343219820Sjeff if (ret) { 344219820Sjeff ipoib_warn(priv, "ipoib_cm_post_receive_nonsrq " 345219820Sjeff "failed for buf %d\n", i); 346219820Sjeff ret = -EIO; 347219820Sjeff goto err_count; 348219820Sjeff } 349219820Sjeff } 350219820Sjeff 351219820Sjeff rx->recv_count = ipoib_recvq_size; 352219820Sjeff 353219820Sjeff kfree(t); 354219820Sjeff 355219820Sjeff return 0; 356219820Sjeff 357219820Sjefferr_count: 358219820Sjeff spin_lock_irq(&priv->lock); 359219820Sjeff --priv->cm.nonsrq_conn_qp; 360219820Sjeff spin_unlock_irq(&priv->lock); 361219820Sjeff 362219820Sjefferr_free: 363219820Sjeff kfree(t); 364219820Sjeff ipoib_cm_free_rx_ring(priv, rx->rx_ring); 365219820Sjeff 366219820Sjeff return ret; 367219820Sjeff} 368219820Sjeff 369219820Sjeffstatic int ipoib_cm_send_rep(struct ipoib_dev_priv *priv, struct ib_cm_id *cm_id, 370219820Sjeff struct ib_qp *qp, struct ib_cm_req_event_param *req, 371219820Sjeff unsigned psn) 372219820Sjeff{ 373219820Sjeff struct ipoib_cm_data data = {}; 374219820Sjeff struct ib_cm_rep_param rep = {}; 375219820Sjeff 376219820Sjeff data.qpn = cpu_to_be32(priv->qp->qp_num); 377219820Sjeff data.mtu = cpu_to_be32(priv->cm.max_cm_mtu); 
378219820Sjeff 379219820Sjeff rep.private_data = &data; 380219820Sjeff rep.private_data_len = sizeof data; 381219820Sjeff rep.flow_control = 0; 382219820Sjeff rep.rnr_retry_count = req->rnr_retry_count; 383219820Sjeff rep.srq = ipoib_cm_has_srq(priv); 384219820Sjeff rep.qp_num = qp->qp_num; 385219820Sjeff rep.starting_psn = psn; 386219820Sjeff return ib_send_cm_rep(cm_id, &rep); 387219820Sjeff} 388219820Sjeff 389219820Sjeffstatic int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) 390219820Sjeff{ 391219820Sjeff struct ipoib_dev_priv *priv = cm_id->context; 392219820Sjeff struct ipoib_cm_rx *p; 393219820Sjeff unsigned psn; 394219820Sjeff int ret; 395219820Sjeff 396219820Sjeff ipoib_dbg(priv, "REQ arrived\n"); 397219820Sjeff p = kzalloc(sizeof *p, GFP_KERNEL); 398219820Sjeff if (!p) 399219820Sjeff return -ENOMEM; 400219820Sjeff p->priv = priv; 401219820Sjeff p->id = cm_id; 402219820Sjeff cm_id->context = p; 403219820Sjeff p->state = IPOIB_CM_RX_LIVE; 404219820Sjeff p->jiffies = jiffies; 405219820Sjeff INIT_LIST_HEAD(&p->list); 406219820Sjeff 407219820Sjeff p->qp = ipoib_cm_create_rx_qp(priv, p); 408219820Sjeff if (IS_ERR(p->qp)) { 409219820Sjeff ret = PTR_ERR(p->qp); 410219820Sjeff goto err_qp; 411219820Sjeff } 412219820Sjeff 413219820Sjeff psn = random() & 0xffffff; 414219820Sjeff ret = ipoib_cm_modify_rx_qp(priv, cm_id, p->qp, psn); 415219820Sjeff if (ret) 416219820Sjeff goto err_modify; 417219820Sjeff 418219820Sjeff if (!ipoib_cm_has_srq(priv)) { 419219820Sjeff ret = ipoib_cm_nonsrq_init_rx(priv, cm_id, p); 420219820Sjeff if (ret) 421219820Sjeff goto err_modify; 422219820Sjeff } 423219820Sjeff 424219820Sjeff spin_lock_irq(&priv->lock); 425219820Sjeff queue_delayed_work(ipoib_workqueue, 426219820Sjeff &priv->cm.stale_task, IPOIB_CM_RX_DELAY); 427219820Sjeff /* Add this entry to passive ids list head, but do not re-add it 428219820Sjeff * if IB_EVENT_QP_LAST_WQE_REACHED has moved it to flush list. 
*/ 429219820Sjeff p->jiffies = jiffies; 430219820Sjeff if (p->state == IPOIB_CM_RX_LIVE) 431219820Sjeff list_move(&p->list, &priv->cm.passive_ids); 432219820Sjeff spin_unlock_irq(&priv->lock); 433219820Sjeff 434219820Sjeff ret = ipoib_cm_send_rep(priv, cm_id, p->qp, &event->param.req_rcvd, psn); 435219820Sjeff if (ret) { 436219820Sjeff ipoib_warn(priv, "failed to send REP: %d\n", ret); 437219820Sjeff if (ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE)) 438219820Sjeff ipoib_warn(priv, "unable to move qp to error state\n"); 439219820Sjeff } 440219820Sjeff return 0; 441219820Sjeff 442219820Sjefferr_modify: 443219820Sjeff ib_destroy_qp(p->qp); 444219820Sjefferr_qp: 445219820Sjeff kfree(p); 446219820Sjeff return ret; 447219820Sjeff} 448219820Sjeff 449219820Sjeffstatic int ipoib_cm_rx_handler(struct ib_cm_id *cm_id, 450219820Sjeff struct ib_cm_event *event) 451219820Sjeff{ 452219820Sjeff struct ipoib_cm_rx *p; 453219820Sjeff struct ipoib_dev_priv *priv; 454219820Sjeff 455219820Sjeff switch (event->event) { 456219820Sjeff case IB_CM_REQ_RECEIVED: 457219820Sjeff return ipoib_cm_req_handler(cm_id, event); 458219820Sjeff case IB_CM_DREQ_RECEIVED: 459219820Sjeff p = cm_id->context; 460219820Sjeff ib_send_cm_drep(cm_id, NULL, 0); 461219820Sjeff /* Fall through */ 462219820Sjeff case IB_CM_REJ_RECEIVED: 463219820Sjeff p = cm_id->context; 464219820Sjeff priv = p->priv; 465219820Sjeff if (ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE)) 466219820Sjeff ipoib_warn(priv, "unable to move qp to error state\n"); 467219820Sjeff /* Fall through */ 468219820Sjeff default: 469219820Sjeff return 0; 470219820Sjeff } 471219820Sjeff} 472219820Sjeff 473219820Sjeffvoid ipoib_cm_handle_rx_wc(struct ipoib_dev_priv *priv, struct ib_wc *wc) 474219820Sjeff{ 475219820Sjeff struct ipoib_cm_rx_buf saverx; 476219820Sjeff struct ipoib_cm_rx_buf *rx_ring; 477219820Sjeff unsigned int wr_id = wc->wr_id & ~(IPOIB_OP_CM | IPOIB_OP_RECV); 478219820Sjeff struct ifnet *dev = priv->dev; 479219820Sjeff 
struct mbuf *mb, *newmb; 480219820Sjeff struct ipoib_cm_rx *p; 481219820Sjeff int has_srq; 482219820Sjeff u_short proto; 483219820Sjeff 484298458Shselasky CURVNET_SET_QUIET(dev->if_vnet); 485298458Shselasky 486219820Sjeff ipoib_dbg_data(priv, "cm recv completion: id %d, status: %d\n", 487219820Sjeff wr_id, wc->status); 488219820Sjeff 489219820Sjeff if (unlikely(wr_id >= ipoib_recvq_size)) { 490219820Sjeff if (wr_id == (IPOIB_CM_RX_DRAIN_WRID & ~(IPOIB_OP_CM | IPOIB_OP_RECV))) { 491219820Sjeff spin_lock(&priv->lock); 492219820Sjeff list_splice_init(&priv->cm.rx_drain_list, &priv->cm.rx_reap_list); 493219820Sjeff ipoib_cm_start_rx_drain(priv); 494219820Sjeff if (priv->cm.id != NULL) 495219820Sjeff queue_work(ipoib_workqueue, 496219820Sjeff &priv->cm.rx_reap_task); 497219820Sjeff spin_unlock(&priv->lock); 498219820Sjeff } else 499219820Sjeff ipoib_warn(priv, "cm recv completion event with wrid %d (> %d)\n", 500219820Sjeff wr_id, ipoib_recvq_size); 501298458Shselasky goto done; 502219820Sjeff } 503219820Sjeff 504219820Sjeff p = wc->qp->qp_context; 505219820Sjeff 506219820Sjeff has_srq = ipoib_cm_has_srq(priv); 507219820Sjeff rx_ring = has_srq ? 
priv->cm.srq_ring : p->rx_ring; 508219820Sjeff 509219820Sjeff mb = rx_ring[wr_id].mb; 510219820Sjeff 511219820Sjeff if (unlikely(wc->status != IB_WC_SUCCESS)) { 512219820Sjeff ipoib_dbg(priv, "cm recv error " 513219820Sjeff "(status=%d, wrid=%d vend_err %x)\n", 514219820Sjeff wc->status, wr_id, wc->vendor_err); 515272225Sglebius if_inc_counter(dev, IFCOUNTER_IERRORS, 1); 516219820Sjeff if (has_srq) 517219820Sjeff goto repost; 518219820Sjeff else { 519219820Sjeff if (!--p->recv_count) { 520219820Sjeff spin_lock(&priv->lock); 521219820Sjeff list_move(&p->list, &priv->cm.rx_reap_list); 522219820Sjeff queue_work(ipoib_workqueue, &priv->cm.rx_reap_task); 523219820Sjeff spin_unlock(&priv->lock); 524219820Sjeff } 525298458Shselasky goto done; 526219820Sjeff } 527219820Sjeff } 528219820Sjeff 529219820Sjeff if (unlikely(!(wr_id & IPOIB_CM_RX_UPDATE_MASK))) { 530219820Sjeff if (p && time_after_eq(jiffies, p->jiffies + IPOIB_CM_RX_UPDATE_TIME)) { 531219820Sjeff p->jiffies = jiffies; 532219820Sjeff /* Move this entry to list head, but do not re-add it 533219820Sjeff * if it has been moved out of list. */ 534219820Sjeff if (p->state == IPOIB_CM_RX_LIVE) 535219820Sjeff list_move(&p->list, &priv->cm.passive_ids); 536219820Sjeff } 537219820Sjeff } 538219820Sjeff 539219820Sjeff memcpy(&saverx, &rx_ring[wr_id], sizeof(saverx)); 540219820Sjeff newmb = ipoib_cm_alloc_rx_mb(priv, &rx_ring[wr_id]); 541219820Sjeff if (unlikely(!newmb)) { 542219820Sjeff /* 543219820Sjeff * If we can't allocate a new RX buffer, dump 544219820Sjeff * this packet and reuse the old buffer. 
545219820Sjeff */ 546219820Sjeff ipoib_dbg(priv, "failed to allocate receive buffer %d\n", wr_id); 547272225Sglebius if_inc_counter(dev, IFCOUNTER_IERRORS, 1); 548219820Sjeff memcpy(&rx_ring[wr_id], &saverx, sizeof(saverx)); 549219820Sjeff goto repost; 550219820Sjeff } 551219820Sjeff 552219820Sjeff ipoib_cm_dma_unmap_rx(priv, &saverx); 553219820Sjeff 554219820Sjeff ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n", 555219820Sjeff wc->byte_len, wc->slid); 556219820Sjeff 557219820Sjeff ipoib_dma_mb(priv, mb, wc->byte_len); 558219820Sjeff 559272225Sglebius if_inc_counter(dev, IFCOUNTER_IPACKETS, 1); 560272225Sglebius if_inc_counter(dev, IFCOUNTER_IBYTES, mb->m_pkthdr.len); 561219820Sjeff 562219820Sjeff mb->m_pkthdr.rcvif = dev; 563219820Sjeff proto = *mtod(mb, uint16_t *); 564219820Sjeff m_adj(mb, IPOIB_ENCAP_LEN); 565219820Sjeff 566219820Sjeff IPOIB_MTAP_PROTO(dev, mb, proto); 567219820Sjeff ipoib_demux(dev, mb, ntohs(proto)); 568219820Sjeff 569219820Sjeffrepost: 570219820Sjeff if (has_srq) { 571219820Sjeff if (unlikely(ipoib_cm_post_receive_srq(priv, wr_id))) 572219820Sjeff ipoib_warn(priv, "ipoib_cm_post_receive_srq failed " 573219820Sjeff "for buf %d\n", wr_id); 574219820Sjeff } else { 575219820Sjeff if (unlikely(ipoib_cm_post_receive_nonsrq(priv, p, 576219820Sjeff &priv->cm.rx_wr, 577219820Sjeff priv->cm.rx_sge, 578219820Sjeff wr_id))) { 579219820Sjeff --p->recv_count; 580219820Sjeff ipoib_warn(priv, "ipoib_cm_post_receive_nonsrq failed " 581219820Sjeff "for buf %d\n", wr_id); 582219820Sjeff } 583219820Sjeff } 584298458Shselaskydone: 585298458Shselasky CURVNET_RESTORE(); 586298458Shselasky return; 587219820Sjeff} 588219820Sjeff 589219820Sjeffstatic inline int post_send(struct ipoib_dev_priv *priv, 590219820Sjeff struct ipoib_cm_tx *tx, 591219820Sjeff struct ipoib_cm_tx_buf *tx_req, 592219820Sjeff unsigned int wr_id) 593219820Sjeff{ 594219820Sjeff struct ib_send_wr *bad_wr; 595219820Sjeff struct mbuf *mb = tx_req->mb; 596219820Sjeff u64 *mapping = 
tx_req->mapping; 597219820Sjeff struct mbuf *m; 598219820Sjeff int i; 599219820Sjeff 600219820Sjeff for (m = mb, i = 0; m != NULL; m = m->m_next, i++) { 601219820Sjeff priv->tx_sge[i].addr = mapping[i]; 602219820Sjeff priv->tx_sge[i].length = m->m_len; 603219820Sjeff } 604219820Sjeff priv->tx_wr.num_sge = i; 605219820Sjeff priv->tx_wr.wr_id = wr_id | IPOIB_OP_CM; 606219820Sjeff priv->tx_wr.opcode = IB_WR_SEND; 607219820Sjeff 608219820Sjeff return ib_post_send(tx->qp, &priv->tx_wr, &bad_wr); 609219820Sjeff} 610219820Sjeff 611219820Sjeffvoid ipoib_cm_send(struct ipoib_dev_priv *priv, struct mbuf *mb, struct ipoib_cm_tx *tx) 612219820Sjeff{ 613219820Sjeff struct ipoib_cm_tx_buf *tx_req; 614219820Sjeff struct ifnet *dev = priv->dev; 615219820Sjeff 616219820Sjeff if (unlikely(priv->tx_outstanding > MAX_SEND_CQE)) 617219820Sjeff while (ipoib_poll_tx(priv)); /* nothing */ 618219820Sjeff 619219820Sjeff m_adj(mb, sizeof(struct ipoib_pseudoheader)); 620219820Sjeff if (unlikely(mb->m_pkthdr.len > tx->mtu)) { 621219820Sjeff ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n", 622219820Sjeff mb->m_pkthdr.len, tx->mtu); 623272225Sglebius if_inc_counter(dev, IFCOUNTER_OERRORS, 1); 624219820Sjeff ipoib_cm_mb_too_long(priv, mb, IPOIB_CM_MTU(tx->mtu)); 625219820Sjeff return; 626219820Sjeff } 627219820Sjeff 628219820Sjeff ipoib_dbg_data(priv, "sending packet: head 0x%x length %d connection 0x%x\n", 629219820Sjeff tx->tx_head, mb->m_pkthdr.len, tx->qp->qp_num); 630219820Sjeff 631219820Sjeff 632219820Sjeff /* 633219820Sjeff * We put the mb into the tx_ring _before_ we call post_send() 634219820Sjeff * because it's entirely possible that the completion handler will 635219820Sjeff * run before we execute anything after the post_send(). That 636219820Sjeff * means we have to make sure everything is properly recorded and 637219820Sjeff * our state is consistent before we call post_send(). 
638219820Sjeff */ 639219820Sjeff tx_req = &tx->tx_ring[tx->tx_head & (ipoib_sendq_size - 1)]; 640219820Sjeff tx_req->mb = mb; 641219820Sjeff if (unlikely(ipoib_dma_map_tx(priv->ca, (struct ipoib_tx_buf *)tx_req, 642219820Sjeff priv->cm.num_frags))) { 643272225Sglebius if_inc_counter(dev, IFCOUNTER_OERRORS, 1); 644219820Sjeff if (tx_req->mb) 645219820Sjeff m_freem(tx_req->mb); 646219820Sjeff return; 647219820Sjeff } 648219820Sjeff 649219820Sjeff if (unlikely(post_send(priv, tx, tx_req, tx->tx_head & (ipoib_sendq_size - 1)))) { 650219820Sjeff ipoib_warn(priv, "post_send failed\n"); 651272225Sglebius if_inc_counter(dev, IFCOUNTER_OERRORS, 1); 652219820Sjeff ipoib_dma_unmap_tx(priv->ca, (struct ipoib_tx_buf *)tx_req); 653219820Sjeff m_freem(mb); 654219820Sjeff } else { 655219820Sjeff ++tx->tx_head; 656219820Sjeff 657219820Sjeff if (++priv->tx_outstanding == ipoib_sendq_size) { 658219820Sjeff ipoib_dbg(priv, "TX ring 0x%x full, stopping kernel net queue\n", 659219820Sjeff tx->qp->qp_num); 660219820Sjeff if (ib_req_notify_cq(priv->send_cq, IB_CQ_NEXT_COMP)) 661219820Sjeff ipoib_warn(priv, "request notify on send CQ failed\n"); 662219820Sjeff dev->if_drv_flags |= IFF_DRV_OACTIVE; 663219820Sjeff } 664219820Sjeff } 665219820Sjeff 666219820Sjeff} 667219820Sjeff 668219820Sjeffvoid ipoib_cm_handle_tx_wc(struct ipoib_dev_priv *priv, struct ib_wc *wc) 669219820Sjeff{ 670219820Sjeff struct ipoib_cm_tx *tx = wc->qp->qp_context; 671219820Sjeff unsigned int wr_id = wc->wr_id & ~IPOIB_OP_CM; 672219820Sjeff struct ifnet *dev = priv->dev; 673219820Sjeff struct ipoib_cm_tx_buf *tx_req; 674219820Sjeff 675219820Sjeff ipoib_dbg_data(priv, "cm send completion: id %d, status: %d\n", 676219820Sjeff wr_id, wc->status); 677219820Sjeff 678219820Sjeff if (unlikely(wr_id >= ipoib_sendq_size)) { 679219820Sjeff ipoib_warn(priv, "cm send completion event with wrid %d (> %d)\n", 680219820Sjeff wr_id, ipoib_sendq_size); 681219820Sjeff return; 682219820Sjeff } 683219820Sjeff 684219820Sjeff tx_req = 
&tx->tx_ring[wr_id]; 685219820Sjeff 686219820Sjeff ipoib_dma_unmap_tx(priv->ca, (struct ipoib_tx_buf *)tx_req); 687219820Sjeff 688219820Sjeff /* FIXME: is this right? Shouldn't we only increment on success? */ 689272225Sglebius if_inc_counter(dev, IFCOUNTER_OPACKETS, 1); 690219820Sjeff 691219820Sjeff m_freem(tx_req->mb); 692219820Sjeff 693219820Sjeff ++tx->tx_tail; 694219820Sjeff if (unlikely(--priv->tx_outstanding == ipoib_sendq_size >> 1) && 695219820Sjeff (dev->if_drv_flags & IFF_DRV_OACTIVE) != 0 && 696219820Sjeff test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) 697219820Sjeff dev->if_drv_flags &= ~IFF_DRV_OACTIVE; 698219820Sjeff 699219820Sjeff if (wc->status != IB_WC_SUCCESS && 700219820Sjeff wc->status != IB_WC_WR_FLUSH_ERR) { 701219820Sjeff struct ipoib_path *path; 702219820Sjeff 703219820Sjeff ipoib_dbg(priv, "failed cm send event " 704219820Sjeff "(status=%d, wrid=%d vend_err %x)\n", 705219820Sjeff wc->status, wr_id, wc->vendor_err); 706219820Sjeff 707219820Sjeff path = tx->path; 708219820Sjeff 709219820Sjeff if (path) { 710219820Sjeff path->cm = NULL; 711219820Sjeff rb_erase(&path->rb_node, &priv->path_tree); 712219820Sjeff list_del(&path->list); 713219820Sjeff } 714219820Sjeff 715219820Sjeff if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) { 716219820Sjeff list_move(&tx->list, &priv->cm.reap_list); 717219820Sjeff queue_work(ipoib_workqueue, &priv->cm.reap_task); 718219820Sjeff } 719219820Sjeff 720219820Sjeff clear_bit(IPOIB_FLAG_OPER_UP, &tx->flags); 721219820Sjeff } 722219820Sjeff 723219820Sjeff} 724219820Sjeff 725219820Sjeffint ipoib_cm_dev_open(struct ipoib_dev_priv *priv) 726219820Sjeff{ 727219820Sjeff int ret; 728219820Sjeff 729219820Sjeff if (!IPOIB_CM_SUPPORTED(IF_LLADDR(priv->dev))) 730219820Sjeff return 0; 731219820Sjeff 732219820Sjeff priv->cm.id = ib_create_cm_id(priv->ca, ipoib_cm_rx_handler, priv); 733219820Sjeff if (IS_ERR(priv->cm.id)) { 734219820Sjeff printk(KERN_WARNING "%s: failed to create CM ID\n", priv->ca->name); 
735219820Sjeff ret = PTR_ERR(priv->cm.id); 736219820Sjeff goto err_cm; 737219820Sjeff } 738219820Sjeff 739219820Sjeff ret = ib_cm_listen(priv->cm.id, cpu_to_be64(IPOIB_CM_IETF_ID | priv->qp->qp_num), 740219820Sjeff 0, NULL); 741219820Sjeff if (ret) { 742219820Sjeff printk(KERN_WARNING "%s: failed to listen on ID 0x%llx\n", priv->ca->name, 743219820Sjeff IPOIB_CM_IETF_ID | priv->qp->qp_num); 744219820Sjeff goto err_listen; 745219820Sjeff } 746219820Sjeff 747219820Sjeff return 0; 748219820Sjeff 749219820Sjefferr_listen: 750219820Sjeff ib_destroy_cm_id(priv->cm.id); 751219820Sjefferr_cm: 752219820Sjeff priv->cm.id = NULL; 753219820Sjeff return ret; 754219820Sjeff} 755219820Sjeff 756219820Sjeffstatic void ipoib_cm_free_rx_reap_list(struct ipoib_dev_priv *priv) 757219820Sjeff{ 758219820Sjeff struct ipoib_cm_rx *rx, *n; 759219820Sjeff LIST_HEAD(list); 760219820Sjeff 761219820Sjeff spin_lock_irq(&priv->lock); 762219820Sjeff list_splice_init(&priv->cm.rx_reap_list, &list); 763219820Sjeff spin_unlock_irq(&priv->lock); 764219820Sjeff 765219820Sjeff list_for_each_entry_safe(rx, n, &list, list) { 766219820Sjeff ib_destroy_cm_id(rx->id); 767219820Sjeff ib_destroy_qp(rx->qp); 768219820Sjeff if (!ipoib_cm_has_srq(priv)) { 769219820Sjeff ipoib_cm_free_rx_ring(priv, rx->rx_ring); 770219820Sjeff spin_lock_irq(&priv->lock); 771219820Sjeff --priv->cm.nonsrq_conn_qp; 772219820Sjeff spin_unlock_irq(&priv->lock); 773219820Sjeff } 774219820Sjeff kfree(rx); 775219820Sjeff } 776219820Sjeff} 777219820Sjeff 778219820Sjeffvoid ipoib_cm_dev_stop(struct ipoib_dev_priv *priv) 779219820Sjeff{ 780219820Sjeff struct ipoib_cm_rx *p; 781219820Sjeff unsigned long begin; 782219820Sjeff int ret; 783219820Sjeff 784219820Sjeff if (!IPOIB_CM_SUPPORTED(IF_LLADDR(priv->dev)) || !priv->cm.id) 785219820Sjeff return; 786219820Sjeff 787219820Sjeff ib_destroy_cm_id(priv->cm.id); 788219820Sjeff priv->cm.id = NULL; 789219820Sjeff 790219820Sjeff cancel_work_sync(&priv->cm.rx_reap_task); 791219820Sjeff 792219820Sjeff 
spin_lock_irq(&priv->lock); 793219820Sjeff while (!list_empty(&priv->cm.passive_ids)) { 794219820Sjeff p = list_entry(priv->cm.passive_ids.next, typeof(*p), list); 795219820Sjeff list_move(&p->list, &priv->cm.rx_error_list); 796219820Sjeff p->state = IPOIB_CM_RX_ERROR; 797219820Sjeff spin_unlock_irq(&priv->lock); 798219820Sjeff ret = ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE); 799219820Sjeff if (ret) 800219820Sjeff ipoib_warn(priv, "unable to move qp to error state: %d\n", ret); 801219820Sjeff spin_lock_irq(&priv->lock); 802219820Sjeff } 803219820Sjeff 804219820Sjeff /* Wait for all RX to be drained */ 805219820Sjeff begin = jiffies; 806219820Sjeff 807219820Sjeff while (!list_empty(&priv->cm.rx_error_list) || 808219820Sjeff !list_empty(&priv->cm.rx_flush_list) || 809219820Sjeff !list_empty(&priv->cm.rx_drain_list)) { 810219820Sjeff if (time_after(jiffies, begin + 5 * HZ)) { 811219820Sjeff ipoib_warn(priv, "RX drain timing out\n"); 812219820Sjeff 813219820Sjeff /* 814219820Sjeff * assume the HW is wedged and just free up everything. 
815219820Sjeff */ 816219820Sjeff list_splice_init(&priv->cm.rx_flush_list, 817219820Sjeff &priv->cm.rx_reap_list); 818219820Sjeff list_splice_init(&priv->cm.rx_error_list, 819219820Sjeff &priv->cm.rx_reap_list); 820219820Sjeff list_splice_init(&priv->cm.rx_drain_list, 821219820Sjeff &priv->cm.rx_reap_list); 822219820Sjeff break; 823219820Sjeff } 824219820Sjeff spin_unlock_irq(&priv->lock); 825219820Sjeff msleep(1); 826219820Sjeff ipoib_drain_cq(priv); 827219820Sjeff spin_lock_irq(&priv->lock); 828219820Sjeff } 829219820Sjeff 830219820Sjeff spin_unlock_irq(&priv->lock); 831219820Sjeff 832219820Sjeff ipoib_cm_free_rx_reap_list(priv); 833219820Sjeff 834219820Sjeff cancel_delayed_work(&priv->cm.stale_task); 835219820Sjeff} 836219820Sjeff 837219820Sjeffstatic int ipoib_cm_rep_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) 838219820Sjeff{ 839219820Sjeff struct ipoib_cm_tx *p = cm_id->context; 840219820Sjeff struct ipoib_dev_priv *priv = p->priv; 841219820Sjeff struct ipoib_cm_data *data = event->private_data; 842219820Sjeff struct ifqueue mbqueue; 843219820Sjeff struct ib_qp_attr qp_attr; 844219820Sjeff int qp_attr_mask, ret; 845219820Sjeff struct mbuf *mb; 846219820Sjeff 847219820Sjeff ipoib_dbg(priv, "cm rep handler\n"); 848219820Sjeff p->mtu = be32_to_cpu(data->mtu); 849219820Sjeff 850219820Sjeff if (p->mtu <= IPOIB_ENCAP_LEN) { 851219820Sjeff ipoib_warn(priv, "Rejecting connection: mtu %d <= %d\n", 852219820Sjeff p->mtu, IPOIB_ENCAP_LEN); 853219820Sjeff return -EINVAL; 854219820Sjeff } 855219820Sjeff 856219820Sjeff qp_attr.qp_state = IB_QPS_RTR; 857219820Sjeff ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask); 858219820Sjeff if (ret) { 859219820Sjeff ipoib_warn(priv, "failed to init QP attr for RTR: %d\n", ret); 860219820Sjeff return ret; 861219820Sjeff } 862219820Sjeff 863219820Sjeff qp_attr.rq_psn = 0 /* FIXME */; 864219820Sjeff ret = ib_modify_qp(p->qp, &qp_attr, qp_attr_mask); 865219820Sjeff if (ret) { 866219820Sjeff ipoib_warn(priv, "failed to 
modify QP to RTR: %d\n", ret); 867219820Sjeff return ret; 868219820Sjeff } 869219820Sjeff 870219820Sjeff qp_attr.qp_state = IB_QPS_RTS; 871219820Sjeff ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask); 872219820Sjeff if (ret) { 873219820Sjeff ipoib_warn(priv, "failed to init QP attr for RTS: %d\n", ret); 874219820Sjeff return ret; 875219820Sjeff } 876219820Sjeff ret = ib_modify_qp(p->qp, &qp_attr, qp_attr_mask); 877219820Sjeff if (ret) { 878219820Sjeff ipoib_warn(priv, "failed to modify QP to RTS: %d\n", ret); 879219820Sjeff return ret; 880219820Sjeff } 881219820Sjeff 882219820Sjeff bzero(&mbqueue, sizeof(mbqueue)); 883219820Sjeff 884219820Sjeff spin_lock_irq(&priv->lock); 885219820Sjeff set_bit(IPOIB_FLAG_OPER_UP, &p->flags); 886219820Sjeff if (p->path) 887219820Sjeff for (;;) { 888219820Sjeff _IF_DEQUEUE(&p->path->queue, mb); 889219820Sjeff if (mb == NULL) 890219820Sjeff break; 891219820Sjeff _IF_ENQUEUE(&mbqueue, mb); 892219820Sjeff } 893219820Sjeff spin_unlock_irq(&priv->lock); 894219820Sjeff 895219820Sjeff for (;;) { 896219820Sjeff struct ifnet *dev = p->priv->dev; 897219820Sjeff _IF_DEQUEUE(&mbqueue, mb); 898219820Sjeff if (mb == NULL) 899219820Sjeff break; 900219820Sjeff mb->m_pkthdr.rcvif = dev; 901219820Sjeff if (dev->if_transmit(dev, mb)) 902219820Sjeff ipoib_warn(priv, "dev_queue_xmit failed " 903219820Sjeff "to requeue packet\n"); 904219820Sjeff } 905219820Sjeff 906219820Sjeff ret = ib_send_cm_rtu(cm_id, NULL, 0); 907219820Sjeff if (ret) { 908219820Sjeff ipoib_warn(priv, "failed to send RTU: %d\n", ret); 909219820Sjeff return ret; 910219820Sjeff } 911219820Sjeff return 0; 912219820Sjeff} 913219820Sjeff 914219820Sjeffstatic struct ib_qp *ipoib_cm_create_tx_qp(struct ipoib_dev_priv *priv, 915219820Sjeff struct ipoib_cm_tx *tx) 916219820Sjeff{ 917219820Sjeff struct ib_qp_init_attr attr = { 918219820Sjeff .send_cq = priv->send_cq, 919219820Sjeff .recv_cq = priv->recv_cq, 920219820Sjeff .srq = priv->cm.srq, 921219820Sjeff .cap.max_send_wr = 
ipoib_sendq_size, 922219820Sjeff .cap.max_send_sge = priv->cm.num_frags, 923219820Sjeff .sq_sig_type = IB_SIGNAL_ALL_WR, 924219820Sjeff .qp_type = IB_QPT_RC, 925219820Sjeff .qp_context = tx 926219820Sjeff }; 927219820Sjeff 928219820Sjeff return ib_create_qp(priv->pd, &attr); 929219820Sjeff} 930219820Sjeff 931219820Sjeffstatic int ipoib_cm_send_req(struct ipoib_dev_priv *priv, 932219820Sjeff struct ib_cm_id *id, struct ib_qp *qp, 933219820Sjeff u32 qpn, 934219820Sjeff struct ib_sa_path_rec *pathrec) 935219820Sjeff{ 936219820Sjeff struct ipoib_cm_data data = {}; 937219820Sjeff struct ib_cm_req_param req = {}; 938219820Sjeff 939219820Sjeff ipoib_dbg(priv, "cm send req\n"); 940219820Sjeff 941219820Sjeff data.qpn = cpu_to_be32(priv->qp->qp_num); 942219820Sjeff data.mtu = cpu_to_be32(priv->cm.max_cm_mtu); 943219820Sjeff 944219820Sjeff req.primary_path = pathrec; 945219820Sjeff req.alternate_path = NULL; 946219820Sjeff req.service_id = cpu_to_be64(IPOIB_CM_IETF_ID | qpn); 947219820Sjeff req.qp_num = qp->qp_num; 948219820Sjeff req.qp_type = qp->qp_type; 949219820Sjeff req.private_data = &data; 950219820Sjeff req.private_data_len = sizeof data; 951219820Sjeff req.flow_control = 0; 952219820Sjeff 953219820Sjeff req.starting_psn = 0; /* FIXME */ 954219820Sjeff 955219820Sjeff /* 956219820Sjeff * Pick some arbitrary defaults here; we could make these 957219820Sjeff * module parameters if anyone cared about setting them. 
958219820Sjeff */ 959219820Sjeff req.responder_resources = 4; 960219820Sjeff req.remote_cm_response_timeout = 20; 961219820Sjeff req.local_cm_response_timeout = 20; 962219820Sjeff req.retry_count = 0; /* RFC draft warns against retries */ 963219820Sjeff req.rnr_retry_count = 0; /* RFC draft warns against retries */ 964219820Sjeff req.max_cm_retries = 15; 965219820Sjeff req.srq = ipoib_cm_has_srq(priv); 966219820Sjeff return ib_send_cm_req(id, &req); 967219820Sjeff} 968219820Sjeff 969219820Sjeffstatic int ipoib_cm_modify_tx_init(struct ipoib_dev_priv *priv, 970219820Sjeff struct ib_cm_id *cm_id, struct ib_qp *qp) 971219820Sjeff{ 972219820Sjeff struct ib_qp_attr qp_attr; 973219820Sjeff int qp_attr_mask, ret; 974219820Sjeff ret = ib_find_pkey(priv->ca, priv->port, priv->pkey, &qp_attr.pkey_index); 975219820Sjeff if (ret) { 976219820Sjeff ipoib_warn(priv, "pkey 0x%x not found: %d\n", priv->pkey, ret); 977219820Sjeff return ret; 978219820Sjeff } 979219820Sjeff 980219820Sjeff qp_attr.qp_state = IB_QPS_INIT; 981219820Sjeff qp_attr.qp_access_flags = IB_ACCESS_LOCAL_WRITE; 982219820Sjeff qp_attr.port_num = priv->port; 983219820Sjeff qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX | IB_QP_PORT; 984219820Sjeff 985219820Sjeff ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask); 986219820Sjeff if (ret) { 987219820Sjeff ipoib_warn(priv, "failed to modify tx QP to INIT: %d\n", ret); 988219820Sjeff return ret; 989219820Sjeff } 990219820Sjeff return 0; 991219820Sjeff} 992219820Sjeff 993219820Sjeffstatic int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn, 994219820Sjeff struct ib_sa_path_rec *pathrec) 995219820Sjeff{ 996219820Sjeff struct ipoib_dev_priv *priv = p->priv; 997219820Sjeff int ret; 998219820Sjeff 999219820Sjeff p->tx_ring = kzalloc(ipoib_sendq_size * sizeof *p->tx_ring, GFP_KERNEL); 1000219820Sjeff if (!p->tx_ring) { 1001219820Sjeff ipoib_warn(priv, "failed to allocate tx ring\n"); 1002219820Sjeff ret = -ENOMEM; 1003219820Sjeff goto err_tx; 1004219820Sjeff 
} 1005219820Sjeff memset(p->tx_ring, 0, ipoib_sendq_size * sizeof *p->tx_ring); 1006219820Sjeff 1007219820Sjeff p->qp = ipoib_cm_create_tx_qp(p->priv, p); 1008219820Sjeff if (IS_ERR(p->qp)) { 1009219820Sjeff ret = PTR_ERR(p->qp); 1010219820Sjeff ipoib_warn(priv, "failed to allocate tx qp: %d\n", ret); 1011219820Sjeff goto err_qp; 1012219820Sjeff } 1013219820Sjeff 1014219820Sjeff p->id = ib_create_cm_id(priv->ca, ipoib_cm_tx_handler, p); 1015219820Sjeff if (IS_ERR(p->id)) { 1016219820Sjeff ret = PTR_ERR(p->id); 1017219820Sjeff ipoib_warn(priv, "failed to create tx cm id: %d\n", ret); 1018219820Sjeff goto err_id; 1019219820Sjeff } 1020219820Sjeff 1021219820Sjeff ret = ipoib_cm_modify_tx_init(p->priv, p->id, p->qp); 1022219820Sjeff if (ret) { 1023219820Sjeff ipoib_warn(priv, "failed to modify tx qp to rtr: %d\n", ret); 1024219820Sjeff goto err_modify; 1025219820Sjeff } 1026219820Sjeff 1027219820Sjeff ret = ipoib_cm_send_req(p->priv, p->id, p->qp, qpn, pathrec); 1028219820Sjeff if (ret) { 1029219820Sjeff ipoib_warn(priv, "failed to send cm req: %d\n", ret); 1030219820Sjeff goto err_send_cm; 1031219820Sjeff } 1032219820Sjeff 1033219820Sjeff ipoib_dbg(priv, "Request connection 0x%x for gid %pI6 qpn 0x%x\n", 1034219820Sjeff p->qp->qp_num, pathrec->dgid.raw, qpn); 1035219820Sjeff 1036219820Sjeff return 0; 1037219820Sjeff 1038219820Sjefferr_send_cm: 1039219820Sjefferr_modify: 1040219820Sjeff ib_destroy_cm_id(p->id); 1041219820Sjefferr_id: 1042219820Sjeff p->id = NULL; 1043219820Sjeff ib_destroy_qp(p->qp); 1044219820Sjefferr_qp: 1045219820Sjeff p->qp = NULL; 1046219820Sjeff kfree(p->tx_ring); 1047219820Sjefferr_tx: 1048219820Sjeff return ret; 1049219820Sjeff} 1050219820Sjeff 1051219820Sjeffstatic void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p) 1052219820Sjeff{ 1053219820Sjeff struct ipoib_dev_priv *priv = p->priv; 1054219820Sjeff struct ifnet *dev = priv->dev; 1055219820Sjeff struct ipoib_cm_tx_buf *tx_req; 1056219820Sjeff unsigned long begin; 1057219820Sjeff 1058219820Sjeff 
ipoib_dbg(priv, "Destroy active connection 0x%x head 0x%x tail 0x%x\n", 1059219820Sjeff p->qp ? p->qp->qp_num : 0, p->tx_head, p->tx_tail); 1060219820Sjeff 1061219820Sjeff if (p->path) 1062219820Sjeff ipoib_path_free(priv, p->path); 1063219820Sjeff 1064219820Sjeff if (p->id) 1065219820Sjeff ib_destroy_cm_id(p->id); 1066219820Sjeff 1067219820Sjeff if (p->tx_ring) { 1068219820Sjeff /* Wait for all sends to complete */ 1069219820Sjeff begin = jiffies; 1070219820Sjeff while ((int) p->tx_tail - (int) p->tx_head < 0) { 1071219820Sjeff if (time_after(jiffies, begin + 5 * HZ)) { 1072219820Sjeff ipoib_warn(priv, "timing out; %d sends not completed\n", 1073219820Sjeff p->tx_head - p->tx_tail); 1074219820Sjeff goto timeout; 1075219820Sjeff } 1076219820Sjeff 1077219820Sjeff msleep(1); 1078219820Sjeff } 1079219820Sjeff } 1080219820Sjeff 1081219820Sjefftimeout: 1082219820Sjeff 1083219820Sjeff while ((int) p->tx_tail - (int) p->tx_head < 0) { 1084219820Sjeff tx_req = &p->tx_ring[p->tx_tail & (ipoib_sendq_size - 1)]; 1085219820Sjeff ipoib_dma_unmap_tx(priv->ca, (struct ipoib_tx_buf *)tx_req); 1086219820Sjeff m_freem(tx_req->mb); 1087219820Sjeff ++p->tx_tail; 1088219820Sjeff if (unlikely(--priv->tx_outstanding == ipoib_sendq_size >> 1) && 1089219820Sjeff (dev->if_drv_flags & IFF_DRV_OACTIVE) != 0 && 1090219820Sjeff test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) 1091219820Sjeff dev->if_drv_flags &= ~IFF_DRV_OACTIVE; 1092219820Sjeff } 1093219820Sjeff 1094219820Sjeff if (p->qp) 1095219820Sjeff ib_destroy_qp(p->qp); 1096219820Sjeff 1097219820Sjeff kfree(p->tx_ring); 1098219820Sjeff kfree(p); 1099219820Sjeff} 1100219820Sjeff 1101219820Sjeffstatic int ipoib_cm_tx_handler(struct ib_cm_id *cm_id, 1102219820Sjeff struct ib_cm_event *event) 1103219820Sjeff{ 1104219820Sjeff struct ipoib_cm_tx *tx = cm_id->context; 1105219820Sjeff struct ipoib_dev_priv *priv = tx->priv; 1106219820Sjeff struct ipoib_path *path; 1107219820Sjeff unsigned long flags; 1108219820Sjeff int ret; 1109219820Sjeff 
1110219820Sjeff switch (event->event) { 1111219820Sjeff case IB_CM_DREQ_RECEIVED: 1112219820Sjeff ipoib_dbg(priv, "DREQ received.\n"); 1113219820Sjeff ib_send_cm_drep(cm_id, NULL, 0); 1114219820Sjeff break; 1115219820Sjeff case IB_CM_REP_RECEIVED: 1116219820Sjeff ipoib_dbg(priv, "REP received.\n"); 1117219820Sjeff ret = ipoib_cm_rep_handler(cm_id, event); 1118219820Sjeff if (ret) 1119219820Sjeff ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED, 1120219820Sjeff NULL, 0, NULL, 0); 1121219820Sjeff break; 1122219820Sjeff case IB_CM_REQ_ERROR: 1123219820Sjeff case IB_CM_REJ_RECEIVED: 1124219820Sjeff case IB_CM_TIMEWAIT_EXIT: 1125219820Sjeff ipoib_dbg(priv, "CM error %d.\n", event->event); 1126219820Sjeff spin_lock_irqsave(&priv->lock, flags); 1127219820Sjeff path = tx->path; 1128219820Sjeff 1129219820Sjeff if (path) { 1130219820Sjeff path->cm = NULL; 1131219820Sjeff tx->path = NULL; 1132219820Sjeff rb_erase(&path->rb_node, &priv->path_tree); 1133219820Sjeff list_del(&path->list); 1134219820Sjeff } 1135219820Sjeff 1136219820Sjeff if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) { 1137219820Sjeff list_move(&tx->list, &priv->cm.reap_list); 1138219820Sjeff queue_work(ipoib_workqueue, &priv->cm.reap_task); 1139219820Sjeff } 1140219820Sjeff 1141219820Sjeff spin_unlock_irqrestore(&priv->lock, flags); 1142219820Sjeff if (path) 1143219820Sjeff ipoib_path_free(tx->priv, path); 1144219820Sjeff break; 1145219820Sjeff default: 1146219820Sjeff break; 1147219820Sjeff } 1148219820Sjeff 1149219820Sjeff return 0; 1150219820Sjeff} 1151219820Sjeff 1152219820Sjeffstruct ipoib_cm_tx *ipoib_cm_create_tx(struct ipoib_dev_priv *priv, 1153219820Sjeff struct ipoib_path *path) 1154219820Sjeff{ 1155219820Sjeff struct ipoib_cm_tx *tx; 1156219820Sjeff 1157219820Sjeff tx = kzalloc(sizeof *tx, GFP_ATOMIC); 1158219820Sjeff if (!tx) 1159219820Sjeff return NULL; 1160219820Sjeff 1161219820Sjeff ipoib_dbg(priv, "Creating cm tx\n"); 1162219820Sjeff path->cm = tx; 1163219820Sjeff tx->path = path; 
1164219820Sjeff tx->priv = priv; 1165219820Sjeff list_add(&tx->list, &priv->cm.start_list); 1166219820Sjeff set_bit(IPOIB_FLAG_INITIALIZED, &tx->flags); 1167219820Sjeff queue_work(ipoib_workqueue, &priv->cm.start_task); 1168219820Sjeff return tx; 1169219820Sjeff} 1170219820Sjeff 1171219820Sjeffvoid ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx) 1172219820Sjeff{ 1173219820Sjeff struct ipoib_dev_priv *priv = tx->priv; 1174219820Sjeff if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) { 1175219820Sjeff spin_lock(&priv->lock); 1176219820Sjeff list_move(&tx->list, &priv->cm.reap_list); 1177219820Sjeff spin_unlock(&priv->lock); 1178219820Sjeff queue_work(ipoib_workqueue, &priv->cm.reap_task); 1179219820Sjeff ipoib_dbg(priv, "Reap connection for gid %pI6\n", 1180219820Sjeff tx->path->pathrec.dgid.raw); 1181219820Sjeff tx->path = NULL; 1182219820Sjeff } 1183219820Sjeff} 1184219820Sjeff 1185219820Sjeffstatic void ipoib_cm_tx_start(struct work_struct *work) 1186219820Sjeff{ 1187219820Sjeff struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, 1188219820Sjeff cm.start_task); 1189219820Sjeff struct ipoib_path *path; 1190219820Sjeff struct ipoib_cm_tx *p; 1191219820Sjeff unsigned long flags; 1192219820Sjeff int ret; 1193219820Sjeff 1194219820Sjeff struct ib_sa_path_rec pathrec; 1195219820Sjeff u32 qpn; 1196219820Sjeff 1197219820Sjeff ipoib_dbg(priv, "cm start task\n"); 1198219820Sjeff spin_lock_irqsave(&priv->lock, flags); 1199219820Sjeff 1200219820Sjeff while (!list_empty(&priv->cm.start_list)) { 1201219820Sjeff p = list_entry(priv->cm.start_list.next, typeof(*p), list); 1202219820Sjeff list_del_init(&p->list); 1203219820Sjeff path = p->path; 1204219820Sjeff qpn = IPOIB_QPN(path->hwaddr); 1205219820Sjeff memcpy(&pathrec, &p->path->pathrec, sizeof pathrec); 1206219820Sjeff 1207219820Sjeff spin_unlock_irqrestore(&priv->lock, flags); 1208219820Sjeff 1209219820Sjeff ret = ipoib_cm_tx_init(p, qpn, &pathrec); 1210219820Sjeff 1211219820Sjeff 
spin_lock_irqsave(&priv->lock, flags); 1212219820Sjeff 1213219820Sjeff if (ret) { 1214219820Sjeff path = p->path; 1215219820Sjeff if (path) { 1216219820Sjeff path->cm = NULL; 1217219820Sjeff rb_erase(&path->rb_node, &priv->path_tree); 1218219820Sjeff list_del(&path->list); 1219219820Sjeff ipoib_path_free(priv, path); 1220219820Sjeff } 1221219820Sjeff list_del(&p->list); 1222219820Sjeff kfree(p); 1223219820Sjeff } 1224219820Sjeff } 1225219820Sjeff 1226219820Sjeff spin_unlock_irqrestore(&priv->lock, flags); 1227219820Sjeff} 1228219820Sjeff 1229219820Sjeffstatic void ipoib_cm_tx_reap(struct work_struct *work) 1230219820Sjeff{ 1231219820Sjeff struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, 1232219820Sjeff cm.reap_task); 1233219820Sjeff struct ipoib_cm_tx *p; 1234219820Sjeff unsigned long flags; 1235219820Sjeff 1236219820Sjeff spin_lock_irqsave(&priv->lock, flags); 1237219820Sjeff 1238219820Sjeff while (!list_empty(&priv->cm.reap_list)) { 1239219820Sjeff p = list_entry(priv->cm.reap_list.next, typeof(*p), list); 1240219820Sjeff list_del(&p->list); 1241219820Sjeff spin_unlock_irqrestore(&priv->lock, flags); 1242219820Sjeff ipoib_cm_tx_destroy(p); 1243219820Sjeff spin_lock_irqsave(&priv->lock, flags); 1244219820Sjeff } 1245219820Sjeff 1246219820Sjeff spin_unlock_irqrestore(&priv->lock, flags); 1247219820Sjeff} 1248219820Sjeff 1249219820Sjeffstatic void ipoib_cm_mb_reap(struct work_struct *work) 1250219820Sjeff{ 1251219820Sjeff struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, 1252219820Sjeff cm.mb_task); 1253219820Sjeff struct mbuf *mb; 1254219820Sjeff unsigned long flags; 1255234183Sjhb#if defined(INET) || defined(INET6) 1256219820Sjeff unsigned mtu = priv->mcast_mtu; 1257234183Sjhb#endif 1258219820Sjeff uint16_t proto; 1259219820Sjeff 1260219820Sjeff spin_lock_irqsave(&priv->lock, flags); 1261219820Sjeff 1262219820Sjeff for (;;) { 1263219820Sjeff IF_DEQUEUE(&priv->cm.mb_queue, mb); 1264219820Sjeff if (mb == NULL) 
1265219820Sjeff break; 1266219820Sjeff spin_unlock_irqrestore(&priv->lock, flags); 1267219820Sjeff 1268219820Sjeff proto = htons(*mtod(mb, uint16_t *)); 1269219820Sjeff m_adj(mb, IPOIB_ENCAP_LEN); 1270234183Sjhb switch (proto) { 1271234183Sjhb#if defined(INET) 1272234183Sjhb case ETHERTYPE_IP: 1273219820Sjeff icmp_error(mb, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 0, mtu); 1274234183Sjhb break; 1275234183Sjhb#endif 1276219820Sjeff#if defined(INET6) 1277234183Sjhb case ETHERTYPE_IPV6: 1278219820Sjeff icmp6_error(mb, ICMP6_PACKET_TOO_BIG, 0, mtu); 1279234183Sjhb break; 1280219820Sjeff#endif 1281234183Sjhb default: 1282219820Sjeff m_freem(mb); 1283234183Sjhb } 1284219820Sjeff 1285219820Sjeff spin_lock_irqsave(&priv->lock, flags); 1286219820Sjeff } 1287219820Sjeff 1288219820Sjeff spin_unlock_irqrestore(&priv->lock, flags); 1289219820Sjeff} 1290219820Sjeff 1291219820Sjeffvoid 1292219820Sjeffipoib_cm_mb_too_long(struct ipoib_dev_priv *priv, struct mbuf *mb, unsigned int mtu) 1293219820Sjeff{ 1294219820Sjeff int e = priv->cm.mb_queue.ifq_len; 1295219820Sjeff 1296219820Sjeff IF_ENQUEUE(&priv->cm.mb_queue, mb); 1297219820Sjeff if (e == 0) 1298219820Sjeff queue_work(ipoib_workqueue, &priv->cm.mb_task); 1299219820Sjeff} 1300219820Sjeff 1301219820Sjeffstatic void ipoib_cm_rx_reap(struct work_struct *work) 1302219820Sjeff{ 1303219820Sjeff ipoib_cm_free_rx_reap_list(container_of(work, struct ipoib_dev_priv, 1304219820Sjeff cm.rx_reap_task)); 1305219820Sjeff} 1306219820Sjeff 1307219820Sjeffstatic void ipoib_cm_stale_task(struct work_struct *work) 1308219820Sjeff{ 1309219820Sjeff struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, 1310219820Sjeff cm.stale_task.work); 1311219820Sjeff struct ipoib_cm_rx *p; 1312219820Sjeff int ret; 1313219820Sjeff 1314219820Sjeff spin_lock_irq(&priv->lock); 1315219820Sjeff while (!list_empty(&priv->cm.passive_ids)) { 1316219820Sjeff /* List is sorted by LRU, start from tail, 1317219820Sjeff * stop when we see a recently used entry */ 
1318219820Sjeff p = list_entry(priv->cm.passive_ids.prev, typeof(*p), list); 1319219820Sjeff if (time_before_eq(jiffies, p->jiffies + IPOIB_CM_RX_TIMEOUT)) 1320219820Sjeff break; 1321219820Sjeff list_move(&p->list, &priv->cm.rx_error_list); 1322219820Sjeff p->state = IPOIB_CM_RX_ERROR; 1323219820Sjeff spin_unlock_irq(&priv->lock); 1324219820Sjeff ret = ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE); 1325219820Sjeff if (ret) 1326219820Sjeff ipoib_warn(priv, "unable to move qp to error state: %d\n", ret); 1327219820Sjeff spin_lock_irq(&priv->lock); 1328219820Sjeff } 1329219820Sjeff 1330219820Sjeff if (!list_empty(&priv->cm.passive_ids)) 1331219820Sjeff queue_delayed_work(ipoib_workqueue, 1332219820Sjeff &priv->cm.stale_task, IPOIB_CM_RX_DELAY); 1333219820Sjeff spin_unlock_irq(&priv->lock); 1334219820Sjeff} 1335219820Sjeff 1336219820Sjeff 1337219820Sjeffstatic void ipoib_cm_create_srq(struct ipoib_dev_priv *priv, int max_sge) 1338219820Sjeff{ 1339219820Sjeff struct ib_srq_init_attr srq_init_attr = { 1340219820Sjeff .attr = { 1341219820Sjeff .max_wr = ipoib_recvq_size, 1342219820Sjeff .max_sge = max_sge 1343219820Sjeff } 1344219820Sjeff }; 1345219820Sjeff 1346219820Sjeff priv->cm.srq = ib_create_srq(priv->pd, &srq_init_attr); 1347219820Sjeff if (IS_ERR(priv->cm.srq)) { 1348219820Sjeff if (PTR_ERR(priv->cm.srq) != -ENOSYS) 1349219820Sjeff printk(KERN_WARNING "%s: failed to allocate SRQ, error %ld\n", 1350219820Sjeff priv->ca->name, PTR_ERR(priv->cm.srq)); 1351219820Sjeff priv->cm.srq = NULL; 1352219820Sjeff return; 1353219820Sjeff } 1354219820Sjeff 1355219820Sjeff priv->cm.srq_ring = kzalloc(ipoib_recvq_size * sizeof *priv->cm.srq_ring, GFP_KERNEL); 1356219820Sjeff if (!priv->cm.srq_ring) { 1357219820Sjeff printk(KERN_WARNING "%s: failed to allocate CM SRQ ring (%d entries)\n", 1358219820Sjeff priv->ca->name, ipoib_recvq_size); 1359219820Sjeff ib_destroy_srq(priv->cm.srq); 1360219820Sjeff priv->cm.srq = NULL; 1361219820Sjeff return; 1362219820Sjeff } 
1363219820Sjeff 1364219820Sjeff memset(priv->cm.srq_ring, 0, ipoib_recvq_size * sizeof *priv->cm.srq_ring); 1365219820Sjeff} 1366219820Sjeff 1367219820Sjeffint ipoib_cm_dev_init(struct ipoib_dev_priv *priv) 1368219820Sjeff{ 1369219820Sjeff struct ifnet *dev = priv->dev; 1370219820Sjeff int i, ret; 1371219820Sjeff struct ib_device_attr attr; 1372219820Sjeff 1373219820Sjeff INIT_LIST_HEAD(&priv->cm.passive_ids); 1374219820Sjeff INIT_LIST_HEAD(&priv->cm.reap_list); 1375219820Sjeff INIT_LIST_HEAD(&priv->cm.start_list); 1376219820Sjeff INIT_LIST_HEAD(&priv->cm.rx_error_list); 1377219820Sjeff INIT_LIST_HEAD(&priv->cm.rx_flush_list); 1378219820Sjeff INIT_LIST_HEAD(&priv->cm.rx_drain_list); 1379219820Sjeff INIT_LIST_HEAD(&priv->cm.rx_reap_list); 1380219820Sjeff INIT_WORK(&priv->cm.start_task, ipoib_cm_tx_start); 1381219820Sjeff INIT_WORK(&priv->cm.reap_task, ipoib_cm_tx_reap); 1382219820Sjeff INIT_WORK(&priv->cm.mb_task, ipoib_cm_mb_reap); 1383219820Sjeff INIT_WORK(&priv->cm.rx_reap_task, ipoib_cm_rx_reap); 1384219820Sjeff INIT_DELAYED_WORK(&priv->cm.stale_task, ipoib_cm_stale_task); 1385219820Sjeff 1386219820Sjeff bzero(&priv->cm.mb_queue, sizeof(priv->cm.mb_queue)); 1387219820Sjeff mtx_init(&priv->cm.mb_queue.ifq_mtx, 1388219820Sjeff dev->if_xname, "if send queue", MTX_DEF); 1389219820Sjeff 1390219820Sjeff ret = ib_query_device(priv->ca, &attr); 1391219820Sjeff if (ret) { 1392219820Sjeff printk(KERN_WARNING "ib_query_device() failed with %d\n", ret); 1393219820Sjeff return ret; 1394219820Sjeff } 1395219820Sjeff 1396219820Sjeff ipoib_dbg(priv, "max_srq_sge=%d\n", attr.max_srq_sge); 1397219820Sjeff 1398219820Sjeff attr.max_srq_sge = min_t(int, IPOIB_CM_RX_SG, attr.max_srq_sge); 1399219820Sjeff ipoib_cm_create_srq(priv, attr.max_srq_sge); 1400219820Sjeff if (ipoib_cm_has_srq(priv)) { 1401219820Sjeff priv->cm.max_cm_mtu = attr.max_srq_sge * MJUMPAGESIZE; 1402219820Sjeff priv->cm.num_frags = attr.max_srq_sge; 1403219820Sjeff ipoib_dbg(priv, "max_cm_mtu = 0x%x, 
num_frags=%d\n", 1404219820Sjeff priv->cm.max_cm_mtu, priv->cm.num_frags); 1405219820Sjeff } else { 1406219820Sjeff priv->cm.max_cm_mtu = IPOIB_CM_MAX_MTU; 1407219820Sjeff priv->cm.num_frags = IPOIB_CM_RX_SG; 1408219820Sjeff } 1409219820Sjeff 1410219820Sjeff ipoib_cm_init_rx_wr(priv, &priv->cm.rx_wr, priv->cm.rx_sge); 1411219820Sjeff 1412219820Sjeff if (ipoib_cm_has_srq(priv)) { 1413219820Sjeff for (i = 0; i < ipoib_recvq_size; ++i) { 1414219820Sjeff if (!ipoib_cm_alloc_rx_mb(priv, &priv->cm.srq_ring[i])) { 1415219820Sjeff ipoib_warn(priv, "failed to allocate " 1416219820Sjeff "receive buffer %d\n", i); 1417219820Sjeff ipoib_cm_dev_cleanup(priv); 1418219820Sjeff return -ENOMEM; 1419219820Sjeff } 1420219820Sjeff 1421219820Sjeff if (ipoib_cm_post_receive_srq(priv, i)) { 1422219820Sjeff ipoib_warn(priv, "ipoib_cm_post_receive_srq " 1423219820Sjeff "failed for buf %d\n", i); 1424219820Sjeff ipoib_cm_dev_cleanup(priv); 1425219820Sjeff return -EIO; 1426219820Sjeff } 1427219820Sjeff } 1428219820Sjeff } 1429219820Sjeff 1430219820Sjeff IF_LLADDR(priv->dev)[0] = IPOIB_FLAGS_RC; 1431219820Sjeff return 0; 1432219820Sjeff} 1433219820Sjeff 1434219820Sjeffvoid ipoib_cm_dev_cleanup(struct ipoib_dev_priv *priv) 1435219820Sjeff{ 1436219820Sjeff int ret; 1437219820Sjeff 1438219820Sjeff if (!priv->cm.srq) 1439219820Sjeff return; 1440219820Sjeff 1441219820Sjeff ipoib_dbg(priv, "Cleanup ipoib connected mode.\n"); 1442219820Sjeff 1443219820Sjeff ret = ib_destroy_srq(priv->cm.srq); 1444219820Sjeff if (ret) 1445219820Sjeff ipoib_warn(priv, "ib_destroy_srq failed: %d\n", ret); 1446219820Sjeff 1447219820Sjeff priv->cm.srq = NULL; 1448219820Sjeff if (!priv->cm.srq_ring) 1449219820Sjeff return; 1450219820Sjeff 1451219820Sjeff ipoib_cm_free_rx_ring(priv, priv->cm.srq_ring); 1452219820Sjeff priv->cm.srq_ring = NULL; 1453219820Sjeff 1454219820Sjeff mtx_destroy(&priv->cm.mb_queue.ifq_mtx); 1455219820Sjeff} 1456219820Sjeff 1457219820Sjeff#endif /* CONFIG_INFINIBAND_IPOIB_CM */ 1458