/* ipoib_cm.c revision 272225 */
/*
 * Copyright (c) 2006 Mellanox Technologies. All rights reserved
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 *  - Redistributions of source code must retain the above
 *    copyright notice, this list of conditions and the following
 *    disclaimer.
 *
 *  - Redistributions in binary form must reproduce the above
 *    copyright notice, this list of conditions and the following
 *    disclaimer in the documentation and/or other materials
 *    provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
31219820Sjeff */ 32219820Sjeff 33219820Sjeff#include "ipoib.h" 34219820Sjeff 35219820Sjeff#ifdef CONFIG_INFINIBAND_IPOIB_CM 36219820Sjeff 37219820Sjeff#include <netinet/ip.h> 38219820Sjeff#include <netinet/ip_icmp.h> 39219820Sjeff#include <netinet/icmp6.h> 40219820Sjeff 41219820Sjeff#include <rdma/ib_cm.h> 42219820Sjeff#include <rdma/ib_cache.h> 43219820Sjeff#include <linux/delay.h> 44219820Sjeff 45219820Sjeffint ipoib_max_conn_qp = 128; 46219820Sjeff 47219820Sjeffmodule_param_named(max_nonsrq_conn_qp, ipoib_max_conn_qp, int, 0444); 48219820SjeffMODULE_PARM_DESC(max_nonsrq_conn_qp, 49219820Sjeff "Max number of connected-mode QPs per interface " 50219820Sjeff "(applied only if shared receive queue is not available)"); 51219820Sjeff 52219820Sjeff#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG_DATA 53219820Sjeffstatic int data_debug_level; 54219820Sjeff 55219820Sjeffmodule_param_named(cm_data_debug_level, data_debug_level, int, 0644); 56219820SjeffMODULE_PARM_DESC(cm_data_debug_level, 57219820Sjeff "Enable data path debug tracing for connected mode if > 0"); 58219820Sjeff#endif 59219820Sjeff 60219820Sjeff#define IPOIB_CM_IETF_ID 0x1000000000000000ULL 61219820Sjeff 62219820Sjeff#define IPOIB_CM_RX_UPDATE_TIME (256 * HZ) 63219820Sjeff#define IPOIB_CM_RX_TIMEOUT (2 * 256 * HZ) 64219820Sjeff#define IPOIB_CM_RX_DELAY (3 * 256 * HZ) 65219820Sjeff#define IPOIB_CM_RX_UPDATE_MASK (0x3) 66219820Sjeff 67219820Sjeffstatic struct ib_qp_attr ipoib_cm_err_attr = { 68219820Sjeff .qp_state = IB_QPS_ERR 69219820Sjeff}; 70219820Sjeff 71219820Sjeff#define IPOIB_CM_RX_DRAIN_WRID 0xffffffff 72219820Sjeff 73219820Sjeffstatic struct ib_send_wr ipoib_cm_rx_drain_wr = { 74219820Sjeff .wr_id = IPOIB_CM_RX_DRAIN_WRID, 75219820Sjeff .opcode = IB_WR_SEND, 76219820Sjeff}; 77219820Sjeff 78219820Sjeffstatic int ipoib_cm_tx_handler(struct ib_cm_id *cm_id, 79219820Sjeff struct ib_cm_event *event); 80219820Sjeff 81219820Sjeffstatic void ipoib_cm_dma_unmap_rx(struct ipoib_dev_priv *priv, struct ipoib_cm_rx_buf 
*rx_req) 82219820Sjeff{ 83219820Sjeff 84219820Sjeff ipoib_dma_unmap_rx(priv, (struct ipoib_rx_buf *)rx_req); 85219820Sjeff 86219820Sjeff} 87219820Sjeff 88219820Sjeffstatic int ipoib_cm_post_receive_srq(struct ipoib_dev_priv *priv, int id) 89219820Sjeff{ 90219820Sjeff struct ib_recv_wr *bad_wr; 91219820Sjeff struct ipoib_rx_buf *rx_req; 92219820Sjeff struct mbuf *m; 93219820Sjeff int ret; 94219820Sjeff int i; 95219820Sjeff 96219820Sjeff rx_req = (struct ipoib_rx_buf *)&priv->cm.srq_ring[id]; 97219820Sjeff for (m = rx_req->mb, i = 0; m != NULL; m = m->m_next, i++) { 98219820Sjeff priv->cm.rx_sge[i].addr = rx_req->mapping[i]; 99219820Sjeff priv->cm.rx_sge[i].length = m->m_len; 100219820Sjeff } 101219820Sjeff 102219820Sjeff priv->cm.rx_wr.num_sge = i; 103219820Sjeff priv->cm.rx_wr.wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV; 104219820Sjeff 105219820Sjeff ret = ib_post_srq_recv(priv->cm.srq, &priv->cm.rx_wr, &bad_wr); 106219820Sjeff if (unlikely(ret)) { 107219820Sjeff ipoib_warn(priv, "post srq failed for buf %d (%d)\n", id, ret); 108219820Sjeff ipoib_dma_unmap_rx(priv, rx_req); 109219820Sjeff m_freem(priv->cm.srq_ring[id].mb); 110219820Sjeff priv->cm.srq_ring[id].mb = NULL; 111219820Sjeff } 112219820Sjeff 113219820Sjeff return ret; 114219820Sjeff} 115219820Sjeff 116219820Sjeffstatic int ipoib_cm_post_receive_nonsrq(struct ipoib_dev_priv *priv, 117219820Sjeff struct ipoib_cm_rx *rx, 118219820Sjeff struct ib_recv_wr *wr, 119219820Sjeff struct ib_sge *sge, int id) 120219820Sjeff{ 121219820Sjeff struct ipoib_rx_buf *rx_req; 122219820Sjeff struct ib_recv_wr *bad_wr; 123219820Sjeff struct mbuf *m; 124219820Sjeff int ret; 125219820Sjeff int i; 126219820Sjeff 127219820Sjeff rx_req = (struct ipoib_rx_buf *)&rx->rx_ring[id]; 128219820Sjeff for (m = rx_req->mb, i = 0; m != NULL; m = m->m_next, i++) { 129219820Sjeff sge[i].addr = rx_req->mapping[i]; 130219820Sjeff sge[i].length = m->m_len; 131219820Sjeff } 132219820Sjeff 133219820Sjeff wr->num_sge = i; 134219820Sjeff wr->wr_id = id | 
IPOIB_OP_CM | IPOIB_OP_RECV; 135219820Sjeff 136219820Sjeff ret = ib_post_recv(rx->qp, wr, &bad_wr); 137219820Sjeff if (unlikely(ret)) { 138219820Sjeff ipoib_warn(priv, "post recv failed for buf %d (%d)\n", id, ret); 139219820Sjeff ipoib_dma_unmap_rx(priv, rx_req); 140219820Sjeff m_freem(rx->rx_ring[id].mb); 141219820Sjeff rx->rx_ring[id].mb = NULL; 142219820Sjeff } 143219820Sjeff 144219820Sjeff return ret; 145219820Sjeff} 146219820Sjeff 147219820Sjeffstatic struct mbuf * 148219820Sjeffipoib_cm_alloc_rx_mb(struct ipoib_dev_priv *priv, struct ipoib_cm_rx_buf *rx_req) 149219820Sjeff{ 150219820Sjeff return ipoib_alloc_map_mb(priv, (struct ipoib_rx_buf *)rx_req, 151219820Sjeff priv->cm.max_cm_mtu); 152219820Sjeff} 153219820Sjeff 154219820Sjeffstatic void ipoib_cm_free_rx_ring(struct ipoib_dev_priv *priv, 155219820Sjeff struct ipoib_cm_rx_buf *rx_ring) 156219820Sjeff{ 157219820Sjeff int i; 158219820Sjeff 159219820Sjeff for (i = 0; i < ipoib_recvq_size; ++i) 160219820Sjeff if (rx_ring[i].mb) { 161219820Sjeff ipoib_cm_dma_unmap_rx(priv, &rx_ring[i]); 162219820Sjeff m_freem(rx_ring[i].mb); 163219820Sjeff } 164219820Sjeff 165219820Sjeff kfree(rx_ring); 166219820Sjeff} 167219820Sjeff 168219820Sjeffstatic void ipoib_cm_start_rx_drain(struct ipoib_dev_priv *priv) 169219820Sjeff{ 170219820Sjeff struct ib_send_wr *bad_wr; 171219820Sjeff struct ipoib_cm_rx *p; 172219820Sjeff 173219820Sjeff /* We only reserved 1 extra slot in CQ for drain WRs, so 174219820Sjeff * make sure we have at most 1 outstanding WR. */ 175219820Sjeff if (list_empty(&priv->cm.rx_flush_list) || 176219820Sjeff !list_empty(&priv->cm.rx_drain_list)) 177219820Sjeff return; 178219820Sjeff 179219820Sjeff /* 180219820Sjeff * QPs on flush list are error state. This way, a "flush 181219820Sjeff * error" WC will be immediately generated for each WR we post. 
182219820Sjeff */ 183219820Sjeff p = list_entry(priv->cm.rx_flush_list.next, typeof(*p), list); 184219820Sjeff if (ib_post_send(p->qp, &ipoib_cm_rx_drain_wr, &bad_wr)) 185219820Sjeff ipoib_warn(priv, "failed to post drain wr\n"); 186219820Sjeff 187219820Sjeff list_splice_init(&priv->cm.rx_flush_list, &priv->cm.rx_drain_list); 188219820Sjeff} 189219820Sjeff 190219820Sjeffstatic void ipoib_cm_rx_event_handler(struct ib_event *event, void *ctx) 191219820Sjeff{ 192219820Sjeff struct ipoib_cm_rx *p = ctx; 193219820Sjeff struct ipoib_dev_priv *priv = p->priv; 194219820Sjeff unsigned long flags; 195219820Sjeff 196219820Sjeff if (event->event != IB_EVENT_QP_LAST_WQE_REACHED) 197219820Sjeff return; 198219820Sjeff 199219820Sjeff spin_lock_irqsave(&priv->lock, flags); 200219820Sjeff list_move(&p->list, &priv->cm.rx_flush_list); 201219820Sjeff p->state = IPOIB_CM_RX_FLUSH; 202219820Sjeff ipoib_cm_start_rx_drain(priv); 203219820Sjeff spin_unlock_irqrestore(&priv->lock, flags); 204219820Sjeff} 205219820Sjeff 206219820Sjeffstatic struct ib_qp *ipoib_cm_create_rx_qp(struct ipoib_dev_priv *priv, 207219820Sjeff struct ipoib_cm_rx *p) 208219820Sjeff{ 209219820Sjeff struct ib_qp_init_attr attr = { 210219820Sjeff .event_handler = ipoib_cm_rx_event_handler, 211219820Sjeff .send_cq = priv->recv_cq, /* For drain WR */ 212219820Sjeff .recv_cq = priv->recv_cq, 213219820Sjeff .srq = priv->cm.srq, 214219820Sjeff .cap.max_send_wr = 1, /* For drain WR */ 215219820Sjeff .cap.max_send_sge = 1, 216219820Sjeff .sq_sig_type = IB_SIGNAL_ALL_WR, 217219820Sjeff .qp_type = IB_QPT_RC, 218219820Sjeff .qp_context = p, 219219820Sjeff }; 220219820Sjeff 221219820Sjeff if (!ipoib_cm_has_srq(priv)) { 222219820Sjeff attr.cap.max_recv_wr = ipoib_recvq_size; 223219820Sjeff attr.cap.max_recv_sge = priv->cm.num_frags; 224219820Sjeff } 225219820Sjeff 226219820Sjeff return ib_create_qp(priv->pd, &attr); 227219820Sjeff} 228219820Sjeff 229219820Sjeffstatic int ipoib_cm_modify_rx_qp(struct ipoib_dev_priv *priv, 
230219820Sjeff struct ib_cm_id *cm_id, struct ib_qp *qp, 231219820Sjeff unsigned psn) 232219820Sjeff{ 233219820Sjeff struct ib_qp_attr qp_attr; 234219820Sjeff int qp_attr_mask, ret; 235219820Sjeff 236219820Sjeff qp_attr.qp_state = IB_QPS_INIT; 237219820Sjeff ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask); 238219820Sjeff if (ret) { 239219820Sjeff ipoib_warn(priv, "failed to init QP attr for INIT: %d\n", ret); 240219820Sjeff return ret; 241219820Sjeff } 242219820Sjeff ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask); 243219820Sjeff if (ret) { 244219820Sjeff ipoib_warn(priv, "failed to modify QP to INIT: %d\n", ret); 245219820Sjeff return ret; 246219820Sjeff } 247219820Sjeff qp_attr.qp_state = IB_QPS_RTR; 248219820Sjeff ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask); 249219820Sjeff if (ret) { 250219820Sjeff ipoib_warn(priv, "failed to init QP attr for RTR: %d\n", ret); 251219820Sjeff return ret; 252219820Sjeff } 253219820Sjeff qp_attr.rq_psn = psn; 254219820Sjeff ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask); 255219820Sjeff if (ret) { 256219820Sjeff ipoib_warn(priv, "failed to modify QP to RTR: %d\n", ret); 257219820Sjeff return ret; 258219820Sjeff } 259219820Sjeff 260219820Sjeff /* 261219820Sjeff * Current Mellanox HCA firmware won't generate completions 262219820Sjeff * with error for drain WRs unless the QP has been moved to 263219820Sjeff * RTS first. This work-around leaves a window where a QP has 264219820Sjeff * moved to error asynchronously, but this will eventually get 265219820Sjeff * fixed in firmware, so let's not error out if modify QP 266219820Sjeff * fails. 
267219820Sjeff */ 268219820Sjeff qp_attr.qp_state = IB_QPS_RTS; 269219820Sjeff ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask); 270219820Sjeff if (ret) { 271219820Sjeff ipoib_warn(priv, "failed to init QP attr for RTS: %d\n", ret); 272219820Sjeff return 0; 273219820Sjeff } 274219820Sjeff ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask); 275219820Sjeff if (ret) { 276219820Sjeff ipoib_warn(priv, "failed to modify QP to RTS: %d\n", ret); 277219820Sjeff return 0; 278219820Sjeff } 279219820Sjeff 280219820Sjeff return 0; 281219820Sjeff} 282219820Sjeff 283219820Sjeffstatic void ipoib_cm_init_rx_wr(struct ipoib_dev_priv *priv, 284219820Sjeff struct ib_recv_wr *wr, 285219820Sjeff struct ib_sge *sge) 286219820Sjeff{ 287219820Sjeff int i; 288219820Sjeff 289219820Sjeff for (i = 0; i < IPOIB_CM_RX_SG; i++) 290219820Sjeff sge[i].lkey = priv->mr->lkey; 291219820Sjeff 292219820Sjeff wr->next = NULL; 293219820Sjeff wr->sg_list = sge; 294219820Sjeff wr->num_sge = 1; 295219820Sjeff} 296219820Sjeff 297219820Sjeffstatic int ipoib_cm_nonsrq_init_rx(struct ipoib_dev_priv *priv, 298219820Sjeff struct ib_cm_id *cm_id, struct ipoib_cm_rx *rx) 299219820Sjeff{ 300219820Sjeff struct { 301219820Sjeff struct ib_recv_wr wr; 302219820Sjeff struct ib_sge sge[IPOIB_CM_RX_SG]; 303219820Sjeff } *t; 304219820Sjeff int ret; 305219820Sjeff int i; 306219820Sjeff 307219820Sjeff rx->rx_ring = kzalloc(ipoib_recvq_size * sizeof *rx->rx_ring, GFP_KERNEL); 308219820Sjeff if (!rx->rx_ring) { 309219820Sjeff printk(KERN_WARNING "%s: failed to allocate CM non-SRQ ring (%d entries)\n", 310219820Sjeff priv->ca->name, ipoib_recvq_size); 311219820Sjeff return -ENOMEM; 312219820Sjeff } 313219820Sjeff 314219820Sjeff memset(rx->rx_ring, 0, ipoib_recvq_size * sizeof *rx->rx_ring); 315219820Sjeff 316219820Sjeff t = kmalloc(sizeof *t, GFP_KERNEL); 317219820Sjeff if (!t) { 318219820Sjeff ret = -ENOMEM; 319219820Sjeff goto err_free; 320219820Sjeff } 321219820Sjeff 322219820Sjeff ipoib_cm_init_rx_wr(priv, &t->wr, 
t->sge); 323219820Sjeff 324219820Sjeff spin_lock_irq(&priv->lock); 325219820Sjeff 326219820Sjeff if (priv->cm.nonsrq_conn_qp >= ipoib_max_conn_qp) { 327219820Sjeff spin_unlock_irq(&priv->lock); 328219820Sjeff ib_send_cm_rej(cm_id, IB_CM_REJ_NO_QP, NULL, 0, NULL, 0); 329219820Sjeff ret = -EINVAL; 330219820Sjeff goto err_free; 331219820Sjeff } else 332219820Sjeff ++priv->cm.nonsrq_conn_qp; 333219820Sjeff 334219820Sjeff spin_unlock_irq(&priv->lock); 335219820Sjeff 336219820Sjeff for (i = 0; i < ipoib_recvq_size; ++i) { 337219820Sjeff if (!ipoib_cm_alloc_rx_mb(priv, &rx->rx_ring[i])) { 338219820Sjeff ipoib_warn(priv, "failed to allocate receive buffer %d\n", i); 339219820Sjeff ret = -ENOMEM; 340219820Sjeff goto err_count; 341219820Sjeff } 342219820Sjeff ret = ipoib_cm_post_receive_nonsrq(priv, rx, &t->wr, t->sge, i); 343219820Sjeff if (ret) { 344219820Sjeff ipoib_warn(priv, "ipoib_cm_post_receive_nonsrq " 345219820Sjeff "failed for buf %d\n", i); 346219820Sjeff ret = -EIO; 347219820Sjeff goto err_count; 348219820Sjeff } 349219820Sjeff } 350219820Sjeff 351219820Sjeff rx->recv_count = ipoib_recvq_size; 352219820Sjeff 353219820Sjeff kfree(t); 354219820Sjeff 355219820Sjeff return 0; 356219820Sjeff 357219820Sjefferr_count: 358219820Sjeff spin_lock_irq(&priv->lock); 359219820Sjeff --priv->cm.nonsrq_conn_qp; 360219820Sjeff spin_unlock_irq(&priv->lock); 361219820Sjeff 362219820Sjefferr_free: 363219820Sjeff kfree(t); 364219820Sjeff ipoib_cm_free_rx_ring(priv, rx->rx_ring); 365219820Sjeff 366219820Sjeff return ret; 367219820Sjeff} 368219820Sjeff 369219820Sjeffstatic int ipoib_cm_send_rep(struct ipoib_dev_priv *priv, struct ib_cm_id *cm_id, 370219820Sjeff struct ib_qp *qp, struct ib_cm_req_event_param *req, 371219820Sjeff unsigned psn) 372219820Sjeff{ 373219820Sjeff struct ipoib_cm_data data = {}; 374219820Sjeff struct ib_cm_rep_param rep = {}; 375219820Sjeff 376219820Sjeff data.qpn = cpu_to_be32(priv->qp->qp_num); 377219820Sjeff data.mtu = cpu_to_be32(priv->cm.max_cm_mtu); 
378219820Sjeff 379219820Sjeff rep.private_data = &data; 380219820Sjeff rep.private_data_len = sizeof data; 381219820Sjeff rep.flow_control = 0; 382219820Sjeff rep.rnr_retry_count = req->rnr_retry_count; 383219820Sjeff rep.srq = ipoib_cm_has_srq(priv); 384219820Sjeff rep.qp_num = qp->qp_num; 385219820Sjeff rep.starting_psn = psn; 386219820Sjeff return ib_send_cm_rep(cm_id, &rep); 387219820Sjeff} 388219820Sjeff 389219820Sjeffstatic int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) 390219820Sjeff{ 391219820Sjeff struct ipoib_dev_priv *priv = cm_id->context; 392219820Sjeff struct ipoib_cm_rx *p; 393219820Sjeff unsigned psn; 394219820Sjeff int ret; 395219820Sjeff 396219820Sjeff ipoib_dbg(priv, "REQ arrived\n"); 397219820Sjeff p = kzalloc(sizeof *p, GFP_KERNEL); 398219820Sjeff if (!p) 399219820Sjeff return -ENOMEM; 400219820Sjeff p->priv = priv; 401219820Sjeff p->id = cm_id; 402219820Sjeff cm_id->context = p; 403219820Sjeff p->state = IPOIB_CM_RX_LIVE; 404219820Sjeff p->jiffies = jiffies; 405219820Sjeff INIT_LIST_HEAD(&p->list); 406219820Sjeff 407219820Sjeff p->qp = ipoib_cm_create_rx_qp(priv, p); 408219820Sjeff if (IS_ERR(p->qp)) { 409219820Sjeff ret = PTR_ERR(p->qp); 410219820Sjeff goto err_qp; 411219820Sjeff } 412219820Sjeff 413219820Sjeff psn = random() & 0xffffff; 414219820Sjeff ret = ipoib_cm_modify_rx_qp(priv, cm_id, p->qp, psn); 415219820Sjeff if (ret) 416219820Sjeff goto err_modify; 417219820Sjeff 418219820Sjeff if (!ipoib_cm_has_srq(priv)) { 419219820Sjeff ret = ipoib_cm_nonsrq_init_rx(priv, cm_id, p); 420219820Sjeff if (ret) 421219820Sjeff goto err_modify; 422219820Sjeff } 423219820Sjeff 424219820Sjeff spin_lock_irq(&priv->lock); 425219820Sjeff queue_delayed_work(ipoib_workqueue, 426219820Sjeff &priv->cm.stale_task, IPOIB_CM_RX_DELAY); 427219820Sjeff /* Add this entry to passive ids list head, but do not re-add it 428219820Sjeff * if IB_EVENT_QP_LAST_WQE_REACHED has moved it to flush list. 
*/ 429219820Sjeff p->jiffies = jiffies; 430219820Sjeff if (p->state == IPOIB_CM_RX_LIVE) 431219820Sjeff list_move(&p->list, &priv->cm.passive_ids); 432219820Sjeff spin_unlock_irq(&priv->lock); 433219820Sjeff 434219820Sjeff ret = ipoib_cm_send_rep(priv, cm_id, p->qp, &event->param.req_rcvd, psn); 435219820Sjeff if (ret) { 436219820Sjeff ipoib_warn(priv, "failed to send REP: %d\n", ret); 437219820Sjeff if (ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE)) 438219820Sjeff ipoib_warn(priv, "unable to move qp to error state\n"); 439219820Sjeff } 440219820Sjeff return 0; 441219820Sjeff 442219820Sjefferr_modify: 443219820Sjeff ib_destroy_qp(p->qp); 444219820Sjefferr_qp: 445219820Sjeff kfree(p); 446219820Sjeff return ret; 447219820Sjeff} 448219820Sjeff 449219820Sjeffstatic int ipoib_cm_rx_handler(struct ib_cm_id *cm_id, 450219820Sjeff struct ib_cm_event *event) 451219820Sjeff{ 452219820Sjeff struct ipoib_cm_rx *p; 453219820Sjeff struct ipoib_dev_priv *priv; 454219820Sjeff 455219820Sjeff switch (event->event) { 456219820Sjeff case IB_CM_REQ_RECEIVED: 457219820Sjeff return ipoib_cm_req_handler(cm_id, event); 458219820Sjeff case IB_CM_DREQ_RECEIVED: 459219820Sjeff p = cm_id->context; 460219820Sjeff ib_send_cm_drep(cm_id, NULL, 0); 461219820Sjeff /* Fall through */ 462219820Sjeff case IB_CM_REJ_RECEIVED: 463219820Sjeff p = cm_id->context; 464219820Sjeff priv = p->priv; 465219820Sjeff if (ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE)) 466219820Sjeff ipoib_warn(priv, "unable to move qp to error state\n"); 467219820Sjeff /* Fall through */ 468219820Sjeff default: 469219820Sjeff return 0; 470219820Sjeff } 471219820Sjeff} 472219820Sjeff 473219820Sjeffvoid ipoib_cm_handle_rx_wc(struct ipoib_dev_priv *priv, struct ib_wc *wc) 474219820Sjeff{ 475219820Sjeff struct ipoib_cm_rx_buf saverx; 476219820Sjeff struct ipoib_cm_rx_buf *rx_ring; 477219820Sjeff unsigned int wr_id = wc->wr_id & ~(IPOIB_OP_CM | IPOIB_OP_RECV); 478219820Sjeff struct ifnet *dev = priv->dev; 479219820Sjeff 
struct mbuf *mb, *newmb; 480219820Sjeff struct ipoib_cm_rx *p; 481219820Sjeff int has_srq; 482219820Sjeff u_short proto; 483219820Sjeff 484219820Sjeff ipoib_dbg_data(priv, "cm recv completion: id %d, status: %d\n", 485219820Sjeff wr_id, wc->status); 486219820Sjeff 487219820Sjeff if (unlikely(wr_id >= ipoib_recvq_size)) { 488219820Sjeff if (wr_id == (IPOIB_CM_RX_DRAIN_WRID & ~(IPOIB_OP_CM | IPOIB_OP_RECV))) { 489219820Sjeff spin_lock(&priv->lock); 490219820Sjeff list_splice_init(&priv->cm.rx_drain_list, &priv->cm.rx_reap_list); 491219820Sjeff ipoib_cm_start_rx_drain(priv); 492219820Sjeff if (priv->cm.id != NULL) 493219820Sjeff queue_work(ipoib_workqueue, 494219820Sjeff &priv->cm.rx_reap_task); 495219820Sjeff spin_unlock(&priv->lock); 496219820Sjeff } else 497219820Sjeff ipoib_warn(priv, "cm recv completion event with wrid %d (> %d)\n", 498219820Sjeff wr_id, ipoib_recvq_size); 499219820Sjeff return; 500219820Sjeff } 501219820Sjeff 502219820Sjeff p = wc->qp->qp_context; 503219820Sjeff 504219820Sjeff has_srq = ipoib_cm_has_srq(priv); 505219820Sjeff rx_ring = has_srq ? 
priv->cm.srq_ring : p->rx_ring; 506219820Sjeff 507219820Sjeff mb = rx_ring[wr_id].mb; 508219820Sjeff 509219820Sjeff if (unlikely(wc->status != IB_WC_SUCCESS)) { 510219820Sjeff ipoib_dbg(priv, "cm recv error " 511219820Sjeff "(status=%d, wrid=%d vend_err %x)\n", 512219820Sjeff wc->status, wr_id, wc->vendor_err); 513272225Sglebius if_inc_counter(dev, IFCOUNTER_IERRORS, 1); 514219820Sjeff if (has_srq) 515219820Sjeff goto repost; 516219820Sjeff else { 517219820Sjeff if (!--p->recv_count) { 518219820Sjeff spin_lock(&priv->lock); 519219820Sjeff list_move(&p->list, &priv->cm.rx_reap_list); 520219820Sjeff queue_work(ipoib_workqueue, &priv->cm.rx_reap_task); 521219820Sjeff spin_unlock(&priv->lock); 522219820Sjeff } 523219820Sjeff return; 524219820Sjeff } 525219820Sjeff } 526219820Sjeff 527219820Sjeff if (unlikely(!(wr_id & IPOIB_CM_RX_UPDATE_MASK))) { 528219820Sjeff if (p && time_after_eq(jiffies, p->jiffies + IPOIB_CM_RX_UPDATE_TIME)) { 529219820Sjeff p->jiffies = jiffies; 530219820Sjeff /* Move this entry to list head, but do not re-add it 531219820Sjeff * if it has been moved out of list. */ 532219820Sjeff if (p->state == IPOIB_CM_RX_LIVE) 533219820Sjeff list_move(&p->list, &priv->cm.passive_ids); 534219820Sjeff } 535219820Sjeff } 536219820Sjeff 537219820Sjeff memcpy(&saverx, &rx_ring[wr_id], sizeof(saverx)); 538219820Sjeff newmb = ipoib_cm_alloc_rx_mb(priv, &rx_ring[wr_id]); 539219820Sjeff if (unlikely(!newmb)) { 540219820Sjeff /* 541219820Sjeff * If we can't allocate a new RX buffer, dump 542219820Sjeff * this packet and reuse the old buffer. 
543219820Sjeff */ 544219820Sjeff ipoib_dbg(priv, "failed to allocate receive buffer %d\n", wr_id); 545272225Sglebius if_inc_counter(dev, IFCOUNTER_IERRORS, 1); 546219820Sjeff memcpy(&rx_ring[wr_id], &saverx, sizeof(saverx)); 547219820Sjeff goto repost; 548219820Sjeff } 549219820Sjeff 550219820Sjeff ipoib_cm_dma_unmap_rx(priv, &saverx); 551219820Sjeff 552219820Sjeff ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n", 553219820Sjeff wc->byte_len, wc->slid); 554219820Sjeff 555219820Sjeff ipoib_dma_mb(priv, mb, wc->byte_len); 556219820Sjeff 557272225Sglebius if_inc_counter(dev, IFCOUNTER_IPACKETS, 1); 558272225Sglebius if_inc_counter(dev, IFCOUNTER_IBYTES, mb->m_pkthdr.len); 559219820Sjeff 560219820Sjeff mb->m_pkthdr.rcvif = dev; 561219820Sjeff proto = *mtod(mb, uint16_t *); 562219820Sjeff m_adj(mb, IPOIB_ENCAP_LEN); 563219820Sjeff 564219820Sjeff IPOIB_MTAP_PROTO(dev, mb, proto); 565219820Sjeff ipoib_demux(dev, mb, ntohs(proto)); 566219820Sjeff 567219820Sjeffrepost: 568219820Sjeff if (has_srq) { 569219820Sjeff if (unlikely(ipoib_cm_post_receive_srq(priv, wr_id))) 570219820Sjeff ipoib_warn(priv, "ipoib_cm_post_receive_srq failed " 571219820Sjeff "for buf %d\n", wr_id); 572219820Sjeff } else { 573219820Sjeff if (unlikely(ipoib_cm_post_receive_nonsrq(priv, p, 574219820Sjeff &priv->cm.rx_wr, 575219820Sjeff priv->cm.rx_sge, 576219820Sjeff wr_id))) { 577219820Sjeff --p->recv_count; 578219820Sjeff ipoib_warn(priv, "ipoib_cm_post_receive_nonsrq failed " 579219820Sjeff "for buf %d\n", wr_id); 580219820Sjeff } 581219820Sjeff } 582219820Sjeff} 583219820Sjeff 584219820Sjeffstatic inline int post_send(struct ipoib_dev_priv *priv, 585219820Sjeff struct ipoib_cm_tx *tx, 586219820Sjeff struct ipoib_cm_tx_buf *tx_req, 587219820Sjeff unsigned int wr_id) 588219820Sjeff{ 589219820Sjeff struct ib_send_wr *bad_wr; 590219820Sjeff struct mbuf *mb = tx_req->mb; 591219820Sjeff u64 *mapping = tx_req->mapping; 592219820Sjeff struct mbuf *m; 593219820Sjeff int i; 594219820Sjeff 595219820Sjeff 
for (m = mb, i = 0; m != NULL; m = m->m_next, i++) { 596219820Sjeff priv->tx_sge[i].addr = mapping[i]; 597219820Sjeff priv->tx_sge[i].length = m->m_len; 598219820Sjeff } 599219820Sjeff priv->tx_wr.num_sge = i; 600219820Sjeff priv->tx_wr.wr_id = wr_id | IPOIB_OP_CM; 601219820Sjeff priv->tx_wr.opcode = IB_WR_SEND; 602219820Sjeff 603219820Sjeff return ib_post_send(tx->qp, &priv->tx_wr, &bad_wr); 604219820Sjeff} 605219820Sjeff 606219820Sjeffvoid ipoib_cm_send(struct ipoib_dev_priv *priv, struct mbuf *mb, struct ipoib_cm_tx *tx) 607219820Sjeff{ 608219820Sjeff struct ipoib_cm_tx_buf *tx_req; 609219820Sjeff struct ifnet *dev = priv->dev; 610219820Sjeff 611219820Sjeff if (unlikely(priv->tx_outstanding > MAX_SEND_CQE)) 612219820Sjeff while (ipoib_poll_tx(priv)); /* nothing */ 613219820Sjeff 614219820Sjeff m_adj(mb, sizeof(struct ipoib_pseudoheader)); 615219820Sjeff if (unlikely(mb->m_pkthdr.len > tx->mtu)) { 616219820Sjeff ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n", 617219820Sjeff mb->m_pkthdr.len, tx->mtu); 618272225Sglebius if_inc_counter(dev, IFCOUNTER_OERRORS, 1); 619219820Sjeff ipoib_cm_mb_too_long(priv, mb, IPOIB_CM_MTU(tx->mtu)); 620219820Sjeff return; 621219820Sjeff } 622219820Sjeff 623219820Sjeff ipoib_dbg_data(priv, "sending packet: head 0x%x length %d connection 0x%x\n", 624219820Sjeff tx->tx_head, mb->m_pkthdr.len, tx->qp->qp_num); 625219820Sjeff 626219820Sjeff 627219820Sjeff /* 628219820Sjeff * We put the mb into the tx_ring _before_ we call post_send() 629219820Sjeff * because it's entirely possible that the completion handler will 630219820Sjeff * run before we execute anything after the post_send(). That 631219820Sjeff * means we have to make sure everything is properly recorded and 632219820Sjeff * our state is consistent before we call post_send(). 
633219820Sjeff */ 634219820Sjeff tx_req = &tx->tx_ring[tx->tx_head & (ipoib_sendq_size - 1)]; 635219820Sjeff tx_req->mb = mb; 636219820Sjeff if (unlikely(ipoib_dma_map_tx(priv->ca, (struct ipoib_tx_buf *)tx_req, 637219820Sjeff priv->cm.num_frags))) { 638272225Sglebius if_inc_counter(dev, IFCOUNTER_OERRORS, 1); 639219820Sjeff if (tx_req->mb) 640219820Sjeff m_freem(tx_req->mb); 641219820Sjeff return; 642219820Sjeff } 643219820Sjeff 644219820Sjeff if (unlikely(post_send(priv, tx, tx_req, tx->tx_head & (ipoib_sendq_size - 1)))) { 645219820Sjeff ipoib_warn(priv, "post_send failed\n"); 646272225Sglebius if_inc_counter(dev, IFCOUNTER_OERRORS, 1); 647219820Sjeff ipoib_dma_unmap_tx(priv->ca, (struct ipoib_tx_buf *)tx_req); 648219820Sjeff m_freem(mb); 649219820Sjeff } else { 650219820Sjeff ++tx->tx_head; 651219820Sjeff 652219820Sjeff if (++priv->tx_outstanding == ipoib_sendq_size) { 653219820Sjeff ipoib_dbg(priv, "TX ring 0x%x full, stopping kernel net queue\n", 654219820Sjeff tx->qp->qp_num); 655219820Sjeff if (ib_req_notify_cq(priv->send_cq, IB_CQ_NEXT_COMP)) 656219820Sjeff ipoib_warn(priv, "request notify on send CQ failed\n"); 657219820Sjeff dev->if_drv_flags |= IFF_DRV_OACTIVE; 658219820Sjeff } 659219820Sjeff } 660219820Sjeff 661219820Sjeff} 662219820Sjeff 663219820Sjeffvoid ipoib_cm_handle_tx_wc(struct ipoib_dev_priv *priv, struct ib_wc *wc) 664219820Sjeff{ 665219820Sjeff struct ipoib_cm_tx *tx = wc->qp->qp_context; 666219820Sjeff unsigned int wr_id = wc->wr_id & ~IPOIB_OP_CM; 667219820Sjeff struct ifnet *dev = priv->dev; 668219820Sjeff struct ipoib_cm_tx_buf *tx_req; 669219820Sjeff 670219820Sjeff ipoib_dbg_data(priv, "cm send completion: id %d, status: %d\n", 671219820Sjeff wr_id, wc->status); 672219820Sjeff 673219820Sjeff if (unlikely(wr_id >= ipoib_sendq_size)) { 674219820Sjeff ipoib_warn(priv, "cm send completion event with wrid %d (> %d)\n", 675219820Sjeff wr_id, ipoib_sendq_size); 676219820Sjeff return; 677219820Sjeff } 678219820Sjeff 679219820Sjeff tx_req = 
&tx->tx_ring[wr_id]; 680219820Sjeff 681219820Sjeff ipoib_dma_unmap_tx(priv->ca, (struct ipoib_tx_buf *)tx_req); 682219820Sjeff 683219820Sjeff /* FIXME: is this right? Shouldn't we only increment on success? */ 684272225Sglebius if_inc_counter(dev, IFCOUNTER_OPACKETS, 1); 685219820Sjeff 686219820Sjeff m_freem(tx_req->mb); 687219820Sjeff 688219820Sjeff ++tx->tx_tail; 689219820Sjeff if (unlikely(--priv->tx_outstanding == ipoib_sendq_size >> 1) && 690219820Sjeff (dev->if_drv_flags & IFF_DRV_OACTIVE) != 0 && 691219820Sjeff test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) 692219820Sjeff dev->if_drv_flags &= ~IFF_DRV_OACTIVE; 693219820Sjeff 694219820Sjeff if (wc->status != IB_WC_SUCCESS && 695219820Sjeff wc->status != IB_WC_WR_FLUSH_ERR) { 696219820Sjeff struct ipoib_path *path; 697219820Sjeff 698219820Sjeff ipoib_dbg(priv, "failed cm send event " 699219820Sjeff "(status=%d, wrid=%d vend_err %x)\n", 700219820Sjeff wc->status, wr_id, wc->vendor_err); 701219820Sjeff 702219820Sjeff path = tx->path; 703219820Sjeff 704219820Sjeff if (path) { 705219820Sjeff path->cm = NULL; 706219820Sjeff rb_erase(&path->rb_node, &priv->path_tree); 707219820Sjeff list_del(&path->list); 708219820Sjeff } 709219820Sjeff 710219820Sjeff if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) { 711219820Sjeff list_move(&tx->list, &priv->cm.reap_list); 712219820Sjeff queue_work(ipoib_workqueue, &priv->cm.reap_task); 713219820Sjeff } 714219820Sjeff 715219820Sjeff clear_bit(IPOIB_FLAG_OPER_UP, &tx->flags); 716219820Sjeff } 717219820Sjeff 718219820Sjeff} 719219820Sjeff 720219820Sjeffint ipoib_cm_dev_open(struct ipoib_dev_priv *priv) 721219820Sjeff{ 722219820Sjeff int ret; 723219820Sjeff 724219820Sjeff if (!IPOIB_CM_SUPPORTED(IF_LLADDR(priv->dev))) 725219820Sjeff return 0; 726219820Sjeff 727219820Sjeff priv->cm.id = ib_create_cm_id(priv->ca, ipoib_cm_rx_handler, priv); 728219820Sjeff if (IS_ERR(priv->cm.id)) { 729219820Sjeff printk(KERN_WARNING "%s: failed to create CM ID\n", priv->ca->name); 
		ret = PTR_ERR(priv->cm.id);
		goto err_cm;
	}

	ret = ib_cm_listen(priv->cm.id, cpu_to_be64(IPOIB_CM_IETF_ID | priv->qp->qp_num),
			   0, NULL);
	if (ret) {
		printk(KERN_WARNING "%s: failed to listen on ID 0x%llx\n", priv->ca->name,
		       IPOIB_CM_IETF_ID | priv->qp->qp_num);
		goto err_listen;
	}

	return 0;

err_listen:
	ib_destroy_cm_id(priv->cm.id);
err_cm:
	priv->cm.id = NULL;
	return ret;
}

/*
 * Destroy every passive (RX) connection queued on cm.rx_reap_list.
 *
 * The reap list is first spliced onto a private list under priv->lock so
 * that the CM id / QP teardown (which may sleep) happens without the
 * spinlock held.
 */
static void ipoib_cm_free_rx_reap_list(struct ipoib_dev_priv *priv)
{
	struct ipoib_cm_rx *rx, *n;
	LIST_HEAD(list);

	spin_lock_irq(&priv->lock);
	list_splice_init(&priv->cm.rx_reap_list, &list);
	spin_unlock_irq(&priv->lock);

	list_for_each_entry_safe(rx, n, &list, list) {
		ib_destroy_cm_id(rx->id);
		ib_destroy_qp(rx->qp);
		if (!ipoib_cm_has_srq(priv)) {
			/* Without an SRQ each connection owns its own RX ring. */
			ipoib_cm_free_rx_ring(priv, rx->rx_ring);
			spin_lock_irq(&priv->lock);
			--priv->cm.nonsrq_conn_qp;
			spin_unlock_irq(&priv->lock);
		}
		kfree(rx);
	}
}

/*
 * Stop connected mode on this interface: tear down the listening CM id,
 * push all passive connections into the error state, wait (bounded) for
 * their receive queues to drain, then reap them.
 */
void ipoib_cm_dev_stop(struct ipoib_dev_priv *priv)
{
	struct ipoib_cm_rx *p;
	unsigned long begin;
	int ret;

	if (!IPOIB_CM_SUPPORTED(IF_LLADDR(priv->dev)) || !priv->cm.id)
		return;

	ib_destroy_cm_id(priv->cm.id);
	priv->cm.id = NULL;

	cancel_work_sync(&priv->cm.rx_reap_task);

	/*
	 * Move every passive connection to the error list and force its QP
	 * into the error state.  The lock is dropped around ib_modify_qp()
	 * because it may sleep.
	 */
	spin_lock_irq(&priv->lock);
	while (!list_empty(&priv->cm.passive_ids)) {
		p = list_entry(priv->cm.passive_ids.next, typeof(*p), list);
		list_move(&p->list, &priv->cm.rx_error_list);
		p->state = IPOIB_CM_RX_ERROR;
		spin_unlock_irq(&priv->lock);
		ret = ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE);
		if (ret)
			ipoib_warn(priv, "unable to move qp to error state: %d\n", ret);
		spin_lock_irq(&priv->lock);
	}

	/* Wait for all RX to be drained */
	begin = jiffies;

	while (!list_empty(&priv->cm.rx_error_list) ||
	       !list_empty(&priv->cm.rx_flush_list) ||
	       !list_empty(&priv->cm.rx_drain_list)) {
		if (time_after(jiffies, begin + 5 * HZ)) {
			ipoib_warn(priv, "RX drain timing out\n");

			/*
			 * assume the HW is wedged and just free up everything.
			 */
			list_splice_init(&priv->cm.rx_flush_list,
					 &priv->cm.rx_reap_list);
			list_splice_init(&priv->cm.rx_error_list,
					 &priv->cm.rx_reap_list);
			list_splice_init(&priv->cm.rx_drain_list,
					 &priv->cm.rx_reap_list);
			break;
		}
		spin_unlock_irq(&priv->lock);
		msleep(1);
		ipoib_drain_cq(priv);
		spin_lock_irq(&priv->lock);
	}

	spin_unlock_irq(&priv->lock);

	ipoib_cm_free_rx_reap_list(priv);

	cancel_delayed_work(&priv->cm.stale_task);
}

/*
 * Active-side REP handler: move our TX QP through RTR and RTS, flush any
 * mbufs that were queued on the path while the connection was forming,
 * and acknowledge with an RTU.  Returns 0 on success or a negative errno,
 * in which case the caller sends a REJ.
 */
static int ipoib_cm_rep_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
{
	struct ipoib_cm_tx *p = cm_id->context;
	struct ipoib_dev_priv *priv = p->priv;
	struct ipoib_cm_data *data = event->private_data;
	struct ifqueue mbqueue;
	struct ib_qp_attr qp_attr;
	int qp_attr_mask, ret;
	struct mbuf *mb;

	ipoib_dbg(priv, "cm rep handler\n");
	p->mtu = be32_to_cpu(data->mtu);

	/* The peer must be able to take at least our encapsulation header. */
	if (p->mtu <= IPOIB_ENCAP_LEN) {
		ipoib_warn(priv, "Rejecting connection: mtu %d <= %d\n",
			   p->mtu, IPOIB_ENCAP_LEN);
		return -EINVAL;
	}

	qp_attr.qp_state = IB_QPS_RTR;
	ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to init QP attr for RTR: %d\n", ret);
		return ret;
	}

	qp_attr.rq_psn = 0 /* FIXME */;
	ret = ib_modify_qp(p->qp, &qp_attr, qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to modify QP to RTR: %d\n", ret);
		return ret;
	}

	qp_attr.qp_state = IB_QPS_RTS;
	ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to init QP attr for RTS: %d\n", ret);
		return ret;
	}
	ret = ib_modify_qp(p->qp, &qp_attr, qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to modify QP to RTS: %d\n", ret);
		return ret;
	}

	bzero(&mbqueue, sizeof(mbqueue));

	/*
	 * Drain the path's pending-send queue under the lock, then retransmit
	 * the mbufs through the interface once the lock is dropped.
	 */
	spin_lock_irq(&priv->lock);
	set_bit(IPOIB_FLAG_OPER_UP, &p->flags);
	if (p->path)
		for (;;) {
			_IF_DEQUEUE(&p->path->queue, mb);
			if (mb == NULL)
				break;
			_IF_ENQUEUE(&mbqueue, mb);
		}
	spin_unlock_irq(&priv->lock);

	for (;;) {
		struct ifnet *dev = p->priv->dev;
		_IF_DEQUEUE(&mbqueue, mb);
		if (mb == NULL)
			break;
		mb->m_pkthdr.rcvif = dev;
		if (dev->if_transmit(dev, mb))
			ipoib_warn(priv, "dev_queue_xmit failed "
				   "to requeue packet\n");
	}

	ret = ib_send_cm_rtu(cm_id, NULL, 0);
	if (ret) {
		ipoib_warn(priv, "failed to send RTU: %d\n", ret);
		return ret;
	}
	return 0;
}

/*
 * Create the RC QP used for the active (TX) side of a connection.
 * Signals every WR so tx_tail can track completions one-for-one.
 */
static struct ib_qp *ipoib_cm_create_tx_qp(struct ipoib_dev_priv *priv,
					   struct ipoib_cm_tx *tx)
{
	struct ib_qp_init_attr attr = {
		.send_cq = priv->send_cq,
		.recv_cq = priv->recv_cq,
		.srq = priv->cm.srq,
		.cap.max_send_wr =
ipoib_sendq_size, 917219820Sjeff .cap.max_send_sge = priv->cm.num_frags, 918219820Sjeff .sq_sig_type = IB_SIGNAL_ALL_WR, 919219820Sjeff .qp_type = IB_QPT_RC, 920219820Sjeff .qp_context = tx 921219820Sjeff }; 922219820Sjeff 923219820Sjeff return ib_create_qp(priv->pd, &attr); 924219820Sjeff} 925219820Sjeff 926219820Sjeffstatic int ipoib_cm_send_req(struct ipoib_dev_priv *priv, 927219820Sjeff struct ib_cm_id *id, struct ib_qp *qp, 928219820Sjeff u32 qpn, 929219820Sjeff struct ib_sa_path_rec *pathrec) 930219820Sjeff{ 931219820Sjeff struct ipoib_cm_data data = {}; 932219820Sjeff struct ib_cm_req_param req = {}; 933219820Sjeff 934219820Sjeff ipoib_dbg(priv, "cm send req\n"); 935219820Sjeff 936219820Sjeff data.qpn = cpu_to_be32(priv->qp->qp_num); 937219820Sjeff data.mtu = cpu_to_be32(priv->cm.max_cm_mtu); 938219820Sjeff 939219820Sjeff req.primary_path = pathrec; 940219820Sjeff req.alternate_path = NULL; 941219820Sjeff req.service_id = cpu_to_be64(IPOIB_CM_IETF_ID | qpn); 942219820Sjeff req.qp_num = qp->qp_num; 943219820Sjeff req.qp_type = qp->qp_type; 944219820Sjeff req.private_data = &data; 945219820Sjeff req.private_data_len = sizeof data; 946219820Sjeff req.flow_control = 0; 947219820Sjeff 948219820Sjeff req.starting_psn = 0; /* FIXME */ 949219820Sjeff 950219820Sjeff /* 951219820Sjeff * Pick some arbitrary defaults here; we could make these 952219820Sjeff * module parameters if anyone cared about setting them. 
953219820Sjeff */ 954219820Sjeff req.responder_resources = 4; 955219820Sjeff req.remote_cm_response_timeout = 20; 956219820Sjeff req.local_cm_response_timeout = 20; 957219820Sjeff req.retry_count = 0; /* RFC draft warns against retries */ 958219820Sjeff req.rnr_retry_count = 0; /* RFC draft warns against retries */ 959219820Sjeff req.max_cm_retries = 15; 960219820Sjeff req.srq = ipoib_cm_has_srq(priv); 961219820Sjeff return ib_send_cm_req(id, &req); 962219820Sjeff} 963219820Sjeff 964219820Sjeffstatic int ipoib_cm_modify_tx_init(struct ipoib_dev_priv *priv, 965219820Sjeff struct ib_cm_id *cm_id, struct ib_qp *qp) 966219820Sjeff{ 967219820Sjeff struct ib_qp_attr qp_attr; 968219820Sjeff int qp_attr_mask, ret; 969219820Sjeff ret = ib_find_pkey(priv->ca, priv->port, priv->pkey, &qp_attr.pkey_index); 970219820Sjeff if (ret) { 971219820Sjeff ipoib_warn(priv, "pkey 0x%x not found: %d\n", priv->pkey, ret); 972219820Sjeff return ret; 973219820Sjeff } 974219820Sjeff 975219820Sjeff qp_attr.qp_state = IB_QPS_INIT; 976219820Sjeff qp_attr.qp_access_flags = IB_ACCESS_LOCAL_WRITE; 977219820Sjeff qp_attr.port_num = priv->port; 978219820Sjeff qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX | IB_QP_PORT; 979219820Sjeff 980219820Sjeff ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask); 981219820Sjeff if (ret) { 982219820Sjeff ipoib_warn(priv, "failed to modify tx QP to INIT: %d\n", ret); 983219820Sjeff return ret; 984219820Sjeff } 985219820Sjeff return 0; 986219820Sjeff} 987219820Sjeff 988219820Sjeffstatic int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn, 989219820Sjeff struct ib_sa_path_rec *pathrec) 990219820Sjeff{ 991219820Sjeff struct ipoib_dev_priv *priv = p->priv; 992219820Sjeff int ret; 993219820Sjeff 994219820Sjeff p->tx_ring = kzalloc(ipoib_sendq_size * sizeof *p->tx_ring, GFP_KERNEL); 995219820Sjeff if (!p->tx_ring) { 996219820Sjeff ipoib_warn(priv, "failed to allocate tx ring\n"); 997219820Sjeff ret = -ENOMEM; 998219820Sjeff goto err_tx; 999219820Sjeff } 
1000219820Sjeff memset(p->tx_ring, 0, ipoib_sendq_size * sizeof *p->tx_ring); 1001219820Sjeff 1002219820Sjeff p->qp = ipoib_cm_create_tx_qp(p->priv, p); 1003219820Sjeff if (IS_ERR(p->qp)) { 1004219820Sjeff ret = PTR_ERR(p->qp); 1005219820Sjeff ipoib_warn(priv, "failed to allocate tx qp: %d\n", ret); 1006219820Sjeff goto err_qp; 1007219820Sjeff } 1008219820Sjeff 1009219820Sjeff p->id = ib_create_cm_id(priv->ca, ipoib_cm_tx_handler, p); 1010219820Sjeff if (IS_ERR(p->id)) { 1011219820Sjeff ret = PTR_ERR(p->id); 1012219820Sjeff ipoib_warn(priv, "failed to create tx cm id: %d\n", ret); 1013219820Sjeff goto err_id; 1014219820Sjeff } 1015219820Sjeff 1016219820Sjeff ret = ipoib_cm_modify_tx_init(p->priv, p->id, p->qp); 1017219820Sjeff if (ret) { 1018219820Sjeff ipoib_warn(priv, "failed to modify tx qp to rtr: %d\n", ret); 1019219820Sjeff goto err_modify; 1020219820Sjeff } 1021219820Sjeff 1022219820Sjeff ret = ipoib_cm_send_req(p->priv, p->id, p->qp, qpn, pathrec); 1023219820Sjeff if (ret) { 1024219820Sjeff ipoib_warn(priv, "failed to send cm req: %d\n", ret); 1025219820Sjeff goto err_send_cm; 1026219820Sjeff } 1027219820Sjeff 1028219820Sjeff ipoib_dbg(priv, "Request connection 0x%x for gid %pI6 qpn 0x%x\n", 1029219820Sjeff p->qp->qp_num, pathrec->dgid.raw, qpn); 1030219820Sjeff 1031219820Sjeff return 0; 1032219820Sjeff 1033219820Sjefferr_send_cm: 1034219820Sjefferr_modify: 1035219820Sjeff ib_destroy_cm_id(p->id); 1036219820Sjefferr_id: 1037219820Sjeff p->id = NULL; 1038219820Sjeff ib_destroy_qp(p->qp); 1039219820Sjefferr_qp: 1040219820Sjeff p->qp = NULL; 1041219820Sjeff kfree(p->tx_ring); 1042219820Sjefferr_tx: 1043219820Sjeff return ret; 1044219820Sjeff} 1045219820Sjeff 1046219820Sjeffstatic void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p) 1047219820Sjeff{ 1048219820Sjeff struct ipoib_dev_priv *priv = p->priv; 1049219820Sjeff struct ifnet *dev = priv->dev; 1050219820Sjeff struct ipoib_cm_tx_buf *tx_req; 1051219820Sjeff unsigned long begin; 1052219820Sjeff 1053219820Sjeff 
	ipoib_dbg(priv, "Destroy active connection 0x%x head 0x%x tail 0x%x\n",
		  p->qp ? p->qp->qp_num : 0, p->tx_head, p->tx_tail);

	if (p->path)
		ipoib_path_free(priv, p->path);

	if (p->id)
		ib_destroy_cm_id(p->id);

	if (p->tx_ring) {
		/* Wait for all sends to complete */
		begin = jiffies;
		while ((int) p->tx_tail - (int) p->tx_head < 0) {
			if (time_after(jiffies, begin + 5 * HZ)) {
				ipoib_warn(priv, "timing out; %d sends not completed\n",
					   p->tx_head - p->tx_tail);
				goto timeout;
			}

			msleep(1);
		}
	}

timeout:

	/*
	 * Reap whatever is still outstanding on the ring: unmap DMA, free
	 * the mbuf, and re-enable the interface send queue if we just
	 * dropped below the half-full watermark.
	 */
	while ((int) p->tx_tail - (int) p->tx_head < 0) {
		tx_req = &p->tx_ring[p->tx_tail & (ipoib_sendq_size - 1)];
		ipoib_dma_unmap_tx(priv->ca, (struct ipoib_tx_buf *)tx_req);
		m_freem(tx_req->mb);
		++p->tx_tail;
		if (unlikely(--priv->tx_outstanding == ipoib_sendq_size >> 1) &&
		    (dev->if_drv_flags & IFF_DRV_OACTIVE) != 0 &&
		    test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
			dev->if_drv_flags &= ~IFF_DRV_OACTIVE;
	}

	if (p->qp)
		ib_destroy_qp(p->qp);

	kfree(p->tx_ring);
	kfree(p);
}

/*
 * CM event handler for active (TX) connections.  DREQ is acknowledged,
 * REP drives the QP bring-up (rejecting on failure), and the error events
 * unlink the path and queue the connection for reaping.
 */
static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
			       struct ib_cm_event *event)
{
	struct ipoib_cm_tx *tx = cm_id->context;
	struct ipoib_dev_priv *priv = tx->priv;
	struct ipoib_path *path;
	unsigned long flags;
	int ret;

	switch (event->event) {
	case IB_CM_DREQ_RECEIVED:
		ipoib_dbg(priv, "DREQ received.\n");
		ib_send_cm_drep(cm_id, NULL, 0);
		break;
	case IB_CM_REP_RECEIVED:
		ipoib_dbg(priv, "REP received.\n");
		ret = ipoib_cm_rep_handler(cm_id, event);
		if (ret)
			ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED,
				       NULL, 0, NULL, 0);
		break;
	case IB_CM_REQ_ERROR:
	case IB_CM_REJ_RECEIVED:
	case IB_CM_TIMEWAIT_EXIT:
		ipoib_dbg(priv, "CM error %d.\n", event->event);
		spin_lock_irqsave(&priv->lock, flags);
		path = tx->path;

		/* Detach the path under the lock; free it after dropping it. */
		if (path) {
			path->cm = NULL;
			tx->path = NULL;
			rb_erase(&path->rb_node, &priv->path_tree);
			list_del(&path->list);
		}

		if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
			list_move(&tx->list, &priv->cm.reap_list);
			queue_work(ipoib_workqueue, &priv->cm.reap_task);
		}

		spin_unlock_irqrestore(&priv->lock, flags);
		if (path)
			ipoib_path_free(tx->priv, path);
		break;
	default:
		break;
	}

	return 0;
}

/*
 * Allocate a TX connection object for the given path and queue the start
 * task that will actually bring the connection up.  Returns NULL on
 * allocation failure (GFP_ATOMIC: may be called from a non-sleepable
 * context).
 */
struct ipoib_cm_tx *ipoib_cm_create_tx(struct ipoib_dev_priv *priv,
				       struct ipoib_path *path)
{
	struct ipoib_cm_tx *tx;

	tx = kzalloc(sizeof *tx, GFP_ATOMIC);
	if (!tx)
		return NULL;

	ipoib_dbg(priv, "Creating cm tx\n");
	path->cm = tx;
	tx->path = path;
	tx->priv = priv;
	list_add(&tx->list, &priv->cm.start_list);
	set_bit(IPOIB_FLAG_INITIALIZED, &tx->flags);
	queue_work(ipoib_workqueue, &priv->cm.start_task);
	return tx;
}

/*
 * Queue a TX connection for destruction by the reap task.  A no-op if the
 * connection was never initialized (or is already being reaped).
 */
void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx)
{
	struct ipoib_dev_priv *priv = tx->priv;
	if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
		spin_lock(&priv->lock);
		list_move(&tx->list, &priv->cm.reap_list);
		spin_unlock(&priv->lock);
		queue_work(ipoib_workqueue, &priv->cm.reap_task);
		ipoib_dbg(priv, "Reap connection for gid %pI6\n",
			  tx->path->pathrec.dgid.raw);
		tx->path = NULL;
	}
}

/*
 * Work handler: bring up every connection on cm.start_list.  The path
 * record is copied under the lock, then ipoib_cm_tx_init() runs unlocked
 * since it sleeps.
 */
static void ipoib_cm_tx_start(struct work_struct *work)
{
	struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
						   cm.start_task);
	struct ipoib_path *path;
	struct ipoib_cm_tx *p;
	unsigned long flags;
	int ret;

	struct ib_sa_path_rec pathrec;
	u32 qpn;

	ipoib_dbg(priv, "cm start task\n");
	spin_lock_irqsave(&priv->lock, flags);

	while (!list_empty(&priv->cm.start_list)) {
		p = list_entry(priv->cm.start_list.next, typeof(*p), list);
		list_del_init(&p->list);
		path = p->path;
		qpn = IPOIB_QPN(path->hwaddr);
		memcpy(&pathrec, &p->path->pathrec, sizeof pathrec);

		spin_unlock_irqrestore(&priv->lock, flags);

		ret = ipoib_cm_tx_init(p, qpn, &pathrec);

		spin_lock_irqsave(&priv->lock, flags);

		if (ret) {
			/* Bring-up failed: unlink the path and drop the tx. */
			path = p->path;
			if (path) {
				path->cm = NULL;
				rb_erase(&path->rb_node, &priv->path_tree);
				list_del(&path->list);
				ipoib_path_free(priv, path);
			}
			list_del(&p->list);
			kfree(p);
		}
	}

	spin_unlock_irqrestore(&priv->lock, flags);
}

/*
 * Work handler: destroy every TX connection queued on cm.reap_list.
 * The lock is dropped around ipoib_cm_tx_destroy(), which sleeps.
 */
static void ipoib_cm_tx_reap(struct work_struct *work)
{
	struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
						   cm.reap_task);
	struct ipoib_cm_tx *p;
	unsigned long flags;

	spin_lock_irqsave(&priv->lock, flags);

	while (!list_empty(&priv->cm.reap_list)) {
		p = list_entry(priv->cm.reap_list.next, typeof(*p), list);
		list_del(&p->list);
		spin_unlock_irqrestore(&priv->lock, flags);
		ipoib_cm_tx_destroy(p);
		spin_lock_irqsave(&priv->lock, flags);
	}

	spin_unlock_irqrestore(&priv->lock, flags);
}

/*
 * Work handler: for each over-sized mbuf queued by ipoib_cm_mb_too_long(),
 * strip the IPoIB encapsulation and send the protocol-appropriate
 * "fragmentation needed" / "packet too big" error back to the stack.
 */
static void ipoib_cm_mb_reap(struct work_struct *work)
{
	struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
						   cm.mb_task);
	struct mbuf *mb;
	unsigned long flags;
#if defined(INET) || defined(INET6)
	unsigned mtu = priv->mcast_mtu;
#endif
	uint16_t proto;

	spin_lock_irqsave(&priv->lock, flags);

	for (;;) {
		IF_DEQUEUE(&priv->cm.mb_queue, mb);
		if (mb == NULL)
			break;
		spin_unlock_irqrestore(&priv->lock, flags);

		/* First two bytes of the encap header hold the ethertype. */
		proto = htons(*mtod(mb, uint16_t *));
		m_adj(mb, IPOIB_ENCAP_LEN);
		switch (proto) {
#if defined(INET)
		case ETHERTYPE_IP:
			icmp_error(mb, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 0, mtu);
			break;
#endif
#if defined(INET6)
		case ETHERTYPE_IPV6:
			icmp6_error(mb, ICMP6_PACKET_TOO_BIG, 0, mtu);
			break;
#endif
		default:
			m_freem(mb);
		}

		spin_lock_irqsave(&priv->lock, flags);
	}

	spin_unlock_irqrestore(&priv->lock, flags);
}

/*
 * Queue an mbuf that exceeds the connection MTU for error reporting; kick
 * the reap task only on the empty->non-empty transition since the task
 * drains the whole queue.
 */
void
ipoib_cm_mb_too_long(struct ipoib_dev_priv *priv, struct mbuf *mb, unsigned int mtu)
{
	int e = priv->cm.mb_queue.ifq_len;

	IF_ENQUEUE(&priv->cm.mb_queue, mb);
	if (e == 0)
		queue_work(ipoib_workqueue, &priv->cm.mb_task);
}

/* Work-queue wrapper around ipoib_cm_free_rx_reap_list(). */
static void ipoib_cm_rx_reap(struct work_struct *work)
{
	ipoib_cm_free_rx_reap_list(container_of(work, struct ipoib_dev_priv,
						cm.rx_reap_task));
}

/*
 * Delayed work handler: expire passive connections that have been idle
 * longer than IPOIB_CM_RX_TIMEOUT by moving them to the error list and
 * forcing their QPs into the error state.
 */
static void ipoib_cm_stale_task(struct work_struct *work)
{
	struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
						   cm.stale_task.work);
	struct ipoib_cm_rx *p;
	int ret;

	spin_lock_irq(&priv->lock);
	while (!list_empty(&priv->cm.passive_ids)) {
		/* List is sorted by LRU, start from tail,
		 * stop when we see a recently used entry */
1313219820Sjeff p = list_entry(priv->cm.passive_ids.prev, typeof(*p), list); 1314219820Sjeff if (time_before_eq(jiffies, p->jiffies + IPOIB_CM_RX_TIMEOUT)) 1315219820Sjeff break; 1316219820Sjeff list_move(&p->list, &priv->cm.rx_error_list); 1317219820Sjeff p->state = IPOIB_CM_RX_ERROR; 1318219820Sjeff spin_unlock_irq(&priv->lock); 1319219820Sjeff ret = ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE); 1320219820Sjeff if (ret) 1321219820Sjeff ipoib_warn(priv, "unable to move qp to error state: %d\n", ret); 1322219820Sjeff spin_lock_irq(&priv->lock); 1323219820Sjeff } 1324219820Sjeff 1325219820Sjeff if (!list_empty(&priv->cm.passive_ids)) 1326219820Sjeff queue_delayed_work(ipoib_workqueue, 1327219820Sjeff &priv->cm.stale_task, IPOIB_CM_RX_DELAY); 1328219820Sjeff spin_unlock_irq(&priv->lock); 1329219820Sjeff} 1330219820Sjeff 1331219820Sjeff 1332219820Sjeffstatic void ipoib_cm_create_srq(struct ipoib_dev_priv *priv, int max_sge) 1333219820Sjeff{ 1334219820Sjeff struct ib_srq_init_attr srq_init_attr = { 1335219820Sjeff .attr = { 1336219820Sjeff .max_wr = ipoib_recvq_size, 1337219820Sjeff .max_sge = max_sge 1338219820Sjeff } 1339219820Sjeff }; 1340219820Sjeff 1341219820Sjeff priv->cm.srq = ib_create_srq(priv->pd, &srq_init_attr); 1342219820Sjeff if (IS_ERR(priv->cm.srq)) { 1343219820Sjeff if (PTR_ERR(priv->cm.srq) != -ENOSYS) 1344219820Sjeff printk(KERN_WARNING "%s: failed to allocate SRQ, error %ld\n", 1345219820Sjeff priv->ca->name, PTR_ERR(priv->cm.srq)); 1346219820Sjeff priv->cm.srq = NULL; 1347219820Sjeff return; 1348219820Sjeff } 1349219820Sjeff 1350219820Sjeff priv->cm.srq_ring = kzalloc(ipoib_recvq_size * sizeof *priv->cm.srq_ring, GFP_KERNEL); 1351219820Sjeff if (!priv->cm.srq_ring) { 1352219820Sjeff printk(KERN_WARNING "%s: failed to allocate CM SRQ ring (%d entries)\n", 1353219820Sjeff priv->ca->name, ipoib_recvq_size); 1354219820Sjeff ib_destroy_srq(priv->cm.srq); 1355219820Sjeff priv->cm.srq = NULL; 1356219820Sjeff return; 1357219820Sjeff } 
1358219820Sjeff 1359219820Sjeff memset(priv->cm.srq_ring, 0, ipoib_recvq_size * sizeof *priv->cm.srq_ring); 1360219820Sjeff} 1361219820Sjeff 1362219820Sjeffint ipoib_cm_dev_init(struct ipoib_dev_priv *priv) 1363219820Sjeff{ 1364219820Sjeff struct ifnet *dev = priv->dev; 1365219820Sjeff int i, ret; 1366219820Sjeff struct ib_device_attr attr; 1367219820Sjeff 1368219820Sjeff INIT_LIST_HEAD(&priv->cm.passive_ids); 1369219820Sjeff INIT_LIST_HEAD(&priv->cm.reap_list); 1370219820Sjeff INIT_LIST_HEAD(&priv->cm.start_list); 1371219820Sjeff INIT_LIST_HEAD(&priv->cm.rx_error_list); 1372219820Sjeff INIT_LIST_HEAD(&priv->cm.rx_flush_list); 1373219820Sjeff INIT_LIST_HEAD(&priv->cm.rx_drain_list); 1374219820Sjeff INIT_LIST_HEAD(&priv->cm.rx_reap_list); 1375219820Sjeff INIT_WORK(&priv->cm.start_task, ipoib_cm_tx_start); 1376219820Sjeff INIT_WORK(&priv->cm.reap_task, ipoib_cm_tx_reap); 1377219820Sjeff INIT_WORK(&priv->cm.mb_task, ipoib_cm_mb_reap); 1378219820Sjeff INIT_WORK(&priv->cm.rx_reap_task, ipoib_cm_rx_reap); 1379219820Sjeff INIT_DELAYED_WORK(&priv->cm.stale_task, ipoib_cm_stale_task); 1380219820Sjeff 1381219820Sjeff bzero(&priv->cm.mb_queue, sizeof(priv->cm.mb_queue)); 1382219820Sjeff mtx_init(&priv->cm.mb_queue.ifq_mtx, 1383219820Sjeff dev->if_xname, "if send queue", MTX_DEF); 1384219820Sjeff 1385219820Sjeff ret = ib_query_device(priv->ca, &attr); 1386219820Sjeff if (ret) { 1387219820Sjeff printk(KERN_WARNING "ib_query_device() failed with %d\n", ret); 1388219820Sjeff return ret; 1389219820Sjeff } 1390219820Sjeff 1391219820Sjeff ipoib_dbg(priv, "max_srq_sge=%d\n", attr.max_srq_sge); 1392219820Sjeff 1393219820Sjeff attr.max_srq_sge = min_t(int, IPOIB_CM_RX_SG, attr.max_srq_sge); 1394219820Sjeff ipoib_cm_create_srq(priv, attr.max_srq_sge); 1395219820Sjeff if (ipoib_cm_has_srq(priv)) { 1396219820Sjeff priv->cm.max_cm_mtu = attr.max_srq_sge * MJUMPAGESIZE; 1397219820Sjeff priv->cm.num_frags = attr.max_srq_sge; 1398219820Sjeff ipoib_dbg(priv, "max_cm_mtu = 0x%x, 
num_frags=%d\n", 1399219820Sjeff priv->cm.max_cm_mtu, priv->cm.num_frags); 1400219820Sjeff } else { 1401219820Sjeff priv->cm.max_cm_mtu = IPOIB_CM_MAX_MTU; 1402219820Sjeff priv->cm.num_frags = IPOIB_CM_RX_SG; 1403219820Sjeff } 1404219820Sjeff 1405219820Sjeff ipoib_cm_init_rx_wr(priv, &priv->cm.rx_wr, priv->cm.rx_sge); 1406219820Sjeff 1407219820Sjeff if (ipoib_cm_has_srq(priv)) { 1408219820Sjeff for (i = 0; i < ipoib_recvq_size; ++i) { 1409219820Sjeff if (!ipoib_cm_alloc_rx_mb(priv, &priv->cm.srq_ring[i])) { 1410219820Sjeff ipoib_warn(priv, "failed to allocate " 1411219820Sjeff "receive buffer %d\n", i); 1412219820Sjeff ipoib_cm_dev_cleanup(priv); 1413219820Sjeff return -ENOMEM; 1414219820Sjeff } 1415219820Sjeff 1416219820Sjeff if (ipoib_cm_post_receive_srq(priv, i)) { 1417219820Sjeff ipoib_warn(priv, "ipoib_cm_post_receive_srq " 1418219820Sjeff "failed for buf %d\n", i); 1419219820Sjeff ipoib_cm_dev_cleanup(priv); 1420219820Sjeff return -EIO; 1421219820Sjeff } 1422219820Sjeff } 1423219820Sjeff } 1424219820Sjeff 1425219820Sjeff IF_LLADDR(priv->dev)[0] = IPOIB_FLAGS_RC; 1426219820Sjeff return 0; 1427219820Sjeff} 1428219820Sjeff 1429219820Sjeffvoid ipoib_cm_dev_cleanup(struct ipoib_dev_priv *priv) 1430219820Sjeff{ 1431219820Sjeff int ret; 1432219820Sjeff 1433219820Sjeff if (!priv->cm.srq) 1434219820Sjeff return; 1435219820Sjeff 1436219820Sjeff ipoib_dbg(priv, "Cleanup ipoib connected mode.\n"); 1437219820Sjeff 1438219820Sjeff ret = ib_destroy_srq(priv->cm.srq); 1439219820Sjeff if (ret) 1440219820Sjeff ipoib_warn(priv, "ib_destroy_srq failed: %d\n", ret); 1441219820Sjeff 1442219820Sjeff priv->cm.srq = NULL; 1443219820Sjeff if (!priv->cm.srq_ring) 1444219820Sjeff return; 1445219820Sjeff 1446219820Sjeff ipoib_cm_free_rx_ring(priv, priv->cm.srq_ring); 1447219820Sjeff priv->cm.srq_ring = NULL; 1448219820Sjeff 1449219820Sjeff mtx_destroy(&priv->cm.mb_queue.ifq_mtx); 1450219820Sjeff} 1451219820Sjeff 1452219820Sjeff#endif /* CONFIG_INFINIBAND_IPOIB_CM */ 1453