/*
 * Copyright (c) 2007, 2014 Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */
#include <linux/mlx4/cq.h>
#include <linux/mlx4/qp.h>

#include <linux/net/ethernet.h>

#include <linux/page.h>
#include <linux/if_vlan.h>
#include <linux/vmalloc.h>
#include <linux/moduleparam.h>
#include <linux/mm.h>
#include <linux/io.h>

#include <net/if_vlan_var.h>

#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>

#include <netinet/in_systm.h>
/*#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/tcp_lro.h>
#include <netinet/udp.h>*/

#include <debug.h>

#include "mlx4_en.h"
#include "mlx4_devif_queue.h"
#include <net_interfaces/flags.h>
/*#include "utils.h"*/

enum {
	MAX_INLINE = 104, /* 128 - 16 - 4 - 4 */
	MAX_BF = 256,
	MIN_PKT_LEN = 17,
};

static int inline_thold /*__read_mostly*/ = MAX_INLINE;

/*module_param_named(inline_thold, inline_thold, uint, 0444);
MODULE_PARM_DESC(inline_thold, "threshold for using inline data");*/

int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
		struct mlx4_en_tx_ring **pring, u32 size, u16 stride, int node,
		int queue_idx)
{
	struct mlx4_en_dev *mdev = priv->mdev;
	struct mlx4_en_tx_ring *ring;
	/*uint32_t x;*/
	int tmp;
	int err;

	/*ring = kzalloc_node(sizeof(struct mlx4_en_tx_ring), GFP_KERNEL, node);
	if (!ring) {*/
	ring = calloc(1, sizeof(struct mlx4_en_tx_ring));
	if (!ring) {
		MLX4_ERR("Failed allocating TX ring\n");
		return -ENOMEM;
	}
	/*}*/

	/* Create DMA descriptor TAG */
	/*if ((err = -bus_dma_tag_create(
			bus_get_dma_tag(mdev->pdev->dev.bsddev),
			1,				any alignment
			0,				no boundary
			BUS_SPACE_MAXADDR,		lowaddr
			BUS_SPACE_MAXADDR,		highaddr
			NULL, NULL,			filter, filterarg
			MLX4_EN_TX_MAX_PAYLOAD_SIZE,	maxsize
			MLX4_EN_TX_MAX_MBUF_FRAGS,	nsegments
			MLX4_EN_TX_MAX_MBUF_SIZE,	maxsegsize
			0,				flags
			NULL, NULL,			lockfunc, lockfuncarg
			&ring->dma_tag)))
		goto done;*/

	ring->size = size;
	ring->size_mask = size - 1;
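	/*
	 * Note: size is assumed to be a power of two, so "x & size_mask"
	 * wraps an index without a modulo. Illustrative values (not taken
	 * from this driver): size = 1024 gives size_mask = 0x3ff, and a
	 * producer count of 1025 maps to ring slot 1.
	 */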
	ring->stride = stride;
	ring->inline_thold = MAX(MIN_PKT_LEN, MIN(inline_thold, MAX_INLINE));
	/*mtx_init(&ring->tx_lock.m, "mlx4 tx", NULL, MTX_DEF);
	mtx_init(&ring->comp_lock.m, "mlx4 comp", NULL, MTX_DEF);*/

	/* Allocate the buf ring */
	/*ring->br = buf_ring_alloc(MLX4_EN_DEF_TX_QUEUE_SIZE, M_DEVBUF, M_WAITOK,
			&ring->tx_lock.m);
	if (ring->br == NULL) {
		MLX4_ERR("Failed allocating tx_info ring\n");
		err = -ENOMEM;
		goto err_free_dma_tag;
	}*/

	tmp = size * sizeof(struct mlx4_en_tx_info);
	/*ring->tx_info = kzalloc_node(tmp, GFP_KERNEL, node);
	if (!ring->tx_info) {*/
	ring->tx_info = calloc(1, tmp);
	if (!ring->tx_info) {
		err = -ENOMEM;
		goto err_ring;
	}
	/*}*/

	/* Create DMA descriptor MAPs */
	/*for (x = 0; x != size; x++) {
		err = -bus_dmamap_create(ring->dma_tag, 0, &ring->tx_info[x].dma_map);
		if (err != 0) {
			while (x--) {
				bus_dmamap_destroy(ring->dma_tag, ring->tx_info[x].dma_map);
			}
			goto err_info;
		}
	}*/

	MLX4_DEBUG("Allocated tx_info ring at addr:%p size:%d\n", ring->tx_info,
			tmp);

	ring->buf_size = ALIGN(size * ring->stride, MLX4_EN_PAGE_SIZE);

	/* Allocate HW buffers on provided NUMA node */
	err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres, ring->buf_size,
			2 * BASE_PAGE_SIZE);
	if (err) {
		MLX4_ERR("Failed allocating hwq resources\n");
		goto err_dma_map;
	}

	err = mlx4_en_map_buffer(&ring->wqres.buf);
	if (err) {
		MLX4_ERR("Failed to map TX buffer\n");
		goto err_hwq_res;
	}

	ring->buf = ring->wqres.buf.direct.buf;

	MLX4_DEBUG("Allocated TX ring (addr:%p) - buf:%p size:%d "
			"buf_size:%d dma:%llx\n", ring, ring->buf, ring->size,
			ring->buf_size, (unsigned long long) ring->wqres.buf.direct.map);

	err = mlx4_qp_reserve_range(mdev->dev, 1, 1, &ring->qpn,
			MLX4_RESERVE_BF_QP);
	if (err) {
		MLX4_ERR("failed reserving qp for TX ring\n");
		goto err_map;
	}

	err = mlx4_qp_alloc(mdev->dev, ring->qpn, &ring->qp);
	if (err) {
		MLX4_ERR("Failed allocating qp %d\n", ring->qpn);
		goto err_reserve;
	}
	ring->qp.event = mlx4_en_sqp_event;

	err = mlx4_bf_alloc(mdev->dev, &ring->bf, node);
	if (err) {
		MLX4_DEBUG("working without blueflame (%d)", err);
		ring->bf.uar = &mdev->priv_uar;
		ring->bf.uar->map = mdev->uar_map;
		ring->bf_enabled = false;
	} else
		ring->bf_enabled = true;
	ring->queue_index = queue_idx;
	/*if (queue_idx < priv->num_tx_rings_p_up)
		CPU_SET(queue_idx, &ring->affinity_mask);*/

	*pring = ring;
	return 0;

err_reserve:
	/*mlx4_qp_release_range(mdev->dev, ring->qpn, 1);*/
err_map:
	/*mlx4_en_unmap_buffer(&ring->wqres.buf);*/
err_hwq_res:
	/*mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size);*/
err_dma_map:
	/*for (x = 0; x != size; x++)
		bus_dmamap_destroy(ring->dma_tag, ring->tx_info[x].dma_map);
err_info:
	vfree(ring->tx_info);
err_ring:
	buf_ring_free(ring->br, M_DEVBUF);
err_free_dma_tag:
	bus_dma_tag_destroy(ring->dma_tag);
done:
	kfree(ring);*/
err_ring:
	/* The FreeBSD/Linux cleanup above is still commented out in this
	 * port (HW resources may leak on the later error paths); at least
	 * release the CPU-side allocations. free(NULL) is a no-op when
	 * tx_info was never allocated. */
	free(ring->tx_info);
	free(ring);
	return err;
}
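/*
 * Sketch of the expected TX ring life cycle in this port (assumed
 * calling sequence, error handling elided; the size, stride and cqn
 * values are illustrative):
 *
 *	struct mlx4_en_tx_ring *ring;
 *	mlx4_en_create_tx_ring(priv, &ring, 1024, TXBB_SIZE, node, 0);
 *	mlx4_en_activate_tx_ring(priv, ring, cqn, 0);
 *	... mlx4_en_enqueue_tx() / mlx4_en_dequeue_tx() below ...
 *
 * The matching deactivate/destroy paths are still commented out, see
 * mlx4_en_destroy_tx_ring below.
 */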
/*
void mlx4_en_destroy_tx_ring(struct mlx4_en_priv *priv,
		struct mlx4_en_tx_ring **pring)
{
	struct mlx4_en_dev *mdev = priv->mdev;
	struct mlx4_en_tx_ring *ring = *pring;
	uint32_t x;

	MLX4_DEBUG("Destroying tx ring, qpn: %d\n", ring->qpn);

	buf_ring_free(ring->br, M_DEVBUF);
	if (ring->bf_enabled)
		mlx4_bf_free(mdev->dev, &ring->bf);
	mlx4_qp_remove(mdev->dev, &ring->qp);
	mlx4_qp_free(mdev->dev, &ring->qp);
	mlx4_qp_release_range(priv->mdev->dev, ring->qpn, 1);
	mlx4_en_unmap_buffer(&ring->wqres.buf);
	mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size);
	for (x = 0; x != ring->size; x++)
		bus_dmamap_destroy(ring->dma_tag, ring->tx_info[x].dma_map);
	vfree(ring->tx_info);
	mtx_destroy(&ring->tx_lock.m);
	mtx_destroy(&ring->comp_lock.m);
	bus_dma_tag_destroy(ring->dma_tag);
	kfree(ring);
	*pring = NULL;
}
*/

int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv,
		struct mlx4_en_tx_ring *ring, int cq, int user_prio)
{
	struct mlx4_en_dev *mdev = priv->mdev;
	int err;

	ring->cqn = cq;
	ring->prod = 0;
	ring->cons = 0xffffffff;
	ring->last_nr_txbb = 1;
	ring->poll_cnt = 0;
	ring->blocked = 0;
	memset(ring->buf, 0, ring->buf_size);

	ring->qp_state = MLX4_QP_STATE_RST;
	ring->doorbell_qpn = ring->qp.qpn << 8;

	mlx4_en_fill_qp_context(priv, ring->size, ring->stride, 1, 0, ring->qpn,
			ring->cqn, user_prio, &ring->context);
	if (ring->bf_enabled)
		ring->context.usr_page = cpu_to_be32(ring->bf.uar->index);

	err = mlx4_qp_to_ready(mdev->dev, &ring->wqres.mtt, &ring->context,
			&ring->qp, &ring->qp_state);
	return err;
}

/*
void mlx4_en_deactivate_tx_ring(struct mlx4_en_priv *priv,
		struct mlx4_en_tx_ring *ring)
{
	struct mlx4_en_dev *mdev = priv->mdev;

	mlx4_qp_modify(mdev->dev, NULL, ring->qp_state, MLX4_QP_STATE_RST, NULL, 0,
			0, &ring->qp);
}
*/

/*
 * Copy data from an mbuf chain starting "off" bytes from the beginning,
 * continuing for "len" bytes, into the indicated buffer.
 */
// #define mtod(m,t) ((t)(m)->m_data)
// static void m_copydata(const struct mbuf *m, int off, int len, caddr_t cp) {
// 	u_int count;
//
// 	assert(off >= 0);
// 	assert(len >= 0);
// 	while (off > 0) {
// 		assert(m != NULL);
// 		if (off < m->m_len)
// 			break;
// 		off -= m->m_len;
// 		m = m->m_next;
// 	}
// 	while (len > 0) {
// 		assert(m != NULL);
// 		count = min(m->m_len - off, len);
// 		bcopy(mtod(m, caddr_t) + off, cp, count);
// 		len -= count;
// 		cp += count;
// 		off = 0;
// 		m = m->m_next;
// 	}
// }
//
// static volatile struct mlx4_wqe_data_seg *
// mlx4_en_store_inline_lso_data(volatile struct mlx4_wqe_data_seg *dseg,
// 		struct mbuf *mb, int len, __be32 owner_bit) {
// 	uint8_t *inl = __DEVOLATILE(uint8_t *, dseg);
//
// 	/*copy data into place*/
// 	m_copydata(mb, 0, len, (caddr_t) inl + 4);
// 	dseg += DIV_ROUND_UP(4 + len, DS_SIZE_ALIGNMENT);
// 	return (dseg);
// }

// static void mlx4_en_store_inline_lso_header(
// 		volatile struct mlx4_wqe_data_seg *dseg, int len, __be32 owner_bit) {
// }

static void mlx4_en_stamp_wqe(struct mlx4_en_priv *priv,
		struct mlx4_en_tx_ring *ring, u32 index, u8 owner)
{
	struct mlx4_en_tx_info *tx_info = &ring->tx_info[index];
	struct mlx4_en_tx_desc *tx_desc = (struct mlx4_en_tx_desc *) (ring->buf
			+ (index * TXBB_SIZE));
	volatile __be32 *ptr = (__be32 *) tx_desc;
	const __be32 stamp = cpu_to_be32(STAMP_VAL | ((u32) owner << STAMP_SHIFT));
	u32 i;

	/* Stamp the freed descriptor: overwrite one dword every STAMP_STRIDE
	 * bytes with the ownership value the hardware expects after the
	 * ring wraps, so the reused TXBBs are seen as software-owned. */
	for (i = 0; i < tx_info->nr_txbb * TXBB_SIZE; i += STAMP_STRIDE) {
		*ptr = stamp;
		ptr += STAMP_DWORDS;
	}
}
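/*
 * In this port mlx4_en_free_tx_desc below only reports how many TXBBs
 * the completed descriptor occupied; the FreeBSD mbuf teardown and DMA
 * unmap remain commented out, and buffer ownership is handed back to
 * the devif layer by mlx4_en_dequeue_tx instead.
 */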
static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
		struct mlx4_en_tx_ring *ring, u32 index)
{
	struct mlx4_en_tx_info *tx_info;
	// struct mbuf *mb;

	tx_info = &ring->tx_info[index];
	// mb = tx_info->mb;

	// if (mb == NULL)
	// 	goto done;

	/*bus_dmamap_sync(ring->dma_tag, tx_info->dma_map, BUS_DMASYNC_POSTWRITE);
	bus_dmamap_unload(ring->dma_tag, tx_info->dma_map);*/

	/*TODO proper cleanup*/
	/*m_freem(mb);*/
	// done:
	return (tx_info->nr_txbb);
}

/*
int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring) {
	struct mlx4_en_priv *priv = netdev_priv(dev);
	int cnt = 0;

	Skip last polled descriptor
	ring->cons += ring->last_nr_txbb;
	MLX4_DEBUG("Freeing Tx buf - cons:0x%x prod:0x%x\n", ring->cons,
			ring->prod);

	if ((u32)(ring->prod - ring->cons) > ring->size) {
		en_warn(priv, "Tx consumer passed producer!\n");
		return 0;
	}

	while (ring->cons != ring->prod) {
		ring->last_nr_txbb = mlx4_en_free_tx_desc(priv, ring,
				ring->cons & ring->size_mask);
		ring->cons += ring->last_nr_txbb;
		cnt++;
	}

	if (cnt)
		MLX4_DEBUG("Freed %d uncompleted tx descriptors\n", cnt);

	return cnt;
}
*/

static bool mlx4_en_tx_ring_is_full(struct mlx4_en_tx_ring *ring)
{
	int wqs;
	wqs = ring->size - (ring->prod - ring->cons);
	return (wqs < (HEADROOM + (2 * MLX4_EN_TX_WQE_MAX_WQEBBS)));
}

errval_t mlx4_en_dequeue_tx(mlx4_queue_t *queue, regionid_t* rid, genoffset_t* offset,
		genoffset_t* length, genoffset_t* valid_data,
		genoffset_t* valid_length, uint64_t* flags)
{
	struct mlx4_en_priv *priv = queue->priv;
	struct mlx4_en_cq *cq = priv->tx_cq[0];
	struct mlx4_cq *mcq = &cq->mcq;
	struct mlx4_en_tx_ring *ring = priv->tx_ring[cq->ring];
	struct mlx4_cqe *cqe;
	u16 index;
	u16 new_index, ring_index, stamp_index;
	u32 txbbs_skipped = 0;
	u32 txbbs_stamp = 0;
	u32 cons_index = mcq->cons_index;
	int size = cq->size;
	u32 size_mask = ring->size_mask;
	struct mlx4_cqe *buf = cq->buf;
	int factor = priv->cqe_factor;

	if (!priv->port_up) {
		/* nothing can complete while the port is down */
		return DEVQ_ERR_QUEUE_EMPTY;
	}

	index = cons_index & size_mask;
	cqe = &buf[(index << factor) + factor];
	ring_index = ring->cons & size_mask;
	stamp_index = ring_index;

	/* Process one completed CQE, if the hardware has handed one over */
	if (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK, cons_index & size)) {

		/* make sure we read the CQE after we read the
		 * ownership bit */
		rmb();

		if (/*unlikely(*/
				(cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK)
						== MLX4_CQE_OPCODE_ERROR/*)*/) {
			MLX4_ERR(
					"CQE completed in error - vendor syndrome: 0x%x syndrome: 0x%x\n",
					((struct mlx4_err_cqe *) cqe)->vendor_err_syndrome,
					((struct mlx4_err_cqe *) cqe)->syndrome);
		}

		/* Skip over last polled CQE */
		new_index = be16_to_cpu(cqe->wqe_index) & size_mask;

		if (ring_index != new_index) {
			txbbs_skipped += ring->last_nr_txbb;
			ring_index = (ring_index + ring->last_nr_txbb) & size_mask;

			/* free next descriptor */
			struct mlx4_en_tx_info *tx_info;
			tx_info = &ring->tx_info[ring_index];
			ring->last_nr_txbb = tx_info->nr_txbb;

			mlx4_en_stamp_wqe(priv, ring, stamp_index,
					!!((ring->cons + txbbs_stamp) & ring->size));
			stamp_index = ring_index;
			txbbs_stamp = txbbs_skipped;

			*rid = queue->region_id;
			*offset = tx_info->offset;
			*length = tx_info->length;
			*valid_data = 0;
			*valid_length = tx_info->nr_bytes;
			*flags = NETIF_TXFLAG | NETIF_TXFLAG_LAST;
		}

		++cons_index;
		index = cons_index & size_mask;
		cqe = &buf[(index << factor) + factor];
	} else {
		// debug_printf("%s: NONE\n", __func__);
		return DEVQ_ERR_QUEUE_EMPTY;
	}

	/* To prevent CQ overflow we first update CQ consumer and only then
	 * the ring consumer. */
	mcq->cons_index = cons_index;
	mlx4_cq_set_ci(mcq);
	wmb();
	ring->cons += txbbs_skipped;

	/* Wake up the Tx queue if it was stopped and the ring is no longer full */
	if (/*unlikely(*/ring->blocked/*)*/ && !mlx4_en_tx_ring_is_full(ring)) {
		ring->blocked = 0;
		/*if (atomic_fetchadd_int(&priv->blocked, -1) == 1)
			atomic_clear_int(&dev->if_drv_flags, IFF_DRV_OACTIVE);*/
		ring->wake_queue++;
		priv->port_stats.wake_queue++;
	}
	// debug_printf("%s:%s: %lx:%ld:%ld:%ld:%lx\n", queue->name, __func__, *offset, *length, *valid_data, *valid_length, *flags);
	return SYS_ERR_OK;
}
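/*
 * mlx4_en_dequeue_tx consumes at most one completion per call, so a
 * caller that wants to drain the CQ loops until DEVQ_ERR_QUEUE_EMPTY
 * comes back. Minimal sketch of such a caller (assumed devif-style
 * usage; the out-parameter names follow the function above):
 *
 *	regionid_t rid;
 *	genoffset_t off, len, vdata, vlen;
 *	uint64_t flags;
 *	while (err_is_ok(mlx4_en_dequeue_tx(queue, &rid, &off, &len,
 *			&vdata, &vlen, &flags))) {
 *		// buffer [off, off + len) of region rid is free again
 *	}
 */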
static int mlx4_en_process_tx_cq(struct mlx4_en_priv *priv,
		struct mlx4_en_cq *cq)
{
	/*struct mlx4_en_priv *priv = netdev_priv(dev);*/
	struct mlx4_cq *mcq = &cq->mcq;
	struct mlx4_en_tx_ring *ring = priv->tx_ring[cq->ring];
	struct mlx4_cqe *cqe;
	u16 index;
	u16 new_index, ring_index, stamp_index;
	u32 txbbs_skipped = 0;
	u32 txbbs_stamp = 0;
	u32 cons_index = mcq->cons_index;
	int size = cq->size;
	u32 size_mask = ring->size_mask;
	struct mlx4_cqe *buf = cq->buf;
	int factor = priv->cqe_factor;

	if (!priv->port_up)
		return 0;

	index = cons_index & size_mask;
	cqe = &buf[(index << factor) + factor];
	ring_index = ring->cons & size_mask;
	stamp_index = ring_index;

	/* Process all completed CQEs */
	while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK, cons_index & size)) {

		/* make sure we read the CQE after we read the
		 * ownership bit */
		rmb();

		if (/*unlikely(*/
				(cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK)
						== MLX4_CQE_OPCODE_ERROR/*)*/) {
			MLX4_ERR(
					"CQE completed in error - vendor syndrome: 0x%x syndrome: 0x%x\n",
					((struct mlx4_err_cqe *) cqe)->vendor_err_syndrome,
					((struct mlx4_err_cqe *) cqe)->syndrome);
		}

		/* Skip over last polled CQE */
		new_index = be16_to_cpu(cqe->wqe_index) & size_mask;

		do {
			txbbs_skipped += ring->last_nr_txbb;
			ring_index = (ring_index + ring->last_nr_txbb) & size_mask;
			/* free next descriptor */
			ring->last_nr_txbb = mlx4_en_free_tx_desc(priv, ring, ring_index);
			mlx4_en_stamp_wqe(priv, ring, stamp_index,
					!!((ring->cons + txbbs_stamp) & ring->size));
			stamp_index = ring_index;
			txbbs_stamp = txbbs_skipped;
		} while (ring_index != new_index);

		++cons_index;
		index = cons_index & size_mask;
		cqe = &buf[(index << factor) + factor];
	}

	/* To prevent CQ overflow we first update CQ consumer and only then
	 * the ring consumer. */
	mcq->cons_index = cons_index;
	mlx4_cq_set_ci(mcq);
	wmb();
	ring->cons += txbbs_skipped;

	printf("process TX CQE %u %u\n", ring->cons, index);

	/* Wake up the Tx queue if it was stopped and the ring is no longer full */
	if (/*unlikely(*/ring->blocked/*)*/ && !mlx4_en_tx_ring_is_full(ring)) {
		ring->blocked = 0;
		/*if (atomic_fetchadd_int(&priv->blocked, -1) == 1)
			atomic_clear_int(&dev->if_drv_flags, IFF_DRV_OACTIVE);*/
		ring->wake_queue++;
		priv->port_stats.wake_queue++;
	}
	return (0);
}
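/*
 * The TX interrupt path is stubbed out in this port: completions are
 * reaped by polling, either through mlx4_en_dequeue_tx above or
 * through mlx4_en_xmit_poll below, not from mlx4_en_tx_irq.
 */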
void mlx4_en_tx_irq(struct mlx4_cq *mcq)
{
	// struct mlx4_en_cq *cq = container_of(mcq, struct mlx4_en_cq, mcq);
	// struct mlx4_en_priv *priv = cq->dev;
	// struct mlx4_en_tx_ring *ring = priv->tx_ring[cq->ring];

	// debug_printf("%s.%d:\n", __func__, __LINE__);
	// if (priv->port_up == 0/* || !spin_trylock(&ring->comp_lock)*/)
	// 	return;
	// mlx4_en_process_tx_cq(priv, cq);
	// mod_timer(&cq->timer, jiffies + 1);
	// spin_unlock(&ring->comp_lock);
}

// void mlx4_en_poll_tx_cq(unsigned long data) {
// 	struct mlx4_en_cq *cq = (struct mlx4_en_cq *) data;
// 	struct mlx4_en_priv *priv = netdev_priv(cq->dev);
// 	struct mlx4_en_tx_ring *ring = priv->tx_ring[cq->ring];
// 	u32 inflight;
//
// 	INC_PERF_COUNTER(priv->pstats.tx_poll);
//
// 	if (priv->port_up == 0)
// 		return;
// 	if (!spin_trylock(&ring->comp_lock)) {
// 		mod_timer(&cq->timer, jiffies + MLX4_EN_TX_POLL_TIMEOUT);
// 		return;
// 	}
// 	mlx4_en_process_tx_cq(cq->dev, cq);
// 	inflight = (u32)(ring->prod - ring->cons - ring->last_nr_txbb);
//
// 	/* If there are still packets in flight and the timer has not already
// 	 * been scheduled by the Tx routine then schedule it here to guarantee
// 	 * completion processing of these packets */
// 	if (inflight && priv->port_up)
// 		mod_timer(&cq->timer, jiffies + MLX4_EN_TX_POLL_TIMEOUT);
//
// 	spin_unlock(&ring->comp_lock);
// }

inline void mlx4_en_xmit_poll(struct mlx4_en_priv *priv, int tx_ind)
{
	struct mlx4_en_cq *cq = priv->tx_cq[tx_ind];
	struct mlx4_en_tx_ring *ring = priv->tx_ring[tx_ind];

	if (priv->port_up == 0)
		return;

	/* If we don't have a pending timer, set one up to catch our recent
	 * post in case the interface becomes idle */
	/*if (!timer_pending(&cq->timer))
		mod_timer(&cq->timer, jiffies + MLX4_EN_TX_POLL_TIMEOUT);*/

	/* Poll the CQ every MLX4_EN_TX_POLL_MODER packets */
	if ((++ring->poll_cnt & (MLX4_EN_TX_POLL_MODER - 1)) == 0)
		/*if (spin_trylock(&ring->comp_lock)) {*/
		mlx4_en_process_tx_cq(priv, cq);
		/*spin_unlock(&ring->comp_lock);*/
	/*}*/
}
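/*
 * MLX4_EN_TX_POLL_MODER is assumed to be a power of two: the mask in
 * mlx4_en_xmit_poll above makes the CQ sweep fire once every
 * MLX4_EN_TX_POLL_MODER posted packets (e.g. on every 16th call if the
 * constant is 16 - an illustrative value).
 */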
// static u16 mlx4_en_get_inline_hdr_size(struct mlx4_en_tx_ring *ring,
// 		struct mbuf *mb) {
// 	u16 retval;
//
// 	/*only copy from first fragment, if possible*/
// 	retval = MIN(ring->inline_thold, mb->m_len);
//
// 	/*check for too little data*/
// 	if (/*unlikely(*/retval < MIN_PKT_LEN/*)*/)
// 		retval = MIN(ring->inline_thold, mb->m_pkthdr.len);
//
// 	return (retval);
// }

// static int mlx4_en_get_header_size(struct mbuf *mb) {
// 	struct ether_vlan_header *eh;
// 	struct tcphdr *th;
// 	struct ip *ip;
// 	int ip_hlen, tcp_hlen;
// 	struct ip6_hdr *ip6;
// 	uint16_t eth_type;
// 	int eth_hdr_len;
//
// 	eh = mtod(mb, struct ether_vlan_header *);
// 	if (mb->m_len < ETHER_HDR_LEN) {
// 		return (0);
// 	}
// 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
// 		eth_type = ntohs(eh->evl_proto);
// 		eth_hdr_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
// 	} else {
// 		eth_type = ntohs(eh->evl_encap_proto);
// 		eth_hdr_len = ETHER_HDR_LEN;
// 	}
// 	if (mb->m_len < eth_hdr_len) {
// 		return (0);
// 	}
// 	switch (eth_type) {
// 	case ETHERTYPE_IP:
// 		ip = (struct ip *) (mb->m_data + eth_hdr_len);
// 		if (mb->m_len < eth_hdr_len + sizeof(*ip))
// 			return (0);
// 		if (ip->ip_p != IPPROTO_TCP)
// 			return (0);
// 		ip_hlen = ip->ip_hl << 2;
// 		eth_hdr_len += ip_hlen;
// 		break;
// 	case ETHERTYPE_IPV6:
// 		ip6 = (struct ip6_hdr *) (mb->m_data + eth_hdr_len);
// 		if (mb->m_len < eth_hdr_len + sizeof(*ip6))
// 			return (0);
// 		if (ip6->ip6_nxt != IPPROTO_TCP)
// 			return (0);
// 		eth_hdr_len += sizeof(*ip6);
// 		break;
// 	default:
// 		return (0);
// 	}
// 	if (mb->m_len < eth_hdr_len + sizeof(*th))
// 		return (0);
// 	th = (struct tcphdr *) (mb->m_data + eth_hdr_len);
// 	tcp_hlen = th->th_off << 2;
// 	eth_hdr_len += tcp_hlen;
// 	if (mb->m_len < eth_hdr_len)
// 		return (0);
// 	return (eth_hdr_len);
// }

// static volatile struct mlx4_wqe_data_seg *
// mlx4_en_store_inline_data(volatile struct mlx4_wqe_data_seg *dseg,
// 		struct mbuf *mb, int len, __be32 owner_bit) {
// 	uint8_t *inl = __DEVOLATILE(uint8_t *, dseg);
// 	const int spc = MLX4_INLINE_ALIGN - CTRL_SIZE - 4;
//
// 	if (/*unlikely(*/len < MIN_PKT_LEN/*)*/) {
// 		m_copydata(mb, 0, len, (caddr_t) inl + 4);
// 		memset(inl + 4 + len, 0, MIN_PKT_LEN - len);
// 		dseg += DIV_ROUND_UP(4 + MIN_PKT_LEN, DS_SIZE_ALIGNMENT);
// 	} else if (len <= spc) {
// 		m_copydata(mb, 0, len, (caddr_t) inl + 4);
// 		dseg += DIV_ROUND_UP(4 + len, DS_SIZE_ALIGNMENT);
// 	} else {
// 		m_copydata(mb, 0, spc, (caddr_t) inl + 4);
// 		m_copydata(mb, spc, len - spc, (caddr_t) inl + 8 + spc);
// 		dseg += DIV_ROUND_UP(8 + len, DS_SIZE_ALIGNMENT);
// 	}
// 	return (dseg);
// }

// static void mlx4_en_store_inline_header(volatile struct mlx4_wqe_data_seg *dseg,
// 		int len, __be32 owner_bit) {
// 	uint8_t *inl = __DEVOLATILE(uint8_t *, dseg);
// 	const int spc = MLX4_INLINE_ALIGN - CTRL_SIZE - 4;
//
// 	if (/*unlikely(*/len < MIN_PKT_LEN/*)*/) {
// 		*(volatile uint32_t *) inl = SET_BYTE_COUNT((1 << 31) | MIN_PKT_LEN);
// 	} else if (len <= spc) {
// 		*(volatile uint32_t *) inl = SET_BYTE_COUNT((1 << 31) | len);
// 	} else {
// 		*(volatile uint32_t *) (inl + 4 + spc) = SET_BYTE_COUNT(
// 				(1 << 31) | (len - spc));
// 		wmb();
// 		*(volatile uint32_t *) inl = SET_BYTE_COUNT((1 << 31) | spc);
// 	}
// }

/*
static unsigned long hashrandom;
static void hashrandom_init(void *arg) {
	hashrandom = random();
}
SYSINIT(hashrandom_init, SI_SUB_KLD, SI_ORDER_SECOND, &hashrandom_init, NULL);

u16 mlx4_en_select_queue(struct net_device *dev, struct mbuf *mb) {
	struct mlx4_en_priv *priv = netdev_priv(dev);
	u32 rings_p_up = priv->num_tx_rings_p_up;
	u32 up = 0;
	u32 queue_index;

#if (MLX4_EN_NUM_UP > 1)
	Obtain VLAN information if present
	if (mb->m_flags & M_VLANTAG) {
		u32 vlan_tag = mb->m_pkthdr.ether_vtag;
		up = (vlan_tag >> 13) % MLX4_EN_NUM_UP;
	}
#endif
	queue_index = mlx4_en_hashmbuf(MLX4_F_HASHL3 | MLX4_F_HASHL4, mb, hashrandom);

	return ((queue_index % rings_p_up) + (up * rings_p_up));
}
*/

// static void mlx4_bf_copy(void /*__iomem*/ *dst, volatile unsigned long *src,
// 		unsigned bytecnt) {
// 	__iowrite64_copy(dst, __DEVOLATILE(void *, src), bytecnt / 8);
// }

// static u64 mlx4_en_mac_to_u64(u8 *addr) {
// 	u64 mac = 0;
// 	int i;
//
// 	for (i = 0; i < ETHER_ADDR_LEN; i++) {
// 		mac <<= 8;
// 		mac |= addr[i];
// 	}
// 	return mac;
// }
// static void m_adj(struct mbuf *mp, int req_len) {
// 	int len = req_len;
// 	struct mbuf *m;
// 	int count;
//
// 	if ((m = mp) == NULL)
// 		return;
// 	if (len >= 0) {
// 		/*
// 		 * Trim from head.
// 		 */
// 		while (m != NULL && len > 0) {
// 			if (m->m_len <= len) {
// 				len -= m->m_len;
// 				m->m_len = 0;
// 				m = m->m_next;
// 			} else {
// 				m->m_len -= len;
// 				m->m_data += len;
// 				len = 0;
// 			}
// 		}
// 		if (mp->m_flags & M_PKTHDR)
// 			mp->m_pkthdr.len -= (req_len - len);
// 	} else {
// 		/*
// 		 * Trim from tail. Scan the mbuf chain,
// 		 * calculating its length and finding the last mbuf.
// 		 * If the adjustment only affects this mbuf, then just
// 		 * adjust and return. Otherwise, rescan and truncate
// 		 * after the remaining size.
// 		 */
// 		len = -len;
// 		count = 0;
// 		for (;;) {
// 			count += m->m_len;
// 			if (m->m_next == (struct mbuf *) 0)
// 				break;
// 			m = m->m_next;
// 		}
// 		if (m->m_len >= len) {
// 			m->m_len -= len;
// 			if (mp->m_flags & M_PKTHDR)
// 				mp->m_pkthdr.len -= len;
// 			return;
// 		}
// 		count -= len;
// 		if (count < 0)
// 			count = 0;
// 		/*
// 		 * Correct length for chain is "count".
// 		 * Find the mbuf with last data, adjust its length,
// 		 * and toss data from remaining mbufs on chain.
// 		 */
// 		m = mp;
// 		if (m->m_flags & M_PKTHDR)
// 			m->m_pkthdr.len = count;
// 		for (; m; m = m->m_next) {
// 			if (m->m_len >= count) {
// 				m->m_len = count;
// 				if (m->m_next != NULL) {
//
// 					/*TODO: cleanup*/
// 					/*m_freem(m->m_next);*/
// 					m->m_next = NULL;
// 				}
// 				break;
// 			}
// 			count -= m->m_len;
// 		}
// 	}
// }

errval_t mlx4_en_enqueue_tx(mlx4_queue_t *queue, regionid_t rid,
		genoffset_t offset, genoffset_t length,
		genoffset_t valid_data, genoffset_t valid_length,
		uint64_t flags)
{
	// debug_printf("%s:%s: %lx:%ld:%ld:%ld:%lx\n", queue->name, __func__, offset, length, valid_data, valid_length, flags);
	// uint8_t *packet = queue->region_mapped + offset + valid_data;
	// int i;
	// packet[6] = 0;
	// packet[7] = 0;
	// packet[8] = 0;
	// packet[9] = 0;
	// packet[10] = 0;
	// packet[11] = 0;
	// packet[0x28] = 0xe4;
	// packet[0x29] = 0x17;
	// packet[0x2e] = 0x42;
	// packet[0x2f] = 0x6a;
	// packet[0x30] = 0x61;
	// packet[0x31] = 0x6b;
	// packet[0x46] = 0;
	// packet[0x47] = 0;
	// packet[0x48] = 0;
	// packet[0x49] = 0;
	// packet[0x4a] = 0;
	// packet[0x4b] = 0;

	// for (i = 0; i < valid_length; i += 16) {
	// 	debug_printf("%s: %02hhx:%02hhx:%02hhx:%02hhx:%02hhx:%02hhx:%02hhx:%02hhx %02hhx:%02hhx:%02hhx:%02hhx:%02hhx:%02hhx:%02hhx:%02hhx\n", __func__, packet[0], packet[1], packet[2], packet[3], packet[4], packet[5], packet[6], packet[7],
	// 			packet[8], packet[9], packet[10], packet[11], packet[12], packet[13], packet[14], packet[15]);
	// 	packet += 16;
	// }
	struct mlx4_en_priv *priv = queue->priv;
	int tx_ind = 0;
	genpaddr_t buffer_data = queue->region_base + offset + valid_data;

	/*bus_dma_segment_t segs[MLX4_EN_TX_MAX_MBUF_FRAGS];*/
	// volatile struct mlx4_wqe_data_seg *dseg;
	// volatile struct mlx4_wqe_data_seg *dseg_inline;
	// volatile struct mlx4_en_tx_desc *tx_desc;
	// struct mlx4_en_tx_ring *ring = priv->tx_ring[tx_ind];
	// /*struct ifnet *ifp = priv->dev;*/
	// struct mlx4_en_tx_info *tx_info;
	// /*struct mbuf *m;*/
	// volatile __be32 *aux;
	// __be32 owner_bit;
	// int nr_segs;
	// int pad;
	// int err;
	// u32 bf_size;
	// u32 bf_prod;
	// u32 opcode;
	// u16 index;
	// u16 ds_cnt;
	// u16 ihs;
	// ihs = 0;

	// struct mlx4_en_priv *priv = netdev_priv(dev);
	// struct mlx4_en_dev *mdev = priv->mdev;
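	/*
	 * Live transmit path of this port: build a minimal one-segment
	 * send WQE (control segment plus a single data segment, no inline
	 * data and no VLAN/checksum offloads) for the buffer described by
	 * the devif arguments, then ring the doorbell.
	 */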
	struct mlx4_en_tx_ring *ring;
	struct mlx4_en_cq *cq;
	struct mlx4_en_tx_desc *tx_desc;
	struct mlx4_wqe_data_seg *data;
	struct mlx4_en_tx_info *tx_info;
	int nr_txbb;
	int desc_size;
	int real_size;
	u32 index, bf_index, ring_size;
	__be32 op_own;
	bool inl = false;
	__be32 owner_bit;

	if (!priv->port_up) {
		goto tx_drop;
	}

	ring = priv->tx_ring[tx_ind];
	ring_size = ring->size;

	owner_bit = (ring->prod & ring->size) ?
			cpu_to_be32(MLX4_EN_BIT_DESC_OWN) : 0;

	// debug_printf("MAC: %lx\n", priv->mac);
	// if (ring->inline_thold)
	// 	inl = length <= ring->inline_thold;
	// debug_printf("%s.%d: inline=%d\n", __func__, __LINE__, inl);

	real_size = CTRL_SIZE + DS_SIZE; /* for now only one data segment */

	/* Align descriptor to TXBB size */
	desc_size = ALIGN(real_size, TXBB_SIZE);
	nr_txbb = desc_size / TXBB_SIZE;
	// debug_printf("%s.%d: nr_txbb=%d\n", __func__, __LINE__, nr_txbb);

	/* Packet is good - grab an index and transmit it */
	index = ring->prod & ring->size_mask;
	bf_index = ring->prod;
	// debug_printf("%s.%d: index=%d bf_index=%d\n", __func__, __LINE__, index, bf_index);

	/* See if we have enough space for whole descriptor TXBB for setting
	 * SW ownership on next descriptor; if not, use a bounce buffer. */
	tx_desc = ring->buf + index * TXBB_SIZE;

	/* Save the buffer bookkeeping in the tx_info ring */
	tx_info = &ring->tx_info[index];
	// tx_info->mb = mb;
	tx_info->nr_txbb = nr_txbb;
	// tx_info->nr_segs = nr_segs;
	tx_info->offset = offset;
	tx_info->length = length;

	data = &tx_desc->data;

	/* valid only for non-inline segments */
	// tx_info->data_offset = (void *)data - (void *)tx_desc;

	// tx_info->inl = inl;
	if (!inl) {
		data->addr = cpu_to_be64(buffer_data);
		data->lkey = cpu_to_be32(priv->mdev->mr.key);
		data->byte_count = SET_BYTE_COUNT(valid_length);
		tx_info->nr_bytes = valid_length;
	}

	tx_desc->ctrl.vlan_tag = cpu_to_be16(0);
	tx_desc->ctrl.ins_vlan = 0;
	tx_desc->ctrl.fence_size = (real_size / 16) & 0x3f;
	tx_desc->ctrl.srcrb_flags = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE
			| MLX4_WQE_CTRL_SOLICITED);

	op_own = cpu_to_be32(MLX4_OPCODE_SEND);
	// tx_info->nr_bytes = max(mb->m_pkthdr.len,
	// 		(unsigned int)ETHER_MIN_LEN - ETHER_CRC_LEN);
	ring->packets++;

	// if (tx_info->inl) {
	// 	build_inline_wqe(tx_desc, mb, real_size, &vlan_tag, tx_ind,
	// 			owner_bit);
	// 	tx_info->inl = 1;
	// }
	op_own |= owner_bit;
	ring->prod += nr_txbb;

	// debug_printf("%s.%d: index=%d bf_index=%d prod=%d\n", __func__, __LINE__, index, bf_index, ring->prod);

	cq = priv->tx_cq[tx_ind];
	mlx4_en_arm_cq(priv, cq);

	{
		/* Ensure the new descriptor hits memory
		 * before setting ownership of this descriptor to HW */
		wmb();
		tx_desc->ctrl.owner_opcode = op_own;

		// uint8_t *desc_data = (void *)tx_desc;
		// char line[256];
		// int i, s = 0;
		// for (i = 0; i < 64; i++) {
		// 	s += sprintf(line + s, "%02x", desc_data[i]);
		// 	if ((i & 3) == 3)
		// 		line[s++] = ' ';
		// }
		// line[s] = 0;
		// debug_printf("DESC: %s\n", line);

		wmb();
		writel(cpu_to_be32(ring->doorbell_qpn),
				ring->bf.uar->map + MLX4_SEND_DOORBELL);
	}

	return 0;

tx_drop:
	// *mbp = NULL;
	// m_freem(mb);
	return EINVAL;
}
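/*
 * Usage sketch for the live transmit path (assumed caller; offsets are
 * relative to the registered region, and the flags argument is
 * currently unused by mlx4_en_enqueue_tx):
 *
 *	errval_t err = mlx4_en_enqueue_tx(queue, rid, off, len,
 *			0, pkt_len, 0);
 *	if (err_is_ok(err))
 *		mlx4_en_xmit_poll(priv, 0);	// reap completions every so often
 */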
/*
static int mlx4_en_transmit_locked(struct ifnet *dev, int tx_ind,
		struct mbuf *m) {
	struct mlx4_en_priv *priv = netdev_priv(dev);
	struct mlx4_en_tx_ring *ring;
	struct mbuf *next;
	int enqueued, err = 0;

	ring = priv->tx_ring[tx_ind];
	if ((dev->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING
			|| priv->port_up == 0) {
		if (m != NULL)
			err = drbr_enqueue(dev, ring->br, m);
		return (err);
	}

	enqueued = 0;
	if (m != NULL)
		* If we can't insert mbuf into drbr, try to xmit anyway.
		* We keep the error we got so we could return that after xmit.
		err = drbr_enqueue(dev, ring->br, m);

	Process the queue
	while ((next = drbr_peek(dev, ring->br)) != NULL) {
		if (mlx4_en_xmit(priv, tx_ind, &next) != 0) {
			if (next == NULL) {
				drbr_advance(dev, ring->br);
			} else {
				drbr_putback(dev, ring->br, next);
			}
			break;
		}
		drbr_advance(dev, ring->br);
		enqueued++;
		if ((dev->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;
	}

	if (enqueued > 0)
		ring->watchdog_time = ticks;

	return (err);
}

void mlx4_en_tx_que(void *context, int pending) {
	struct mlx4_en_tx_ring *ring;
	struct mlx4_en_priv *priv;
	struct net_device *dev;
	struct mlx4_en_cq *cq;
	int tx_ind;

	cq = context;
	dev = cq->dev;
	priv = dev->if_softc;
	tx_ind = cq->ring;
	ring = priv->tx_ring[tx_ind];

	if (priv->port_up != 0 && (dev->if_drv_flags & IFF_DRV_RUNNING) != 0) {
		mlx4_en_xmit_poll(priv, tx_ind);
		spin_lock(&ring->tx_lock);
		if (!drbr_empty(dev, ring->br))
			mlx4_en_transmit_locked(dev, tx_ind, NULL);
		spin_unlock(&ring->tx_lock);
	}
}

int mlx4_en_transmit(struct ifnet *dev, struct mbuf *m) {
	struct mlx4_en_priv *priv = netdev_priv(dev);
	struct mlx4_en_tx_ring *ring;
	struct mlx4_en_cq *cq;
	int i, err = 0;

	if (priv->port_up == 0) {
		m_freem(m);
		return (ENETDOWN);
	}

	Compute which queue to use
	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
		i = (m->m_pkthdr.flowid % 128) % priv->tx_ring_num;
	} else {
		i = mlx4_en_select_queue(dev, m);
	}

	ring = priv->tx_ring[i];
	if (spin_trylock(&ring->tx_lock)) {
		err = mlx4_en_transmit_locked(dev, i, m);
		spin_unlock(&ring->tx_lock);
		Poll CQ here
		mlx4_en_xmit_poll(priv, i);
	} else {
		err = drbr_enqueue(dev, ring->br, m);
		cq = priv->tx_cq[i];
		taskqueue_enqueue(cq->tq, &cq->cq_task);
	}

	return (err);
}

* Flush ring buffers.
void mlx4_en_qflush(struct ifnet *dev) {
	struct mlx4_en_priv *priv = netdev_priv(dev);
	struct mlx4_en_tx_ring *ring;
	struct mbuf *m;

	if (priv->port_up == 0)
		return;

	for (int i = 0; i < priv->tx_ring_num; i++) {
		ring = priv->tx_ring[i];
		spin_lock(&ring->tx_lock);
		while ((m = buf_ring_dequeue_sc(ring->br)) != NULL)
			m_freem(m);
		spin_unlock(&ring->tx_lock);
	}
	if_qflush(dev);
}
*/