1/* 2 * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23 * SUCH DAMAGE. 24 */ 25 26 27/* 28 * $FreeBSD$ 29 * 30 * netmap support for: lem 31 * 32 * For details on netmap support please see ixgbe_netmap.h 33 */ 34 35 36#include <net/netmap.h> 37#include <sys/selinfo.h> 38#include <vm/vm.h> 39#include <vm/pmap.h> /* vtophys ? */ 40#include <dev/netmap/netmap_kern.h> 41 42extern int netmap_adaptive_io; 43 44/* 45 * Register/unregister. We are already under netmap lock. 46 */ 47static int 48lem_netmap_reg(struct netmap_adapter *na, int onoff) 49{ 50 struct ifnet *ifp = na->ifp; 51 struct adapter *adapter = ifp->if_softc; 52 53 EM_CORE_LOCK(adapter); 54 55 lem_disable_intr(adapter); 56 57 /* Tell the stack that the interface is no longer active */ 58 ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); 59 60#ifndef EM_LEGACY_IRQ // XXX do we need this ? 61 taskqueue_block(adapter->tq); 62 taskqueue_drain(adapter->tq, &adapter->rxtx_task); 63 taskqueue_drain(adapter->tq, &adapter->link_task); 64#endif /* !EM_LEGCY_IRQ */ 65 66 /* enable or disable flags and callbacks in na and ifp */ 67 if (onoff) { 68 nm_set_native_flags(na); 69 } else { 70 nm_clear_native_flags(na); 71 } 72 lem_init_locked(adapter); /* also enable intr */ 73 74#ifndef EM_LEGACY_IRQ 75 taskqueue_unblock(adapter->tq); // XXX do we need this ? 76#endif /* !EM_LEGCY_IRQ */ 77 78 EM_CORE_UNLOCK(adapter); 79 80 return (ifp->if_drv_flags & IFF_DRV_RUNNING ? 0 : 1); 81} 82 83 84/* 85 * Reconcile kernel and user view of the transmit ring. 86 */ 87static int 88lem_netmap_txsync(struct netmap_kring *kring, int flags) 89{ 90 struct netmap_adapter *na = kring->na; 91 struct ifnet *ifp = na->ifp; 92 struct netmap_ring *ring = kring->ring; 93 u_int nm_i; /* index into the netmap ring */ 94 u_int nic_i; /* index into the NIC ring */ 95 u_int const lim = kring->nkr_num_slots - 1; 96 u_int const head = kring->rhead; 97 /* generate an interrupt approximately every half ring */ 98 u_int report_frequency = kring->nkr_num_slots >> 1; 99 100 /* device-specific */ 101 struct adapter *adapter = ifp->if_softc; 102#ifdef NIC_PARAVIRT 103 struct paravirt_csb *csb = adapter->csb; 104 uint64_t *csbd = (uint64_t *)(csb + 1); 105#endif /* NIC_PARAVIRT */ 106 107 bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map, 108 BUS_DMASYNC_POSTREAD); 109 110 /* 111 * First part: process new packets to send. 112 */ 113 114 nm_i = kring->nr_hwcur; 115 if (nm_i != head) { /* we have new packets to send */ 116#ifdef NIC_PARAVIRT 117 int do_kick = 0; 118 uint64_t t = 0; // timestamp 119 int n = head - nm_i; 120 if (n < 0) 121 n += lim + 1; 122 if (csb) { 123 t = rdtsc(); /* last timestamp */ 124 csbd[16] += t - csbd[0]; /* total Wg */ 125 csbd[17] += n; /* Wg count */ 126 csbd[0] = t; 127 } 128#endif /* NIC_PARAVIRT */ 129 nic_i = netmap_idx_k2n(kring, nm_i); 130 while (nm_i != head) { 131 struct netmap_slot *slot = &ring->slot[nm_i]; 132 u_int len = slot->len; 133 uint64_t paddr; 134 void *addr = PNMB(na, slot, &paddr); 135 136 /* device-specific */ 137 struct e1000_tx_desc *curr = &adapter->tx_desc_base[nic_i]; 138 struct em_buffer *txbuf = &adapter->tx_buffer_area[nic_i]; 139 int flags = (slot->flags & NS_REPORT || 140 nic_i == 0 || nic_i == report_frequency) ? 141 E1000_TXD_CMD_RS : 0; 142 143 NM_CHECK_ADDR_LEN(na, addr, len); 144 145 if (slot->flags & NS_BUF_CHANGED) { 146 /* buffer has changed, reload map */ 147 curr->buffer_addr = htole64(paddr); 148 netmap_reload_map(na, adapter->txtag, txbuf->map, addr); 149 } 150 slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED); 151 152 /* Fill the slot in the NIC ring. */ 153 curr->upper.data = 0; 154 curr->lower.data = htole32(adapter->txd_cmd | len | 155 (E1000_TXD_CMD_EOP | flags) ); 156 bus_dmamap_sync(adapter->txtag, txbuf->map, 157 BUS_DMASYNC_PREWRITE); 158 159 nm_i = nm_next(nm_i, lim); 160 nic_i = nm_next(nic_i, lim); 161 // XXX might try an early kick 162 } 163 kring->nr_hwcur = head; 164 165 /* synchronize the NIC ring */ 166 bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map, 167 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 168 169#ifdef NIC_PARAVIRT 170 /* set unconditionally, then also kick if needed */ 171 if (csb) { 172 t = rdtsc(); 173 if (csb->host_need_txkick == 2) { 174 /* can compute an update of delta */ 175 int64_t delta = t - csbd[3]; 176 if (delta < 0) 177 delta = -delta; 178 if (csbd[8] == 0 || delta < csbd[8]) { 179 csbd[8] = delta; 180 csbd[9]++; 181 } 182 csbd[10]++; 183 } 184 csb->guest_tdt = nic_i; 185 csbd[18] += t - csbd[0]; // total wp 186 csbd[19] += n; 187 } 188 if (!csb || !csb->guest_csb_on || (csb->host_need_txkick & 1)) 189 do_kick = 1; 190 if (do_kick) 191#endif /* NIC_PARAVIRT */ 192 /* (re)start the tx unit up to slot nic_i (excluded) */ 193 E1000_WRITE_REG(&adapter->hw, E1000_TDT(0), nic_i); 194#ifdef NIC_PARAVIRT 195 if (do_kick) { 196 uint64_t t1 = rdtsc(); 197 csbd[20] += t1 - t; // total Np 198 csbd[21]++; 199 } 200#endif /* NIC_PARAVIRT */ 201 } 202 203 /* 204 * Second part: reclaim buffers for completed transmissions. 205 */ 206 if (ticks != kring->last_reclaim || flags & NAF_FORCE_RECLAIM || nm_kr_txempty(kring)) { 207 kring->last_reclaim = ticks; 208 /* record completed transmissions using TDH */ 209#ifdef NIC_PARAVIRT 210 /* host updates tdh unconditionally, and we have 211 * no side effects on reads, so we can read from there 212 * instead of exiting. 213 */ 214 if (csb) { 215 static int drain = 0, nodrain=0, good = 0, bad = 0, fail = 0; 216 u_int x = adapter->next_tx_to_clean; 217 csbd[19]++; // XXX count reclaims 218 nic_i = csb->host_tdh; 219 if (csb->guest_csb_on) { 220 if (nic_i == x) { 221 bad++; 222 csbd[24]++; // failed reclaims 223 /* no progress, request kick and retry */ 224 csb->guest_need_txkick = 1; 225 mb(); // XXX barrier 226 nic_i = csb->host_tdh; 227 } else { 228 good++; 229 } 230 if (nic_i != x) { 231 csb->guest_need_txkick = 2; 232 if (nic_i == csb->guest_tdt) 233 drain++; 234 else 235 nodrain++; 236#if 1 237 if (netmap_adaptive_io) { 238 /* new mechanism: last half ring (or so) 239 * released one slot at a time. 240 * This effectively makes the system spin. 241 * 242 * Take next_to_clean + 1 as a reference. 243 * tdh must be ahead or equal 244 * On entry, the logical order is 245 * x < tdh = nic_i 246 * We first push tdh up to avoid wraps. 247 * The limit is tdh-ll (half ring). 248 * if tdh-256 < x we report x; 249 * else we report tdh-256 250 */ 251 u_int tdh = nic_i; 252 u_int ll = csbd[15]; 253 u_int delta = lim/8; 254 if (netmap_adaptive_io == 2 || ll > delta) 255 csbd[15] = ll = delta; 256 else if (netmap_adaptive_io == 1 && ll > 1) { 257 csbd[15]--; 258 } 259 260 if (nic_i >= kring->nkr_num_slots) { 261 RD(5, "bad nic_i %d on input", nic_i); 262 } 263 x = nm_next(x, lim); 264 if (tdh < x) 265 tdh += lim + 1; 266 if (tdh <= x + ll) { 267 nic_i = x; 268 csbd[25]++; //report n + 1; 269 } else { 270 tdh = nic_i; 271 if (tdh < ll) 272 tdh += lim + 1; 273 nic_i = tdh - ll; 274 csbd[26]++; // report tdh - ll 275 } 276 } 277#endif 278 } else { 279 /* we stop, count whether we are idle or not */ 280 int bh_active = csb->host_need_txkick & 2 ? 4 : 0; 281 csbd[27+ csb->host_need_txkick]++; 282 if (netmap_adaptive_io == 1) { 283 if (bh_active && csbd[15] > 1) 284 csbd[15]--; 285 else if (!bh_active && csbd[15] < lim/2) 286 csbd[15]++; 287 } 288 bad--; 289 fail++; 290 } 291 } 292 RD(1, "drain %d nodrain %d good %d retry %d fail %d", 293 drain, nodrain, good, bad, fail); 294 } else 295#endif /* !NIC_PARAVIRT */ 296 nic_i = E1000_READ_REG(&adapter->hw, E1000_TDH(0)); 297 if (nic_i >= kring->nkr_num_slots) { /* XXX can it happen ? */ 298 D("TDH wrap %d", nic_i); 299 nic_i -= kring->nkr_num_slots; 300 } 301 adapter->next_tx_to_clean = nic_i; 302 kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim); 303 } 304 305 nm_txsync_finalize(kring); 306 307 return 0; 308} 309 310 311/* 312 * Reconcile kernel and user view of the receive ring. 313 */ 314static int 315lem_netmap_rxsync(struct netmap_kring *kring, int flags) 316{ 317 struct netmap_adapter *na = kring->na; 318 struct ifnet *ifp = na->ifp; 319 struct netmap_ring *ring = kring->ring; 320 u_int nm_i; /* index into the netmap ring */ 321 u_int nic_i; /* index into the NIC ring */ 322 u_int n; 323 u_int const lim = kring->nkr_num_slots - 1; 324 u_int const head = nm_rxsync_prologue(kring); 325 int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR; 326 327 /* device-specific */ 328 struct adapter *adapter = ifp->if_softc; 329#ifdef NIC_PARAVIRT 330 struct paravirt_csb *csb = adapter->csb; 331 uint32_t csb_mode = csb && csb->guest_csb_on; 332 uint32_t do_host_rxkick = 0; 333#endif /* NIC_PARAVIRT */ 334 335 if (head > lim) 336 return netmap_ring_reinit(kring); 337 338#ifdef NIC_PARAVIRT 339 if (csb_mode) { 340 force_update = 1; 341 csb->guest_need_rxkick = 0; 342 } 343#endif /* NIC_PARAVIRT */ 344 /* XXX check sync modes */ 345 bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map, 346 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); 347 348 /* 349 * First part: import newly received packets. 350 */ 351 if (netmap_no_pendintr || force_update) { 352 uint16_t slot_flags = kring->nkr_slot_flags; 353 354 nic_i = adapter->next_rx_desc_to_check; 355 nm_i = netmap_idx_n2k(kring, nic_i); 356 357 for (n = 0; ; n++) { 358 struct e1000_rx_desc *curr = &adapter->rx_desc_base[nic_i]; 359 uint32_t staterr = le32toh(curr->status); 360 int len; 361 362#ifdef NIC_PARAVIRT 363 if (csb_mode) { 364 if ((staterr & E1000_RXD_STAT_DD) == 0) { 365 /* don't bother to retry if more than 1 pkt */ 366 if (n > 1) 367 break; 368 csb->guest_need_rxkick = 1; 369 wmb(); 370 staterr = le32toh(curr->status); 371 if ((staterr & E1000_RXD_STAT_DD) == 0) { 372 break; 373 } else { /* we are good */ 374 csb->guest_need_rxkick = 0; 375 } 376 } 377 } else 378#endif /* NIC_PARAVIRT */ 379 if ((staterr & E1000_RXD_STAT_DD) == 0) 380 break; 381 len = le16toh(curr->length) - 4; // CRC 382 if (len < 0) { 383 RD(5, "bogus pkt (%d) size %d nic idx %d", n, len, nic_i); 384 len = 0; 385 } 386 ring->slot[nm_i].len = len; 387 ring->slot[nm_i].flags = slot_flags; 388 bus_dmamap_sync(adapter->rxtag, 389 adapter->rx_buffer_area[nic_i].map, 390 BUS_DMASYNC_POSTREAD); 391 nm_i = nm_next(nm_i, lim); 392 nic_i = nm_next(nic_i, lim); 393 } 394 if (n) { /* update the state variables */ 395#ifdef NIC_PARAVIRT 396 if (csb_mode) { 397 if (n > 1) { 398 /* leave one spare buffer so we avoid rxkicks */ 399 nm_i = nm_prev(nm_i, lim); 400 nic_i = nm_prev(nic_i, lim); 401 n--; 402 } else { 403 csb->guest_need_rxkick = 1; 404 } 405 } 406#endif /* NIC_PARAVIRT */ 407 ND("%d new packets at nic %d nm %d tail %d", 408 n, 409 adapter->next_rx_desc_to_check, 410 netmap_idx_n2k(kring, adapter->next_rx_desc_to_check), 411 kring->nr_hwtail); 412 adapter->next_rx_desc_to_check = nic_i; 413 // ifp->if_ipackets += n; 414 kring->nr_hwtail = nm_i; 415 } 416 kring->nr_kflags &= ~NKR_PENDINTR; 417 } 418 419 /* 420 * Second part: skip past packets that userspace has released. 421 */ 422 nm_i = kring->nr_hwcur; 423 if (nm_i != head) { 424 nic_i = netmap_idx_k2n(kring, nm_i); 425 for (n = 0; nm_i != head; n++) { 426 struct netmap_slot *slot = &ring->slot[nm_i]; 427 uint64_t paddr; 428 void *addr = PNMB(na, slot, &paddr); 429 430 struct e1000_rx_desc *curr = &adapter->rx_desc_base[nic_i]; 431 struct em_buffer *rxbuf = &adapter->rx_buffer_area[nic_i]; 432 433 if (addr == NETMAP_BUF_BASE(na)) /* bad buf */ 434 goto ring_reset; 435 436 if (slot->flags & NS_BUF_CHANGED) { 437 /* buffer has changed, reload map */ 438 curr->buffer_addr = htole64(paddr); 439 netmap_reload_map(na, adapter->rxtag, rxbuf->map, addr); 440 slot->flags &= ~NS_BUF_CHANGED; 441 } 442 curr->status = 0; 443 bus_dmamap_sync(adapter->rxtag, rxbuf->map, 444 BUS_DMASYNC_PREREAD); 445#ifdef NIC_PARAVIRT 446 if (csb_mode && csb->host_rxkick_at == nic_i) 447 do_host_rxkick = 1; 448#endif /* NIC_PARAVIRT */ 449 nm_i = nm_next(nm_i, lim); 450 nic_i = nm_next(nic_i, lim); 451 } 452 kring->nr_hwcur = head; 453 bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map, 454 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 455 /* 456 * IMPORTANT: we must leave one free slot in the ring, 457 * so move nic_i back by one unit 458 */ 459 nic_i = nm_prev(nic_i, lim); 460#ifdef NIC_PARAVIRT 461 /* set unconditionally, then also kick if needed */ 462 if (csb) 463 csb->guest_rdt = nic_i; 464 if (!csb_mode || do_host_rxkick) 465#endif /* NIC_PARAVIRT */ 466 E1000_WRITE_REG(&adapter->hw, E1000_RDT(0), nic_i); 467 } 468 469 /* tell userspace that there might be new packets */ 470 nm_rxsync_finalize(kring); 471 472 return 0; 473 474ring_reset: 475 return netmap_ring_reinit(kring); 476} 477 478 479static void 480lem_netmap_attach(struct adapter *adapter) 481{ 482 struct netmap_adapter na; 483 484 bzero(&na, sizeof(na)); 485 486 na.ifp = adapter->ifp; 487 na.na_flags = NAF_BDG_MAYSLEEP; 488 na.num_tx_desc = adapter->num_tx_desc; 489 na.num_rx_desc = adapter->num_rx_desc; 490 na.nm_txsync = lem_netmap_txsync; 491 na.nm_rxsync = lem_netmap_rxsync; 492 na.nm_register = lem_netmap_reg; 493 na.num_tx_rings = na.num_rx_rings = 1; 494 netmap_attach(&na); 495} 496 497/* end of file */ 498