if_lem_netmap.h revision 285349
1/* 2 * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23 * SUCH DAMAGE. 24 */ 25 26 27/* 28 * $FreeBSD: head/sys/dev/netmap/if_lem_netmap.h 285349 2015-07-10 05:51:36Z luigi $ 29 * 30 * netmap support for: lem 31 * 32 * For details on netmap support please see ixgbe_netmap.h 33 */ 34 35 36#include <net/netmap.h> 37#include <sys/selinfo.h> 38#include <vm/vm.h> 39#include <vm/pmap.h> /* vtophys ? */ 40#include <dev/netmap/netmap_kern.h> 41 42extern int netmap_adaptive_io; 43 44/* 45 * Register/unregister. We are already under netmap lock. 46 */ 47static int 48lem_netmap_reg(struct netmap_adapter *na, int onoff) 49{ 50 struct ifnet *ifp = na->ifp; 51 struct adapter *adapter = ifp->if_softc; 52 53 EM_CORE_LOCK(adapter); 54 55 lem_disable_intr(adapter); 56 57 /* Tell the stack that the interface is no longer active */ 58 ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); 59 60#ifndef EM_LEGACY_IRQ // XXX do we need this ? 61 taskqueue_block(adapter->tq); 62 taskqueue_drain(adapter->tq, &adapter->rxtx_task); 63 taskqueue_drain(adapter->tq, &adapter->link_task); 64#endif /* !EM_LEGCY_IRQ */ 65 66 /* enable or disable flags and callbacks in na and ifp */ 67 if (onoff) { 68 nm_set_native_flags(na); 69 } else { 70 nm_clear_native_flags(na); 71 } 72 lem_init_locked(adapter); /* also enable intr */ 73 74#ifndef EM_LEGACY_IRQ 75 taskqueue_unblock(adapter->tq); // XXX do we need this ? 76#endif /* !EM_LEGCY_IRQ */ 77 78 EM_CORE_UNLOCK(adapter); 79 80 return (ifp->if_drv_flags & IFF_DRV_RUNNING ? 0 : 1); 81} 82 83 84/* 85 * Reconcile kernel and user view of the transmit ring. 86 */ 87static int 88lem_netmap_txsync(struct netmap_kring *kring, int flags) 89{ 90 struct netmap_adapter *na = kring->na; 91 struct ifnet *ifp = na->ifp; 92 struct netmap_ring *ring = kring->ring; 93 u_int nm_i; /* index into the netmap ring */ 94 u_int nic_i; /* index into the NIC ring */ 95 u_int const lim = kring->nkr_num_slots - 1; 96 u_int const head = kring->rhead; 97 /* generate an interrupt approximately every half ring */ 98 u_int report_frequency = kring->nkr_num_slots >> 1; 99 100 /* device-specific */ 101 struct adapter *adapter = ifp->if_softc; 102#ifdef NIC_PARAVIRT 103 struct paravirt_csb *csb = adapter->csb; 104 uint64_t *csbd = (uint64_t *)(csb + 1); 105#endif /* NIC_PARAVIRT */ 106 107 bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map, 108 BUS_DMASYNC_POSTREAD); 109 110 /* 111 * First part: process new packets to send. 112 */ 113 114 nm_i = kring->nr_hwcur; 115 if (nm_i != head) { /* we have new packets to send */ 116#ifdef NIC_PARAVIRT 117 int do_kick = 0; 118 uint64_t t = 0; // timestamp 119 int n = head - nm_i; 120 if (n < 0) 121 n += lim + 1; 122 if (csb) { 123 t = rdtsc(); /* last timestamp */ 124 csbd[16] += t - csbd[0]; /* total Wg */ 125 csbd[17] += n; /* Wg count */ 126 csbd[0] = t; 127 } 128#endif /* NIC_PARAVIRT */ 129 nic_i = netmap_idx_k2n(kring, nm_i); 130 while (nm_i != head) { 131 struct netmap_slot *slot = &ring->slot[nm_i]; 132 u_int len = slot->len; 133 uint64_t paddr; 134 void *addr = PNMB(na, slot, &paddr); 135 136 /* device-specific */ 137 struct e1000_tx_desc *curr = &adapter->tx_desc_base[nic_i]; 138 struct em_buffer *txbuf = &adapter->tx_buffer_area[nic_i]; 139 int flags = (slot->flags & NS_REPORT || 140 nic_i == 0 || nic_i == report_frequency) ? 141 E1000_TXD_CMD_RS : 0; 142 143 NM_CHECK_ADDR_LEN(na, addr, len); 144 145 if (slot->flags & NS_BUF_CHANGED) { 146 /* buffer has changed, reload map */ 147 curr->buffer_addr = htole64(paddr); 148 netmap_reload_map(na, adapter->txtag, txbuf->map, addr); 149 } 150 slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED); 151 152 /* Fill the slot in the NIC ring. */ 153 curr->upper.data = 0; 154 curr->lower.data = htole32(adapter->txd_cmd | len | 155 (E1000_TXD_CMD_EOP | flags) ); 156 bus_dmamap_sync(adapter->txtag, txbuf->map, 157 BUS_DMASYNC_PREWRITE); 158 159 nm_i = nm_next(nm_i, lim); 160 nic_i = nm_next(nic_i, lim); 161 // XXX might try an early kick 162 } 163 kring->nr_hwcur = head; 164 165 /* synchronize the NIC ring */ 166 bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map, 167 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 168 169#ifdef NIC_PARAVIRT 170 /* set unconditionally, then also kick if needed */ 171 if (csb) { 172 t = rdtsc(); 173 if (csb->host_need_txkick == 2) { 174 /* can compute an update of delta */ 175 int64_t delta = t - csbd[3]; 176 if (delta < 0) 177 delta = -delta; 178 if (csbd[8] == 0 || delta < csbd[8]) { 179 csbd[8] = delta; 180 csbd[9]++; 181 } 182 csbd[10]++; 183 } 184 csb->guest_tdt = nic_i; 185 csbd[18] += t - csbd[0]; // total wp 186 csbd[19] += n; 187 } 188 if (!csb || !csb->guest_csb_on || (csb->host_need_txkick & 1)) 189 do_kick = 1; 190 if (do_kick) 191#endif /* NIC_PARAVIRT */ 192 /* (re)start the tx unit up to slot nic_i (excluded) */ 193 E1000_WRITE_REG(&adapter->hw, E1000_TDT(0), nic_i); 194#ifdef NIC_PARAVIRT 195 if (do_kick) { 196 uint64_t t1 = rdtsc(); 197 csbd[20] += t1 - t; // total Np 198 csbd[21]++; 199 } 200#endif /* NIC_PARAVIRT */ 201 } 202 203 /* 204 * Second part: reclaim buffers for completed transmissions. 205 */ 206 if (ticks != kring->last_reclaim || flags & NAF_FORCE_RECLAIM || nm_kr_txempty(kring)) { 207 kring->last_reclaim = ticks; 208 /* record completed transmissions using TDH */ 209#ifdef NIC_PARAVIRT 210 /* host updates tdh unconditionally, and we have 211 * no side effects on reads, so we can read from there 212 * instead of exiting. 213 */ 214 if (csb) { 215 static int drain = 0, nodrain=0, good = 0, bad = 0, fail = 0; 216 u_int x = adapter->next_tx_to_clean; 217 csbd[19]++; // XXX count reclaims 218 nic_i = csb->host_tdh; 219 if (csb->guest_csb_on) { 220 if (nic_i == x) { 221 bad++; 222 csbd[24]++; // failed reclaims 223 /* no progress, request kick and retry */ 224 csb->guest_need_txkick = 1; 225 mb(); // XXX barrier 226 nic_i = csb->host_tdh; 227 } else { 228 good++; 229 } 230 if (nic_i != x) { 231 csb->guest_need_txkick = 2; 232 if (nic_i == csb->guest_tdt) 233 drain++; 234 else 235 nodrain++; 236#if 1 237 if (netmap_adaptive_io) { 238 /* new mechanism: last half ring (or so) 239 * released one slot at a time. 240 * This effectively makes the system spin. 241 * 242 * Take next_to_clean + 1 as a reference. 243 * tdh must be ahead or equal 244 * On entry, the logical order is 245 * x < tdh = nic_i 246 * We first push tdh up to avoid wraps. 247 * The limit is tdh-ll (half ring). 248 * if tdh-256 < x we report x; 249 * else we report tdh-256 250 */ 251 u_int tdh = nic_i; 252 u_int ll = csbd[15]; 253 u_int delta = lim/8; 254 if (netmap_adaptive_io == 2 || ll > delta) 255 csbd[15] = ll = delta; 256 else if (netmap_adaptive_io == 1 && ll > 1) { 257 csbd[15]--; 258 } 259 260 if (nic_i >= kring->nkr_num_slots) { 261 RD(5, "bad nic_i %d on input", nic_i); 262 } 263 x = nm_next(x, lim); 264 if (tdh < x) 265 tdh += lim + 1; 266 if (tdh <= x + ll) { 267 nic_i = x; 268 csbd[25]++; //report n + 1; 269 } else { 270 tdh = nic_i; 271 if (tdh < ll) 272 tdh += lim + 1; 273 nic_i = tdh - ll; 274 csbd[26]++; // report tdh - ll 275 } 276 } 277#endif 278 } else { 279 /* we stop, count whether we are idle or not */ 280 int bh_active = csb->host_need_txkick & 2 ? 4 : 0; 281 csbd[27+ csb->host_need_txkick]++; 282 if (netmap_adaptive_io == 1) { 283 if (bh_active && csbd[15] > 1) 284 csbd[15]--; 285 else if (!bh_active && csbd[15] < lim/2) 286 csbd[15]++; 287 } 288 bad--; 289 fail++; 290 } 291 } 292 RD(1, "drain %d nodrain %d good %d retry %d fail %d", 293 drain, nodrain, good, bad, fail); 294 } else 295#endif /* !NIC_PARAVIRT */ 296 nic_i = E1000_READ_REG(&adapter->hw, E1000_TDH(0)); 297 if (nic_i >= kring->nkr_num_slots) { /* XXX can it happen ? */ 298 D("TDH wrap %d", nic_i); 299 nic_i -= kring->nkr_num_slots; 300 } 301 adapter->next_tx_to_clean = nic_i; 302 kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim); 303 } 304 305 return 0; 306} 307 308 309/* 310 * Reconcile kernel and user view of the receive ring. 311 */ 312static int 313lem_netmap_rxsync(struct netmap_kring *kring, int flags) 314{ 315 struct netmap_adapter *na = kring->na; 316 struct ifnet *ifp = na->ifp; 317 struct netmap_ring *ring = kring->ring; 318 u_int nm_i; /* index into the netmap ring */ 319 u_int nic_i; /* index into the NIC ring */ 320 u_int n; 321 u_int const lim = kring->nkr_num_slots - 1; 322 u_int const head = kring->rhead; 323 int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR; 324 325 /* device-specific */ 326 struct adapter *adapter = ifp->if_softc; 327#ifdef NIC_PARAVIRT 328 struct paravirt_csb *csb = adapter->csb; 329 uint32_t csb_mode = csb && csb->guest_csb_on; 330 uint32_t do_host_rxkick = 0; 331#endif /* NIC_PARAVIRT */ 332 333 if (head > lim) 334 return netmap_ring_reinit(kring); 335 336#ifdef NIC_PARAVIRT 337 if (csb_mode) { 338 force_update = 1; 339 csb->guest_need_rxkick = 0; 340 } 341#endif /* NIC_PARAVIRT */ 342 /* XXX check sync modes */ 343 bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map, 344 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); 345 346 /* 347 * First part: import newly received packets. 348 */ 349 if (netmap_no_pendintr || force_update) { 350 uint16_t slot_flags = kring->nkr_slot_flags; 351 352 nic_i = adapter->next_rx_desc_to_check; 353 nm_i = netmap_idx_n2k(kring, nic_i); 354 355 for (n = 0; ; n++) { 356 struct e1000_rx_desc *curr = &adapter->rx_desc_base[nic_i]; 357 uint32_t staterr = le32toh(curr->status); 358 int len; 359 360#ifdef NIC_PARAVIRT 361 if (csb_mode) { 362 if ((staterr & E1000_RXD_STAT_DD) == 0) { 363 /* don't bother to retry if more than 1 pkt */ 364 if (n > 1) 365 break; 366 csb->guest_need_rxkick = 1; 367 wmb(); 368 staterr = le32toh(curr->status); 369 if ((staterr & E1000_RXD_STAT_DD) == 0) { 370 break; 371 } else { /* we are good */ 372 csb->guest_need_rxkick = 0; 373 } 374 } 375 } else 376#endif /* NIC_PARAVIRT */ 377 if ((staterr & E1000_RXD_STAT_DD) == 0) 378 break; 379 len = le16toh(curr->length) - 4; // CRC 380 if (len < 0) { 381 RD(5, "bogus pkt (%d) size %d nic idx %d", n, len, nic_i); 382 len = 0; 383 } 384 ring->slot[nm_i].len = len; 385 ring->slot[nm_i].flags = slot_flags; 386 bus_dmamap_sync(adapter->rxtag, 387 adapter->rx_buffer_area[nic_i].map, 388 BUS_DMASYNC_POSTREAD); 389 nm_i = nm_next(nm_i, lim); 390 nic_i = nm_next(nic_i, lim); 391 } 392 if (n) { /* update the state variables */ 393#ifdef NIC_PARAVIRT 394 if (csb_mode) { 395 if (n > 1) { 396 /* leave one spare buffer so we avoid rxkicks */ 397 nm_i = nm_prev(nm_i, lim); 398 nic_i = nm_prev(nic_i, lim); 399 n--; 400 } else { 401 csb->guest_need_rxkick = 1; 402 } 403 } 404#endif /* NIC_PARAVIRT */ 405 ND("%d new packets at nic %d nm %d tail %d", 406 n, 407 adapter->next_rx_desc_to_check, 408 netmap_idx_n2k(kring, adapter->next_rx_desc_to_check), 409 kring->nr_hwtail); 410 adapter->next_rx_desc_to_check = nic_i; 411 // if_inc_counter(ifp, IFCOUNTER_IPACKETS, n); 412 kring->nr_hwtail = nm_i; 413 } 414 kring->nr_kflags &= ~NKR_PENDINTR; 415 } 416 417 /* 418 * Second part: skip past packets that userspace has released. 419 */ 420 nm_i = kring->nr_hwcur; 421 if (nm_i != head) { 422 nic_i = netmap_idx_k2n(kring, nm_i); 423 for (n = 0; nm_i != head; n++) { 424 struct netmap_slot *slot = &ring->slot[nm_i]; 425 uint64_t paddr; 426 void *addr = PNMB(na, slot, &paddr); 427 428 struct e1000_rx_desc *curr = &adapter->rx_desc_base[nic_i]; 429 struct em_buffer *rxbuf = &adapter->rx_buffer_area[nic_i]; 430 431 if (addr == NETMAP_BUF_BASE(na)) /* bad buf */ 432 goto ring_reset; 433 434 if (slot->flags & NS_BUF_CHANGED) { 435 /* buffer has changed, reload map */ 436 curr->buffer_addr = htole64(paddr); 437 netmap_reload_map(na, adapter->rxtag, rxbuf->map, addr); 438 slot->flags &= ~NS_BUF_CHANGED; 439 } 440 curr->status = 0; 441 bus_dmamap_sync(adapter->rxtag, rxbuf->map, 442 BUS_DMASYNC_PREREAD); 443#ifdef NIC_PARAVIRT 444 if (csb_mode && csb->host_rxkick_at == nic_i) 445 do_host_rxkick = 1; 446#endif /* NIC_PARAVIRT */ 447 nm_i = nm_next(nm_i, lim); 448 nic_i = nm_next(nic_i, lim); 449 } 450 kring->nr_hwcur = head; 451 bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map, 452 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 453 /* 454 * IMPORTANT: we must leave one free slot in the ring, 455 * so move nic_i back by one unit 456 */ 457 nic_i = nm_prev(nic_i, lim); 458#ifdef NIC_PARAVIRT 459 /* set unconditionally, then also kick if needed */ 460 if (csb) 461 csb->guest_rdt = nic_i; 462 if (!csb_mode || do_host_rxkick) 463#endif /* NIC_PARAVIRT */ 464 E1000_WRITE_REG(&adapter->hw, E1000_RDT(0), nic_i); 465 } 466 467 return 0; 468 469ring_reset: 470 return netmap_ring_reinit(kring); 471} 472 473 474static void 475lem_netmap_attach(struct adapter *adapter) 476{ 477 struct netmap_adapter na; 478 479 bzero(&na, sizeof(na)); 480 481 na.ifp = adapter->ifp; 482 na.na_flags = NAF_BDG_MAYSLEEP; 483 na.num_tx_desc = adapter->num_tx_desc; 484 na.num_rx_desc = adapter->num_rx_desc; 485 na.nm_txsync = lem_netmap_txsync; 486 na.nm_rxsync = lem_netmap_rxsync; 487 na.nm_register = lem_netmap_reg; 488 na.num_tx_rings = na.num_rx_rings = 1; 489 netmap_attach(&na); 490} 491 492/* end of file */ 493