/*-
 * Copyright (c) 2016 Alexander Motin <mav@FreeBSD.org>
 * Copyright (C) 2013 Intel Corporation
 * Copyright (C) 2015 EMC Corporation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * The Non-Transparent Bridge (NTB) is a device that allows you to connect
 * two or more systems using PCI-e links, providing remote memory access.
 *
 * This module contains a driver for a simulated Ethernet device, using an
 * underlying NTB Transport device.
 *
 * NOTE: Much of the code in this module is shared with Linux. Any patches may
 * be picked up and redistributed in Linux with a dual GPL/BSD license.
38 */ 39 40#include <sys/cdefs.h> 41__FBSDID("$FreeBSD$"); 42 43#include <sys/param.h> 44#include <sys/kernel.h> 45#include <sys/systm.h> 46#include <sys/buf_ring.h> 47#include <sys/bus.h> 48#include <sys/ktr.h> 49#include <sys/limits.h> 50#include <sys/module.h> 51#include <sys/socket.h> 52#include <sys/sockio.h> 53#include <sys/sysctl.h> 54#include <sys/taskqueue.h> 55 56#include <net/if.h> 57#include <net/if_media.h> 58#include <net/if_types.h> 59#include <net/if_media.h> 60#include <net/if_var.h> 61#include <net/bpf.h> 62#include <net/ethernet.h> 63 64#include <machine/bus.h> 65 66#include "../ntb_transport.h" 67 68#define KTR_NTB KTR_SPARE3 69#define NTB_MEDIATYPE (IFM_ETHER | IFM_AUTO | IFM_FDX) 70 71#define NTB_CSUM_FEATURES (CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_SCTP) 72#define NTB_CSUM_FEATURES6 (CSUM_TCP_IPV6 | CSUM_UDP_IPV6 | CSUM_SCTP_IPV6) 73#define NTB_CSUM_SET (CSUM_DATA_VALID | CSUM_DATA_VALID_IPV6 | \ 74 CSUM_PSEUDO_HDR | \ 75 CSUM_IP_CHECKED | CSUM_IP_VALID | \ 76 CSUM_SCTP_VALID) 77 78static SYSCTL_NODE(_hw, OID_AUTO, if_ntb, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 79 "if_ntb"); 80 81static unsigned g_if_ntb_num_queues = UINT_MAX; 82SYSCTL_UINT(_hw_if_ntb, OID_AUTO, num_queues, CTLFLAG_RWTUN, 83 &g_if_ntb_num_queues, 0, "Number of queues per interface"); 84 85struct ntb_net_queue { 86 struct ntb_net_ctx *sc; 87 if_t ifp; 88 struct ntb_transport_qp *qp; 89 struct buf_ring *br; 90 struct task tx_task; 91 struct taskqueue *tx_tq; 92 struct mtx tx_lock; 93 struct callout queue_full; 94}; 95 96struct ntb_net_ctx { 97 if_t ifp; 98 struct ifmedia media; 99 u_char eaddr[ETHER_ADDR_LEN]; 100 int num_queues; 101 struct ntb_net_queue *queues; 102 int mtu; 103}; 104 105static int ntb_net_probe(device_t dev); 106static int ntb_net_attach(device_t dev); 107static int ntb_net_detach(device_t dev); 108static void ntb_net_init(void *arg); 109static int ntb_ifmedia_upd(struct ifnet *); 110static void ntb_ifmedia_sts(struct ifnet *, struct ifmediareq *); 111static int 
ntb_ioctl(if_t ifp, u_long command, caddr_t data); 112static int ntb_transmit(if_t ifp, struct mbuf *m); 113static void ntb_net_tx_handler(struct ntb_transport_qp *qp, void *qp_data, 114 void *data, int len); 115static void ntb_net_rx_handler(struct ntb_transport_qp *qp, void *qp_data, 116 void *data, int len); 117static void ntb_net_event_handler(void *data, enum ntb_link_event status); 118static void ntb_handle_tx(void *arg, int pending); 119static void ntb_qp_full(void *arg); 120static void ntb_qflush(if_t ifp); 121static void create_random_local_eui48(u_char *eaddr); 122 123static int 124ntb_net_probe(device_t dev) 125{ 126 127 device_set_desc(dev, "NTB Network Interface"); 128 return (0); 129} 130 131static int 132ntb_net_attach(device_t dev) 133{ 134 struct ntb_net_ctx *sc = device_get_softc(dev); 135 struct ntb_net_queue *q; 136 if_t ifp; 137 struct ntb_queue_handlers handlers = { ntb_net_rx_handler, 138 ntb_net_tx_handler, ntb_net_event_handler }; 139 int i; 140 141 ifp = sc->ifp = if_gethandle(IFT_ETHER); 142 if (ifp == NULL) { 143 printf("ntb: Cannot allocate ifnet structure\n"); 144 return (ENOMEM); 145 } 146 if_initname(ifp, device_get_name(dev), device_get_unit(dev)); 147 if_setdev(ifp, dev); 148 149 sc->num_queues = min(g_if_ntb_num_queues, 150 ntb_transport_queue_count(dev)); 151 sc->queues = malloc(sc->num_queues * sizeof(struct ntb_net_queue), 152 M_DEVBUF, M_WAITOK | M_ZERO); 153 sc->mtu = INT_MAX; 154 for (i = 0; i < sc->num_queues; i++) { 155 q = &sc->queues[i]; 156 q->sc = sc; 157 q->ifp = ifp; 158 q->qp = ntb_transport_create_queue(dev, i, &handlers, q); 159 if (q->qp == NULL) 160 break; 161 sc->mtu = imin(sc->mtu, ntb_transport_max_size(q->qp)); 162 mtx_init(&q->tx_lock, "ntb tx", NULL, MTX_DEF); 163 q->br = buf_ring_alloc(4096, M_DEVBUF, M_WAITOK, &q->tx_lock); 164 TASK_INIT(&q->tx_task, 0, ntb_handle_tx, q); 165 q->tx_tq = taskqueue_create_fast("ntb_txq", M_NOWAIT, 166 taskqueue_thread_enqueue, &q->tx_tq); 167 
taskqueue_start_threads(&q->tx_tq, 1, PI_NET, "%s txq%d", 168 device_get_nameunit(dev), i); 169 callout_init(&q->queue_full, 1); 170 } 171 sc->num_queues = i; 172 device_printf(dev, "%d queue(s)\n", sc->num_queues); 173 174 if_setinitfn(ifp, ntb_net_init); 175 if_setsoftc(ifp, sc); 176 if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST); 177 if_setioctlfn(ifp, ntb_ioctl); 178 if_settransmitfn(ifp, ntb_transmit); 179 if_setqflushfn(ifp, ntb_qflush); 180 create_random_local_eui48(sc->eaddr); 181 ether_ifattach(ifp, sc->eaddr); 182 if_setcapabilities(ifp, IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6 | 183 IFCAP_JUMBO_MTU | IFCAP_LINKSTATE); 184 if_setcapenable(ifp, IFCAP_JUMBO_MTU | IFCAP_LINKSTATE); 185 if_setmtu(ifp, sc->mtu - ETHER_HDR_LEN); 186 187 ifmedia_init(&sc->media, IFM_IMASK, ntb_ifmedia_upd, 188 ntb_ifmedia_sts); 189 ifmedia_add(&sc->media, NTB_MEDIATYPE, 0, NULL); 190 ifmedia_set(&sc->media, NTB_MEDIATYPE); 191 192 for (i = 0; i < sc->num_queues; i++) 193 ntb_transport_link_up(sc->queues[i].qp); 194 return (0); 195} 196 197static int 198ntb_net_detach(device_t dev) 199{ 200 struct ntb_net_ctx *sc = device_get_softc(dev); 201 struct ntb_net_queue *q; 202 int i; 203 204 for (i = 0; i < sc->num_queues; i++) 205 ntb_transport_link_down(sc->queues[i].qp); 206 ether_ifdetach(sc->ifp); 207 if_free(sc->ifp); 208 ifmedia_removeall(&sc->media); 209 for (i = 0; i < sc->num_queues; i++) { 210 q = &sc->queues[i]; 211 ntb_transport_free_queue(q->qp); 212 buf_ring_free(q->br, M_DEVBUF); 213 callout_drain(&q->queue_full); 214 taskqueue_drain_all(q->tx_tq); 215 mtx_destroy(&q->tx_lock); 216 } 217 free(sc->queues, M_DEVBUF); 218 return (0); 219} 220 221/* Network device interface */ 222 223static void 224ntb_net_init(void *arg) 225{ 226 struct ntb_net_ctx *sc = arg; 227 if_t ifp = sc->ifp; 228 229 if_setdrvflagbits(ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE); 230 if_setbaudrate(ifp, ntb_transport_link_speed(sc->queues[0].qp)); 231 if_link_state_change(ifp, 
ntb_transport_link_query(sc->queues[0].qp) ? 232 LINK_STATE_UP : LINK_STATE_DOWN); 233} 234 235static int 236ntb_ioctl(if_t ifp, u_long command, caddr_t data) 237{ 238 struct ntb_net_ctx *sc = if_getsoftc(ifp); 239 struct ifreq *ifr = (struct ifreq *)data; 240 int error = 0; 241 242 switch (command) { 243 case SIOCSIFFLAGS: 244 case SIOCADDMULTI: 245 case SIOCDELMULTI: 246 break; 247 248 case SIOCSIFMTU: 249 { 250 if (ifr->ifr_mtu > sc->mtu - ETHER_HDR_LEN) { 251 error = EINVAL; 252 break; 253 } 254 255 if_setmtu(ifp, ifr->ifr_mtu); 256 break; 257 } 258 259 case SIOCSIFMEDIA: 260 case SIOCGIFMEDIA: 261 error = ifmedia_ioctl(ifp, ifr, &sc->media, command); 262 break; 263 264 case SIOCSIFCAP: 265 if (ifr->ifr_reqcap & IFCAP_RXCSUM) 266 if_setcapenablebit(ifp, IFCAP_RXCSUM, 0); 267 else 268 if_setcapenablebit(ifp, 0, IFCAP_RXCSUM); 269 if (ifr->ifr_reqcap & IFCAP_TXCSUM) { 270 if_setcapenablebit(ifp, IFCAP_TXCSUM, 0); 271 if_sethwassistbits(ifp, NTB_CSUM_FEATURES, 0); 272 } else { 273 if_setcapenablebit(ifp, 0, IFCAP_TXCSUM); 274 if_sethwassistbits(ifp, 0, NTB_CSUM_FEATURES); 275 } 276 if (ifr->ifr_reqcap & IFCAP_RXCSUM_IPV6) 277 if_setcapenablebit(ifp, IFCAP_RXCSUM_IPV6, 0); 278 else 279 if_setcapenablebit(ifp, 0, IFCAP_RXCSUM_IPV6); 280 if (ifr->ifr_reqcap & IFCAP_TXCSUM_IPV6) { 281 if_setcapenablebit(ifp, IFCAP_TXCSUM_IPV6, 0); 282 if_sethwassistbits(ifp, NTB_CSUM_FEATURES6, 0); 283 } else { 284 if_setcapenablebit(ifp, 0, IFCAP_TXCSUM_IPV6); 285 if_sethwassistbits(ifp, 0, NTB_CSUM_FEATURES6); 286 } 287 break; 288 289 default: 290 error = ether_ioctl(ifp, command, data); 291 break; 292 } 293 294 return (error); 295} 296 297static int 298ntb_ifmedia_upd(struct ifnet *ifp) 299{ 300 struct ntb_net_ctx *sc = if_getsoftc(ifp); 301 struct ifmedia *ifm = &sc->media; 302 303 if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) 304 return (EINVAL); 305 306 return (0); 307} 308 309static void 310ntb_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) 311{ 312 struct ntb_net_ctx *sc 
= if_getsoftc(ifp); 313 314 ifmr->ifm_status = IFM_AVALID; 315 ifmr->ifm_active = NTB_MEDIATYPE; 316 if (ntb_transport_link_query(sc->queues[0].qp)) 317 ifmr->ifm_status |= IFM_ACTIVE; 318} 319 320static void 321ntb_transmit_locked(struct ntb_net_queue *q) 322{ 323 if_t ifp = q->ifp; 324 struct mbuf *m; 325 int rc, len; 326 short mflags; 327 328 CTR0(KTR_NTB, "TX: ntb_transmit_locked"); 329 while ((m = drbr_peek(ifp, q->br)) != NULL) { 330 CTR1(KTR_NTB, "TX: start mbuf %p", m); 331 if_etherbpfmtap(ifp, m); 332 len = m->m_pkthdr.len; 333 mflags = m->m_flags; 334 rc = ntb_transport_tx_enqueue(q->qp, m, m, len); 335 if (rc != 0) { 336 CTR2(KTR_NTB, "TX: could not tx mbuf %p: %d", m, rc); 337 if (rc == EAGAIN) { 338 drbr_putback(ifp, q->br, m); 339 callout_reset_sbt(&q->queue_full, 340 SBT_1MS / 4, SBT_1MS / 4, 341 ntb_qp_full, q, 0); 342 } else { 343 m_freem(m); 344 drbr_advance(ifp, q->br); 345 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 346 } 347 break; 348 } 349 drbr_advance(ifp, q->br); 350 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); 351 if_inc_counter(ifp, IFCOUNTER_OBYTES, len); 352 if (mflags & M_MCAST) 353 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); 354 } 355} 356 357static int 358ntb_transmit(if_t ifp, struct mbuf *m) 359{ 360 struct ntb_net_ctx *sc = if_getsoftc(ifp); 361 struct ntb_net_queue *q; 362 int error, i; 363 364 CTR0(KTR_NTB, "TX: ntb_transmit"); 365 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) 366 i = m->m_pkthdr.flowid % sc->num_queues; 367 else 368 i = curcpu % sc->num_queues; 369 q = &sc->queues[i]; 370 371 error = drbr_enqueue(ifp, q->br, m); 372 if (error) 373 return (error); 374 375 if (mtx_trylock(&q->tx_lock)) { 376 ntb_transmit_locked(q); 377 mtx_unlock(&q->tx_lock); 378 } else 379 taskqueue_enqueue(q->tx_tq, &q->tx_task); 380 return (0); 381} 382 383static void 384ntb_handle_tx(void *arg, int pending) 385{ 386 struct ntb_net_queue *q = arg; 387 388 mtx_lock(&q->tx_lock); 389 ntb_transmit_locked(q); 390 mtx_unlock(&q->tx_lock); 391} 392 
393static void 394ntb_qp_full(void *arg) 395{ 396 struct ntb_net_queue *q = arg; 397 398 CTR0(KTR_NTB, "TX: qp_full callout"); 399 if (ntb_transport_tx_free_entry(q->qp) > 0) 400 taskqueue_enqueue(q->tx_tq, &q->tx_task); 401 else 402 callout_schedule_sbt(&q->queue_full, 403 SBT_1MS / 4, SBT_1MS / 4, 0); 404} 405 406static void 407ntb_qflush(if_t ifp) 408{ 409 struct ntb_net_ctx *sc = if_getsoftc(ifp); 410 struct ntb_net_queue *q; 411 struct mbuf *m; 412 int i; 413 414 for (i = 0; i < sc->num_queues; i++) { 415 q = &sc->queues[i]; 416 mtx_lock(&q->tx_lock); 417 while ((m = buf_ring_dequeue_sc(q->br)) != NULL) 418 m_freem(m); 419 mtx_unlock(&q->tx_lock); 420 } 421 if_qflush(ifp); 422} 423 424/* Network Device Callbacks */ 425static void 426ntb_net_tx_handler(struct ntb_transport_qp *qp, void *qp_data, void *data, 427 int len) 428{ 429 430 m_freem(data); 431 CTR1(KTR_NTB, "TX: tx_handler freeing mbuf %p", data); 432} 433 434static void 435ntb_net_rx_handler(struct ntb_transport_qp *qp, void *qp_data, void *data, 436 int len) 437{ 438 struct ntb_net_queue *q = qp_data; 439 struct ntb_net_ctx *sc = q->sc; 440 struct mbuf *m = data; 441 if_t ifp = q->ifp; 442 uint16_t proto; 443 444 CTR1(KTR_NTB, "RX: rx handler (%d)", len); 445 if (len < 0) { 446 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); 447 return; 448 } 449 450 m->m_pkthdr.rcvif = ifp; 451 if (sc->num_queues > 1) { 452 m->m_pkthdr.flowid = q - sc->queues; 453 M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE); 454 } 455 if (if_getcapenable(ifp) & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) { 456 m_copydata(m, 12, 2, (void *)&proto); 457 switch (ntohs(proto)) { 458 case ETHERTYPE_IP: 459 if (if_getcapenable(ifp) & IFCAP_RXCSUM) { 460 m->m_pkthdr.csum_data = 0xffff; 461 m->m_pkthdr.csum_flags = NTB_CSUM_SET; 462 } 463 break; 464 case ETHERTYPE_IPV6: 465 if (if_getcapenable(ifp) & IFCAP_RXCSUM_IPV6) { 466 m->m_pkthdr.csum_data = 0xffff; 467 m->m_pkthdr.csum_flags = NTB_CSUM_SET; 468 } 469 break; 470 } 471 } 472 if_inc_counter(ifp, 
IFCOUNTER_IPACKETS, 1); 473 if_input(ifp, m); 474} 475 476static void 477ntb_net_event_handler(void *data, enum ntb_link_event status) 478{ 479 struct ntb_net_queue *q = data; 480 481 if_setbaudrate(q->ifp, ntb_transport_link_speed(q->qp)); 482 if_link_state_change(q->ifp, (status == NTB_LINK_UP) ? LINK_STATE_UP : 483 LINK_STATE_DOWN); 484} 485 486/* Helper functions */ 487/* TODO: This too should really be part of the kernel */ 488#define EUI48_MULTICAST 1 << 0 489#define EUI48_LOCALLY_ADMINISTERED 1 << 1 490static void 491create_random_local_eui48(u_char *eaddr) 492{ 493 static uint8_t counter = 0; 494 495 eaddr[0] = EUI48_LOCALLY_ADMINISTERED; 496 arc4rand(&eaddr[1], 4, 0); 497 eaddr[5] = counter++; 498} 499 500static device_method_t ntb_net_methods[] = { 501 /* Device interface */ 502 DEVMETHOD(device_probe, ntb_net_probe), 503 DEVMETHOD(device_attach, ntb_net_attach), 504 DEVMETHOD(device_detach, ntb_net_detach), 505 DEVMETHOD_END 506}; 507 508devclass_t ntb_net_devclass; 509static DEFINE_CLASS_0(ntb, ntb_net_driver, ntb_net_methods, 510 sizeof(struct ntb_net_ctx)); 511DRIVER_MODULE(if_ntb, ntb_transport, ntb_net_driver, ntb_net_devclass, 512 NULL, NULL); 513MODULE_DEPEND(if_ntb, ntb_transport, 1, 1, 1); 514MODULE_VERSION(if_ntb, 1); 515